vxlan.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vxlan
  15. import (
  16. "bytes"
  17. "encoding/json"
  18. "fmt"
  19. "net"
  20. "sync"
  21. "time"
  22. log "github.com/coreos/flannel/Godeps/_workspace/src/github.com/golang/glog"
  23. "github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink"
  24. "github.com/coreos/flannel/Godeps/_workspace/src/golang.org/x/net/context"
  25. "github.com/coreos/flannel/backend"
  26. "github.com/coreos/flannel/pkg/ip"
  27. "github.com/coreos/flannel/subnet"
  28. )
  29. const (
  30. defaultVNI = 1
  31. )
  32. type VXLANBackend struct {
  33. sm subnet.Manager
  34. network string
  35. config *subnet.Config
  36. cfg struct {
  37. VNI int
  38. Port int
  39. }
  40. lease *subnet.Lease
  41. dev *vxlanDevice
  42. ctx context.Context
  43. cancel context.CancelFunc
  44. wg sync.WaitGroup
  45. rts routes
  46. }
  47. func New(sm subnet.Manager, network string, config *subnet.Config) backend.Backend {
  48. ctx, cancel := context.WithCancel(context.Background())
  49. vb := &VXLANBackend{
  50. sm: sm,
  51. network: network,
  52. config: config,
  53. ctx: ctx,
  54. cancel: cancel,
  55. }
  56. vb.cfg.VNI = defaultVNI
  57. return vb
  58. }
  59. func newSubnetAttrs(extEaddr net.IP, mac net.HardwareAddr) (*subnet.LeaseAttrs, error) {
  60. data, err := json.Marshal(&vxlanLeaseAttrs{hardwareAddr(mac)})
  61. if err != nil {
  62. return nil, err
  63. }
  64. return &subnet.LeaseAttrs{
  65. PublicIP: ip.FromIP(extEaddr),
  66. BackendType: "vxlan",
  67. BackendData: json.RawMessage(data),
  68. }, nil
  69. }
  70. func (vb *VXLANBackend) Init(extIface *net.Interface, extIaddr net.IP, extEaddr net.IP) (*backend.SubnetDef, error) {
  71. // Parse our configuration
  72. if len(vb.config.Backend) > 0 {
  73. if err := json.Unmarshal(vb.config.Backend, &vb.cfg); err != nil {
  74. return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err)
  75. }
  76. }
  77. devAttrs := vxlanDeviceAttrs{
  78. vni: uint32(vb.cfg.VNI),
  79. name: fmt.Sprintf("flannel.%v", vb.cfg.VNI),
  80. vtepIndex: extIface.Index,
  81. vtepAddr: extIaddr,
  82. vtepPort: vb.cfg.Port,
  83. }
  84. var err error
  85. for {
  86. vb.dev, err = newVXLANDevice(&devAttrs)
  87. if err == nil {
  88. break
  89. } else {
  90. log.Error("VXLAN init: ", err)
  91. log.Info("Retrying in 1 second...")
  92. // wait 1 sec before retrying
  93. time.Sleep(1 * time.Second)
  94. }
  95. }
  96. sa, err := newSubnetAttrs(extEaddr, vb.dev.MACAddr())
  97. if err != nil {
  98. return nil, err
  99. }
  100. l, err := vb.sm.AcquireLease(vb.ctx, vb.network, sa)
  101. switch err {
  102. case nil:
  103. vb.lease = l
  104. case context.Canceled, context.DeadlineExceeded:
  105. return nil, err
  106. default:
  107. return nil, fmt.Errorf("failed to acquire lease: %v", err)
  108. }
  109. // vxlan's subnet is that of the whole overlay network (e.g. /16)
  110. // and not that of the individual host (e.g. /24)
  111. vxlanNet := ip.IP4Net{
  112. IP: l.Subnet.IP,
  113. PrefixLen: vb.config.Network.PrefixLen,
  114. }
  115. if err = vb.dev.Configure(vxlanNet); err != nil {
  116. return nil, err
  117. }
  118. return &backend.SubnetDef{
  119. Net: l.Subnet,
  120. MTU: vb.dev.MTU(),
  121. }, nil
  122. }
  123. func (vb *VXLANBackend) Run() {
  124. vb.wg.Add(1)
  125. go func() {
  126. subnet.LeaseRenewer(vb.ctx, vb.sm, vb.network, vb.lease)
  127. log.Info("LeaseRenewer exited")
  128. vb.wg.Done()
  129. }()
  130. log.Info("Watching for L3 misses")
  131. misses := make(chan *netlink.Neigh, 100)
  132. // Unfrtunately MonitorMisses does not take a cancel channel
  133. // as there's no wait to interrupt netlink socket recv
  134. go vb.dev.MonitorMisses(misses)
  135. log.Info("Watching for new subnet leases")
  136. evts := make(chan []subnet.Event)
  137. vb.wg.Add(1)
  138. go func() {
  139. subnet.WatchLeases(vb.ctx, vb.sm, vb.network, vb.lease, evts)
  140. log.Info("WatchLeases exited")
  141. vb.wg.Done()
  142. }()
  143. defer vb.wg.Wait()
  144. initialEvtsBatch := <-evts
  145. for {
  146. err := vb.handleInitialSubnetEvents(initialEvtsBatch)
  147. if err == nil {
  148. break
  149. }
  150. log.Error(err, " About to retry")
  151. time.Sleep(time.Second)
  152. }
  153. for {
  154. select {
  155. case miss := <-misses:
  156. vb.handleMiss(miss)
  157. case evtBatch := <-evts:
  158. vb.handleSubnetEvents(evtBatch)
  159. case <-vb.ctx.Done():
  160. return
  161. }
  162. }
  163. }
  164. func (vb *VXLANBackend) Stop() {
  165. vb.cancel()
  166. }
  167. func (vb *VXLANBackend) Name() string {
  168. return "VXLAN"
  169. }
  170. // So we can make it JSON (un)marshalable
  171. type hardwareAddr net.HardwareAddr
  172. func (hw hardwareAddr) MarshalJSON() ([]byte, error) {
  173. return []byte(fmt.Sprintf("%q", net.HardwareAddr(hw))), nil
  174. }
  175. func (hw *hardwareAddr) UnmarshalJSON(b []byte) error {
  176. if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' {
  177. return fmt.Errorf("error parsing hardware addr")
  178. }
  179. b = b[1 : len(b)-1]
  180. mac, err := net.ParseMAC(string(b))
  181. if err != nil {
  182. return err
  183. }
  184. *hw = hardwareAddr(mac)
  185. return nil
  186. }
  187. type vxlanLeaseAttrs struct {
  188. VtepMAC hardwareAddr
  189. }
  190. func (vb *VXLANBackend) handleSubnetEvents(batch []subnet.Event) {
  191. for _, evt := range batch {
  192. switch evt.Type {
  193. case subnet.EventAdded:
  194. log.Info("Subnet added: ", evt.Lease.Subnet)
  195. if evt.Lease.Attrs.BackendType != "vxlan" {
  196. log.Warningf("Ignoring non-vxlan subnet: type=%v", evt.Lease.Attrs.BackendType)
  197. continue
  198. }
  199. var attrs vxlanLeaseAttrs
  200. if err := json.Unmarshal(evt.Lease.Attrs.BackendData, &attrs); err != nil {
  201. log.Error("Error decoding subnet lease JSON: ", err)
  202. continue
  203. }
  204. vb.rts.set(evt.Lease.Subnet, net.HardwareAddr(attrs.VtepMAC))
  205. vb.dev.AddL2(neigh{IP: evt.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)})
  206. case subnet.EventRemoved:
  207. log.Info("Subnet removed: ", evt.Lease.Subnet)
  208. if evt.Lease.Attrs.BackendType != "vxlan" {
  209. log.Warningf("Ignoring non-vxlan subnet: type=%v", evt.Lease.Attrs.BackendType)
  210. continue
  211. }
  212. var attrs vxlanLeaseAttrs
  213. if err := json.Unmarshal(evt.Lease.Attrs.BackendData, &attrs); err != nil {
  214. log.Error("Error decoding subnet lease JSON: ", err)
  215. continue
  216. }
  217. if len(attrs.VtepMAC) > 0 {
  218. vb.dev.DelL2(neigh{IP: evt.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)})
  219. }
  220. vb.rts.remove(evt.Lease.Subnet)
  221. default:
  222. log.Error("Internal error: unknown event type: ", int(evt.Type))
  223. }
  224. }
  225. }
  226. func (vb *VXLANBackend) handleInitialSubnetEvents(batch []subnet.Event) error {
  227. log.Infof("Handling initial subnet events")
  228. fdbTable, err := vb.dev.GetL2List()
  229. if err != nil {
  230. return fmt.Errorf("Error fetching L2 table: %v", err)
  231. }
  232. for _, fdbEntry := range fdbTable {
  233. log.Infof("fdb already populated with: %s %s ", fdbEntry.IP, fdbEntry.HardwareAddr)
  234. }
  235. evtMarker := make([]bool, len(batch))
  236. leaseAttrsList := make([]vxlanLeaseAttrs, len(batch))
  237. fdbEntryMarker := make([]bool, len(fdbTable))
  238. for i, evt := range batch {
  239. if evt.Lease.Attrs.BackendType != "vxlan" {
  240. log.Warningf("Ignoring non-vxlan subnet: type=%v", evt.Lease.Attrs.BackendType)
  241. evtMarker[i] = true
  242. continue
  243. }
  244. if err := json.Unmarshal(evt.Lease.Attrs.BackendData, &leaseAttrsList[i]); err != nil {
  245. log.Error("Error decoding subnet lease JSON: ", err)
  246. evtMarker[i] = true
  247. continue
  248. }
  249. for j, fdbEntry := range fdbTable {
  250. if evt.Lease.Attrs.PublicIP.ToIP().Equal(fdbEntry.IP) && bytes.Equal([]byte(leaseAttrsList[i].VtepMAC), []byte(fdbEntry.HardwareAddr)) {
  251. evtMarker[i] = true
  252. fdbEntryMarker[j] = true
  253. break
  254. }
  255. }
  256. vb.rts.set(evt.Lease.Subnet, net.HardwareAddr(leaseAttrsList[i].VtepMAC))
  257. }
  258. for j, marker := range fdbEntryMarker {
  259. if !marker && fdbTable[j].IP != nil {
  260. err := vb.dev.DelL2(neigh{IP: ip.FromIP(fdbTable[j].IP), MAC: fdbTable[j].HardwareAddr})
  261. if err != nil {
  262. log.Error("Delete L2 failed: ", err)
  263. }
  264. }
  265. }
  266. for i, marker := range evtMarker {
  267. if !marker {
  268. err := vb.dev.AddL2(neigh{IP: batch[i].Lease.Attrs.PublicIP, MAC: net.HardwareAddr(leaseAttrsList[i].VtepMAC)})
  269. if err != nil {
  270. log.Error("Add L2 failed: ", err)
  271. }
  272. }
  273. }
  274. return nil
  275. }
  276. func (vb *VXLANBackend) handleMiss(miss *netlink.Neigh) {
  277. switch {
  278. case len(miss.IP) == 0 && len(miss.HardwareAddr) == 0:
  279. log.Info("Ignoring nil miss")
  280. case len(miss.HardwareAddr) == 0:
  281. vb.handleL3Miss(miss)
  282. default:
  283. log.Infof("Ignoring not a miss: %v, %v", miss.HardwareAddr, miss.IP)
  284. }
  285. }
  286. func (vb *VXLANBackend) handleL3Miss(miss *netlink.Neigh) {
  287. log.Infof("L3 miss: %v", miss.IP)
  288. rt := vb.rts.findByNetwork(ip.FromIP(miss.IP))
  289. if rt == nil {
  290. log.Infof("Route for %v not found", miss.IP)
  291. return
  292. }
  293. if err := vb.dev.AddL3(neigh{IP: ip.FromIP(miss.IP), MAC: rt.vtepMAC}); err != nil {
  294. log.Errorf("AddL3 failed: %v", err)
  295. } else {
  296. log.Info("AddL3 succeeded")
  297. }
  298. }