device.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. package vxlan
  2. import (
  3. "fmt"
  4. "net"
  5. "os"
  6. "syscall"
  7. "time"
  8. log "github.com/coreos/flannel/Godeps/_workspace/src/github.com/golang/glog"
  9. "github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink"
  10. "github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
  11. "github.com/coreos/flannel/pkg/ip"
  12. )
  13. type vxlanDeviceAttrs struct {
  14. vni uint32
  15. name string
  16. vtepIndex int
  17. vtepAddr net.IP
  18. vtepPort int
  19. }
  20. type vxlanDevice struct {
  21. link *netlink.Vxlan
  22. }
  23. func sysctlSet(path, value string) error {
  24. f, err := os.Create(path)
  25. if err != nil {
  26. return err
  27. }
  28. defer f.Close()
  29. _, err = f.Write([]byte(value))
  30. return err
  31. }
  32. func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
  33. link := &netlink.Vxlan{
  34. LinkAttrs: netlink.LinkAttrs{
  35. Name: devAttrs.name,
  36. },
  37. VxlanId: int(devAttrs.vni),
  38. VtepDevIndex: devAttrs.vtepIndex,
  39. SrcAddr: devAttrs.vtepAddr,
  40. Port: devAttrs.vtepPort,
  41. Learning: true,
  42. Proxy: true,
  43. L2miss: true,
  44. }
  45. link, err := ensureLink(link)
  46. if err != nil {
  47. return nil, err
  48. }
  49. // this enables ARP requests being sent to userspace via netlink
  50. sysctlPath := fmt.Sprintf("/proc/sys/net/ipv4/neigh/%s/app_solicit", devAttrs.name)
  51. sysctlSet(sysctlPath, "3")
  52. return &vxlanDevice{
  53. link: link,
  54. }, nil
  55. }
  56. func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) {
  57. err := netlink.LinkAdd(vxlan)
  58. if err == syscall.EEXIST {
  59. // it's ok if the device already exists as long as config is similar
  60. existing, err := netlink.LinkByName(vxlan.Name)
  61. if err != nil {
  62. return nil, err
  63. }
  64. incompat := vxlanLinksIncompat(vxlan, existing)
  65. if incompat == "" {
  66. return existing.(*netlink.Vxlan), nil
  67. }
  68. // delete existing
  69. log.Warningf("%q already exists with incompatable configuration: %v; recreating device", vxlan.Name, incompat)
  70. if err = netlink.LinkDel(existing); err != nil {
  71. return nil, fmt.Errorf("failed to delete interface: %v", err)
  72. }
  73. // create new
  74. if err = netlink.LinkAdd(vxlan); err != nil {
  75. return nil, fmt.Errorf("failed to create vxlan interface: %v", err)
  76. }
  77. } else if err != nil {
  78. return nil, err
  79. }
  80. ifindex := vxlan.Index
  81. link, err := netlink.LinkByIndex(vxlan.Index)
  82. if err != nil {
  83. return nil, fmt.Errorf("can't locate created vxlan device with index %v", ifindex)
  84. }
  85. var ok bool
  86. if vxlan, ok = link.(*netlink.Vxlan); !ok {
  87. return nil, fmt.Errorf("created vxlan device with index %v is not vxlan", ifindex)
  88. }
  89. return vxlan, nil
  90. }
  91. func (dev *vxlanDevice) Configure(ipn ip.IP4Net) error {
  92. setAddr4(dev.link, ipn.ToIPNet())
  93. if err := netlink.LinkSetUp(dev.link); err != nil {
  94. return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err)
  95. }
  96. // explicitly add a route since there might be a route for a subnet already
  97. // installed by Docker and then it won't get auto added
  98. route := netlink.Route{
  99. LinkIndex: dev.link.Attrs().Index,
  100. Scope: netlink.SCOPE_UNIVERSE,
  101. Dst: ipn.Network().ToIPNet(),
  102. }
  103. if err := netlink.RouteAdd(&route); err != nil && err != syscall.EEXIST {
  104. return fmt.Errorf("failed to add route (%s -> %s): %v", ipn.Network().String(), dev.link.Attrs().Name, err)
  105. }
  106. return nil
  107. }
  108. func (dev *vxlanDevice) Destroy() {
  109. netlink.LinkDel(dev.link)
  110. }
  111. func (dev *vxlanDevice) MACAddr() net.HardwareAddr {
  112. return dev.link.HardwareAddr
  113. }
  114. func (dev *vxlanDevice) MTU() int {
  115. return dev.link.MTU
  116. }
  117. func (dev *vxlanDevice) MonitorMisses(misses chan *netlink.Neigh) {
  118. nlsock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
  119. if err != nil {
  120. log.Error("Failed to subscribe to netlink RTNLGRP_NEIGH messages")
  121. return
  122. }
  123. for {
  124. msgs, err := nlsock.Recieve()
  125. if err != nil {
  126. log.Errorf("Failed to receive from netlink: %v ", err)
  127. // wait 1 sec before retrying but honor the cancel channel
  128. time.Sleep(1*time.Second)
  129. continue
  130. }
  131. for _, msg := range msgs {
  132. dev.processNeighMsg(msg, misses)
  133. }
  134. }
  135. }
  136. func isNeighResolving(state int) bool {
  137. return (state & (netlink.NUD_INCOMPLETE | netlink.NUD_STALE | netlink.NUD_DELAY | netlink.NUD_PROBE)) != 0
  138. }
  139. func (dev *vxlanDevice) processNeighMsg(msg syscall.NetlinkMessage, misses chan *netlink.Neigh) {
  140. neigh, err := netlink.NeighDeserialize(msg.Data)
  141. if err != nil {
  142. log.Error("Failed to deserialize netlink ndmsg: %v", err)
  143. return
  144. }
  145. if int(neigh.LinkIndex) != dev.link.Index {
  146. return
  147. }
  148. if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
  149. return
  150. }
  151. if !isNeighResolving(neigh.State) {
  152. // misses come with NUD_STALE bit set
  153. return
  154. }
  155. misses <- neigh
  156. }
  157. func (dev *vxlanDevice) AddL2(mac net.HardwareAddr, vtep net.IP) error {
  158. neigh := netlink.Neigh{
  159. LinkIndex: dev.link.Index,
  160. State: netlink.NUD_REACHABLE,
  161. Family: syscall.AF_BRIDGE,
  162. Flags: netlink.NTF_SELF,
  163. IP: vtep,
  164. HardwareAddr: mac,
  165. }
  166. log.Infof("calling NeighAdd: %v, %v", vtep, mac)
  167. return netlink.NeighAdd(&neigh)
  168. }
  169. func (dev *vxlanDevice) DelL2(mac net.HardwareAddr, vtep net.IP) error {
  170. neigh := netlink.Neigh{
  171. LinkIndex: dev.link.Index,
  172. Family: syscall.AF_BRIDGE,
  173. Flags: netlink.NTF_SELF,
  174. IP: vtep,
  175. HardwareAddr: mac,
  176. }
  177. log.Infof("calling NeighDel: %v, %v", vtep, mac)
  178. return netlink.NeighDel(&neigh)
  179. }
  180. func (dev *vxlanDevice) AddL3(ip net.IP, mac net.HardwareAddr) error {
  181. neigh := netlink.Neigh{
  182. LinkIndex: dev.link.Index,
  183. State: netlink.NUD_REACHABLE,
  184. Type: syscall.RTN_UNICAST,
  185. IP: ip,
  186. HardwareAddr: mac,
  187. }
  188. log.Infof("calling NeighSet: %v, %v", ip, mac)
  189. return netlink.NeighSet(&neigh)
  190. }
  191. func vxlanLinksIncompat(l1, l2 netlink.Link) string {
  192. if l1.Type() != l2.Type() {
  193. return fmt.Sprintf("link type: %v vs %v", l1.Type(), l2.Type())
  194. }
  195. v1 := l1.(*netlink.Vxlan)
  196. v2 := l2.(*netlink.Vxlan)
  197. if v1.VxlanId != v2.VxlanId {
  198. return fmt.Sprintf("vni: %v vs %v", v1.VxlanId, v2.VxlanId)
  199. }
  200. if v1.VtepDevIndex > 0 && v2.VtepDevIndex > 0 && v1.VtepDevIndex != v2.VtepDevIndex {
  201. return fmt.Sprintf("vtep (external) interface: %v vs %v", v1.VtepDevIndex, v2.VtepDevIndex)
  202. }
  203. if len(v1.SrcAddr) > 0 && len(v2.SrcAddr) > 0 && !v1.SrcAddr.Equal(v2.SrcAddr) {
  204. return fmt.Sprintf("vtep (external) IP: %v vs %v", v1.SrcAddr, v2.SrcAddr)
  205. }
  206. if len(v1.Group) > 0 && len(v2.Group) > 0 && !v1.Group.Equal(v2.Group) {
  207. return fmt.Sprintf("group address: %v vs %v", v1.Group, v2.Group)
  208. }
  209. if v1.L2miss != v2.L2miss {
  210. return fmt.Sprintf("l2miss: %v vs %v", v1.L2miss, v2.L2miss)
  211. }
  212. if v1.Port > 0 && v2.Port > 0 && v1.Port != v2.Port {
  213. return fmt.Sprintf("port: %v vs %v", v1.Port, v2.Port)
  214. }
  215. return ""
  216. }
  217. // sets IP4 addr on link removing any existing ones first
  218. func setAddr4(link *netlink.Vxlan, ipn *net.IPNet) error {
  219. addrs, err := netlink.AddrList(link, syscall.AF_INET)
  220. if err != nil {
  221. return err
  222. }
  223. for _, addr := range addrs {
  224. if err = netlink.AddrDel(link, &addr); err != nil {
  225. return fmt.Errorf("failed to delete IPv4 addr %s from %s", addr.String(), link.Attrs().Name)
  226. }
  227. }
  228. addr := netlink.Addr{ ipn, "" }
  229. if err = netlink.AddrAdd(link, &addr); err != nil {
  230. return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), link.Attrs().Name, err)
  231. }
  232. return nil
  233. }