device.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. // Copyright 2015 flannel authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vxlan
  15. import (
  16. "fmt"
  17. "net"
  18. "os"
  19. "syscall"
  20. "time"
  21. log "github.com/golang/glog"
  22. "github.com/vishvananda/netlink"
  23. "github.com/vishvananda/netlink/nl"
  24. "github.com/coreos/flannel/pkg/ip"
  25. )
  // vxlanDeviceAttrs collects the settings newVXLANDevice uses to describe the
  // VXLAN link it creates.
  type vxlanDeviceAttrs struct {
  	vni       uint32 // copied (as int) into the link's VxlanId
  	name      string // link name; also used to build the neigh sysctl path
  	vtepIndex int    // copied into the link's VtepDevIndex
  	vtepAddr  net.IP // copied into the link's SrcAddr
  	vtepPort  int    // copied into the link's Port
  	gbp       bool   // copied into the link's GBP flag
  }
  34. type vxlanDevice struct {
  35. link *netlink.Vxlan
  36. }
  // sysctlSet writes value to the file at path, creating or truncating it
  // (os.Create semantics).
  //
  // It returns the first error encountered while opening, writing, or closing
  // the file. The Close error is reported as well, so a write that only fails
  // when the file is closed is not mistaken for success.
  func sysctlSet(path, value string) error {
  	f, err := os.Create(path)
  	if err != nil {
  		return err
  	}
  	_, writeErr := f.Write([]byte(value))
  	// Always close, even after a failed write, so the descriptor is released.
  	closeErr := f.Close()
  	if writeErr != nil {
  		return writeErr
  	}
  	return closeErr
  }
  46. func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
  47. link := &netlink.Vxlan{
  48. LinkAttrs: netlink.LinkAttrs{
  49. Name: devAttrs.name,
  50. },
  51. VxlanId: int(devAttrs.vni),
  52. VtepDevIndex: devAttrs.vtepIndex,
  53. SrcAddr: devAttrs.vtepAddr,
  54. Port: devAttrs.vtepPort,
  55. Learning: false,
  56. GBP: devAttrs.gbp,
  57. }
  58. link, err := ensureLink(link)
  59. if err != nil {
  60. return nil, err
  61. }
  62. // this enables ARP requests being sent to userspace via netlink
  63. sysctlPath := fmt.Sprintf("/proc/sys/net/ipv4/neigh/%s/app_solicit", devAttrs.name)
  64. if err := sysctlSet(sysctlPath, "3"); err != nil {
  65. return nil, err
  66. }
  67. return &vxlanDevice{
  68. link: link,
  69. }, nil
  70. }
  71. func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) {
  72. err := netlink.LinkAdd(vxlan)
  73. if err == syscall.EEXIST {
  74. // it's ok if the device already exists as long as config is similar
  75. existing, err := netlink.LinkByName(vxlan.Name)
  76. if err != nil {
  77. return nil, err
  78. }
  79. incompat := vxlanLinksIncompat(vxlan, existing)
  80. if incompat == "" {
  81. return existing.(*netlink.Vxlan), nil
  82. }
  83. // delete existing
  84. log.Warningf("%q already exists with incompatable configuration: %v; recreating device", vxlan.Name, incompat)
  85. if err = netlink.LinkDel(existing); err != nil {
  86. return nil, fmt.Errorf("failed to delete interface: %v", err)
  87. }
  88. // create new
  89. if err = netlink.LinkAdd(vxlan); err != nil {
  90. return nil, fmt.Errorf("failed to create vxlan interface: %v", err)
  91. }
  92. } else if err != nil {
  93. return nil, err
  94. }
  95. ifindex := vxlan.Index
  96. link, err := netlink.LinkByIndex(vxlan.Index)
  97. if err != nil {
  98. return nil, fmt.Errorf("can't locate created vxlan device with index %v", ifindex)
  99. }
  100. var ok bool
  101. if vxlan, ok = link.(*netlink.Vxlan); !ok {
  102. return nil, fmt.Errorf("created vxlan device with index %v is not vxlan", ifindex)
  103. }
  104. return vxlan, nil
  105. }
  106. func (dev *vxlanDevice) Configure(ipn ip.IP4Net) error {
  107. setAddr4(dev.link, ipn.ToIPNet())
  108. if err := netlink.LinkSetUp(dev.link); err != nil {
  109. return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err)
  110. }
  111. // explicitly add a route since there might be a route for a subnet already
  112. // installed by Docker and then it won't get auto added
  113. route := netlink.Route{
  114. LinkIndex: dev.link.Attrs().Index,
  115. Scope: netlink.SCOPE_UNIVERSE,
  116. Dst: ipn.Network().ToIPNet(),
  117. }
  118. if err := netlink.RouteAdd(&route); err != nil && err != syscall.EEXIST {
  119. return fmt.Errorf("failed to add route (%s -> %s): %v", ipn.Network().String(), dev.link.Attrs().Name, err)
  120. }
  121. return nil
  122. }
  123. func (dev *vxlanDevice) Destroy() {
  124. netlink.LinkDel(dev.link)
  125. }
  126. func (dev *vxlanDevice) MACAddr() net.HardwareAddr {
  127. return dev.link.HardwareAddr
  128. }
  129. func (dev *vxlanDevice) MTU() int {
  130. return dev.link.MTU
  131. }
  132. type neighbor struct {
  133. MAC net.HardwareAddr
  134. IP ip.IP4
  135. }
  136. func (dev *vxlanDevice) GetL2List() ([]netlink.Neigh, error) {
  137. log.V(4).Infof("calling GetL2List() dev.link.Index: %d ", dev.link.Index)
  138. return netlink.NeighList(dev.link.Index, syscall.AF_BRIDGE)
  139. }
  140. func (dev *vxlanDevice) AddL2(n neighbor) error {
  141. log.V(4).Infof("calling NeighAdd: %v, %v", n.IP, n.MAC)
  142. return netlink.NeighAdd(&netlink.Neigh{
  143. LinkIndex: dev.link.Index,
  144. State: netlink.NUD_PERMANENT,
  145. Family: syscall.AF_BRIDGE,
  146. Flags: netlink.NTF_SELF,
  147. IP: n.IP.ToIP(),
  148. HardwareAddr: n.MAC,
  149. })
  150. }
  151. func (dev *vxlanDevice) DelL2(n neighbor) error {
  152. log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC)
  153. return netlink.NeighDel(&netlink.Neigh{
  154. LinkIndex: dev.link.Index,
  155. Family: syscall.AF_BRIDGE,
  156. Flags: netlink.NTF_SELF,
  157. IP: n.IP.ToIP(),
  158. HardwareAddr: n.MAC,
  159. })
  160. }
  161. func (dev *vxlanDevice) AddL3(n neighbor) error {
  162. log.V(4).Infof("calling NeighSet: %v, %v", n.IP, n.MAC)
  163. return netlink.NeighSet(&netlink.Neigh{
  164. LinkIndex: dev.link.Index,
  165. State: netlink.NUD_REACHABLE,
  166. Type: syscall.RTN_UNICAST,
  167. IP: n.IP.ToIP(),
  168. HardwareAddr: n.MAC,
  169. })
  170. }
  171. func (dev *vxlanDevice) DelL3(n neighbor) error {
  172. log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC)
  173. return netlink.NeighDel(&netlink.Neigh{
  174. LinkIndex: dev.link.Index,
  175. State: netlink.NUD_REACHABLE,
  176. Type: syscall.RTN_UNICAST,
  177. IP: n.IP.ToIP(),
  178. HardwareAddr: n.MAC,
  179. })
  180. }
  181. func (dev *vxlanDevice) MonitorMisses(misses chan *netlink.Neigh) {
  182. nlsock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
  183. if err != nil {
  184. log.Error("Failed to subscribe to netlink RTNLGRP_NEIGH messages")
  185. return
  186. }
  187. for {
  188. msgs, err := nlsock.Receive()
  189. if err != nil {
  190. log.Errorf("Failed to receive from netlink: %v ", err)
  191. time.Sleep(1 * time.Second)
  192. continue
  193. }
  194. for _, msg := range msgs {
  195. dev.processNeighMsg(msg, misses)
  196. }
  197. }
  198. }
  199. func isNeighResolving(state int) bool {
  200. return (state & (netlink.NUD_INCOMPLETE | netlink.NUD_STALE | netlink.NUD_DELAY | netlink.NUD_PROBE)) != 0
  201. }
  202. func (dev *vxlanDevice) processNeighMsg(msg syscall.NetlinkMessage, misses chan *netlink.Neigh) {
  203. neigh, err := netlink.NeighDeserialize(msg.Data)
  204. if err != nil {
  205. log.Error("Failed to deserialize netlink ndmsg: %v", err)
  206. return
  207. }
  208. if neigh.LinkIndex != dev.link.Index {
  209. return
  210. }
  211. if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
  212. return
  213. }
  214. if !isNeighResolving(neigh.State) {
  215. // misses come with NUD_STALE bit set
  216. return
  217. }
  218. misses <- neigh
  219. }
  220. func vxlanLinksIncompat(l1, l2 netlink.Link) string {
  221. if l1.Type() != l2.Type() {
  222. return fmt.Sprintf("link type: %v vs %v", l1.Type(), l2.Type())
  223. }
  224. v1 := l1.(*netlink.Vxlan)
  225. v2 := l2.(*netlink.Vxlan)
  226. if v1.VxlanId != v2.VxlanId {
  227. return fmt.Sprintf("vni: %v vs %v", v1.VxlanId, v2.VxlanId)
  228. }
  229. if v1.VtepDevIndex > 0 && v2.VtepDevIndex > 0 && v1.VtepDevIndex != v2.VtepDevIndex {
  230. return fmt.Sprintf("vtep (external) interface: %v vs %v", v1.VtepDevIndex, v2.VtepDevIndex)
  231. }
  232. if len(v1.SrcAddr) > 0 && len(v2.SrcAddr) > 0 && !v1.SrcAddr.Equal(v2.SrcAddr) {
  233. return fmt.Sprintf("vtep (external) IP: %v vs %v", v1.SrcAddr, v2.SrcAddr)
  234. }
  235. if len(v1.Group) > 0 && len(v2.Group) > 0 && !v1.Group.Equal(v2.Group) {
  236. return fmt.Sprintf("group address: %v vs %v", v1.Group, v2.Group)
  237. }
  238. if v1.L2miss != v2.L2miss {
  239. return fmt.Sprintf("l2miss: %v vs %v", v1.L2miss, v2.L2miss)
  240. }
  241. if v1.Port > 0 && v2.Port > 0 && v1.Port != v2.Port {
  242. return fmt.Sprintf("port: %v vs %v", v1.Port, v2.Port)
  243. }
  244. if v1.GBP != v2.GBP {
  245. return fmt.Sprintf("gbp: %v vs %v", v1.GBP, v2.GBP)
  246. }
  247. return ""
  248. }
  249. // sets IP4 addr on link
  250. func setAddr4(link *netlink.Vxlan, ipn *net.IPNet) error {
  251. // Ensure that the device has a /32 address so that no broadcast routes are created.
  252. // This IP is just used as a source address for host to workload traffic (so
  253. // the return path for the traffic has a decent address to use as the destination)
  254. ipn.Mask = net.CIDRMask(32, 32)
  255. addr := netlink.Addr{IPNet: ipn, Label: ""}
  256. if err := netlink.AddrAdd(link, &addr); err != nil {
  257. return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), link.Attrs().Name, err)
  258. }
  259. return nil
  260. }