|
@@ -17,13 +17,10 @@ package vxlan
|
|
import (
|
|
import (
|
|
"fmt"
|
|
"fmt"
|
|
"net"
|
|
"net"
|
|
- "os"
|
|
|
|
"syscall"
|
|
"syscall"
|
|
- "time"
|
|
|
|
|
|
|
|
log "github.com/golang/glog"
|
|
log "github.com/golang/glog"
|
|
"github.com/vishvananda/netlink"
|
|
"github.com/vishvananda/netlink"
|
|
- "github.com/vishvananda/netlink/nl"
|
|
|
|
|
|
|
|
"github.com/coreos/flannel/pkg/ip"
|
|
"github.com/coreos/flannel/pkg/ip"
|
|
)
|
|
)
|
|
@@ -41,17 +38,6 @@ type vxlanDevice struct {
|
|
link *netlink.Vxlan
|
|
link *netlink.Vxlan
|
|
}
|
|
}
|
|
|
|
|
|
-func sysctlSet(path, value string) error {
|
|
|
|
- f, err := os.Create(path)
|
|
|
|
- if err != nil {
|
|
|
|
- return err
|
|
|
|
- }
|
|
|
|
- defer f.Close()
|
|
|
|
-
|
|
|
|
- _, err = f.Write([]byte(value))
|
|
|
|
- return err
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
|
|
func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
|
|
link := &netlink.Vxlan{
|
|
link := &netlink.Vxlan{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
@@ -69,12 +55,6 @@ func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
|
|
if err != nil {
|
|
if err != nil {
|
|
return nil, err
|
|
return nil, err
|
|
}
|
|
}
|
|
- // this enables ARP requests being sent to userspace via netlink
|
|
|
|
- sysctlPath := fmt.Sprintf("/proc/sys/net/ipv4/neigh/%s/app_solicit", devAttrs.name)
|
|
|
|
- if err := sysctlSet(sysctlPath, "3"); err != nil {
|
|
|
|
- return nil, err
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
return &vxlanDevice{
|
|
return &vxlanDevice{
|
|
link: link,
|
|
link: link,
|
|
}, nil
|
|
}, nil
|
|
@@ -84,6 +64,7 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) {
|
|
err := netlink.LinkAdd(vxlan)
|
|
err := netlink.LinkAdd(vxlan)
|
|
if err == syscall.EEXIST {
|
|
if err == syscall.EEXIST {
|
|
// it's ok if the device already exists as long as config is similar
|
|
// it's ok if the device already exists as long as config is similar
|
|
|
|
+ log.V(1).Infof("VXLAN device already exists")
|
|
existing, err := netlink.LinkByName(vxlan.Name)
|
|
existing, err := netlink.LinkByName(vxlan.Name)
|
|
if err != nil {
|
|
if err != nil {
|
|
return nil, err
|
|
return nil, err
|
|
@@ -91,6 +72,7 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) {
|
|
|
|
|
|
incompat := vxlanLinksIncompat(vxlan, existing)
|
|
incompat := vxlanLinksIncompat(vxlan, existing)
|
|
if incompat == "" {
|
|
if incompat == "" {
|
|
|
|
+ log.V(1).Infof("Returning existing device")
|
|
return existing.(*netlink.Vxlan), nil
|
|
return existing.(*netlink.Vxlan), nil
|
|
}
|
|
}
|
|
|
|
|
|
@@ -123,28 +105,37 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) {
|
|
}
|
|
}
|
|
|
|
|
|
func (dev *vxlanDevice) Configure(ipn ip.IP4Net) error {
|
|
func (dev *vxlanDevice) Configure(ipn ip.IP4Net) error {
|
|
- setAddr4(dev.link, ipn.ToIPNet())
|
|
|
|
|
|
+ addr := netlink.Addr{IPNet: ipn.ToIPNet()}
|
|
|
|
+ existingAddrs, err := netlink.AddrList(dev.link, netlink.FAMILY_V4)
|
|
|
|
+ if err != nil {
|
|
|
|
+ return err
|
|
|
|
+ }
|
|
|
|
|
|
- if err := netlink.LinkSetUp(dev.link); err != nil {
|
|
|
|
- return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err)
|
|
|
|
|
|
+ // flannel will never make this happen. This situation can only be caused by a user, so get them to sort it out.
|
|
|
|
+ if len(existingAddrs) > 1 {
|
|
|
|
+ return fmt.Errorf("link has incompatible addresses. Remove additional addresses and try again. %s", dev.link)
|
|
}
|
|
}
|
|
|
|
|
|
- // explicitly add a route since there might be a route for a subnet already
|
|
|
|
- // installed by Docker and then it won't get auto added
|
|
|
|
- route := netlink.Route{
|
|
|
|
- LinkIndex: dev.link.Attrs().Index,
|
|
|
|
- Scope: netlink.SCOPE_UNIVERSE,
|
|
|
|
- Dst: ipn.Network().ToIPNet(),
|
|
|
|
|
|
+ // If the device has an incompatible address then delete it. This can happen if the lease changes for example.
|
|
|
|
+ if len(existingAddrs) == 1 && !existingAddrs[0].Equal(addr) {
|
|
|
|
+ if err := netlink.AddrDel(dev.link, &existingAddrs[0]); err != nil {
|
|
|
|
+ return fmt.Errorf("failed to remove IP address %s from %s: %s", ipn.String(), dev.link.Attrs().Name, err)
|
|
|
|
+ }
|
|
|
|
+ existingAddrs = []netlink.Addr{}
|
|
}
|
|
}
|
|
- if err := netlink.RouteAdd(&route); err != nil && err != syscall.EEXIST {
|
|
|
|
- return fmt.Errorf("failed to add route (%s -> %s): %v", ipn.Network().String(), dev.link.Attrs().Name, err)
|
|
|
|
|
|
+
|
|
|
|
+ // Actually add the desired address to the interface if needed.
|
|
|
|
+ if len(existingAddrs) == 0 {
|
|
|
|
+ if err := netlink.AddrAdd(dev.link, &addr); err != nil {
|
|
|
|
+ return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), dev.link.Attrs().Name, err)
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- return nil
|
|
|
|
-}
|
|
|
|
|
|
+ if err := netlink.LinkSetUp(dev.link); err != nil {
|
|
|
|
+ return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err)
|
|
|
|
+ }
|
|
|
|
|
|
-func (dev *vxlanDevice) Destroy() {
|
|
|
|
- netlink.LinkDel(dev.link)
|
|
|
|
|
|
+ return nil
|
|
}
|
|
}
|
|
|
|
|
|
func (dev *vxlanDevice) MACAddr() net.HardwareAddr {
|
|
func (dev *vxlanDevice) MACAddr() net.HardwareAddr {
|
|
@@ -160,14 +151,9 @@ type neighbor struct {
|
|
IP ip.IP4
|
|
IP ip.IP4
|
|
}
|
|
}
|
|
|
|
|
|
-func (dev *vxlanDevice) GetL2List() ([]netlink.Neigh, error) {
|
|
|
|
- log.V(4).Infof("calling GetL2List() dev.link.Index: %d ", dev.link.Index)
|
|
|
|
- return netlink.NeighList(dev.link.Index, syscall.AF_BRIDGE)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func (dev *vxlanDevice) AddL2(n neighbor) error {
|
|
|
|
- log.V(4).Infof("calling NeighAdd: %v, %v", n.IP, n.MAC)
|
|
|
|
- return netlink.NeighAdd(&netlink.Neigh{
|
|
|
|
|
|
+func (dev *vxlanDevice) AddFDB(n neighbor) error {
|
|
|
|
+ log.V(4).Infof("calling AddFDB: %v, %v", n.IP, n.MAC)
|
|
|
|
+ return netlink.NeighSet(&netlink.Neigh{
|
|
LinkIndex: dev.link.Index,
|
|
LinkIndex: dev.link.Index,
|
|
State: netlink.NUD_PERMANENT,
|
|
State: netlink.NUD_PERMANENT,
|
|
Family: syscall.AF_BRIDGE,
|
|
Family: syscall.AF_BRIDGE,
|
|
@@ -177,8 +163,8 @@ func (dev *vxlanDevice) AddL2(n neighbor) error {
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
|
|
-func (dev *vxlanDevice) DelL2(n neighbor) error {
|
|
|
|
- log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC)
|
|
|
|
|
|
+func (dev *vxlanDevice) DelFDB(n neighbor) error {
|
|
|
|
+ log.V(4).Infof("calling DelFDB: %v, %v", n.IP, n.MAC)
|
|
return netlink.NeighDel(&netlink.Neigh{
|
|
return netlink.NeighDel(&netlink.Neigh{
|
|
LinkIndex: dev.link.Index,
|
|
LinkIndex: dev.link.Index,
|
|
Family: syscall.AF_BRIDGE,
|
|
Family: syscall.AF_BRIDGE,
|
|
@@ -188,77 +174,28 @@ func (dev *vxlanDevice) DelL2(n neighbor) error {
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
|
|
-func (dev *vxlanDevice) AddL3(n neighbor) error {
|
|
|
|
- log.V(4).Infof("calling NeighSet: %v, %v", n.IP, n.MAC)
|
|
|
|
|
|
+func (dev *vxlanDevice) AddARP(n neighbor) error {
|
|
|
|
+ log.V(4).Infof("calling AddARP: %v, %v", n.IP, n.MAC)
|
|
return netlink.NeighSet(&netlink.Neigh{
|
|
return netlink.NeighSet(&netlink.Neigh{
|
|
LinkIndex: dev.link.Index,
|
|
LinkIndex: dev.link.Index,
|
|
- State: netlink.NUD_REACHABLE,
|
|
|
|
|
|
+ State: netlink.NUD_PERMANENT,
|
|
Type: syscall.RTN_UNICAST,
|
|
Type: syscall.RTN_UNICAST,
|
|
IP: n.IP.ToIP(),
|
|
IP: n.IP.ToIP(),
|
|
HardwareAddr: n.MAC,
|
|
HardwareAddr: n.MAC,
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
|
|
-func (dev *vxlanDevice) DelL3(n neighbor) error {
|
|
|
|
- log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC)
|
|
|
|
|
|
+func (dev *vxlanDevice) DelARP(n neighbor) error {
|
|
|
|
+ log.V(4).Infof("calling DelARP: %v, %v", n.IP, n.MAC)
|
|
return netlink.NeighDel(&netlink.Neigh{
|
|
return netlink.NeighDel(&netlink.Neigh{
|
|
LinkIndex: dev.link.Index,
|
|
LinkIndex: dev.link.Index,
|
|
- State: netlink.NUD_REACHABLE,
|
|
|
|
|
|
+ State: netlink.NUD_PERMANENT,
|
|
Type: syscall.RTN_UNICAST,
|
|
Type: syscall.RTN_UNICAST,
|
|
IP: n.IP.ToIP(),
|
|
IP: n.IP.ToIP(),
|
|
HardwareAddr: n.MAC,
|
|
HardwareAddr: n.MAC,
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
|
|
-func (dev *vxlanDevice) MonitorMisses(misses chan *netlink.Neigh) {
|
|
|
|
- nlsock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
|
|
|
|
- if err != nil {
|
|
|
|
- log.Error("Failed to subscribe to netlink RTNLGRP_NEIGH messages")
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- for {
|
|
|
|
- msgs, err := nlsock.Receive()
|
|
|
|
- if err != nil {
|
|
|
|
- log.Errorf("Failed to receive from netlink: %v ", err)
|
|
|
|
-
|
|
|
|
- time.Sleep(1 * time.Second)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- for _, msg := range msgs {
|
|
|
|
- dev.processNeighMsg(msg, misses)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func isNeighResolving(state int) bool {
|
|
|
|
- return (state & (netlink.NUD_INCOMPLETE | netlink.NUD_STALE | netlink.NUD_DELAY | netlink.NUD_PROBE)) != 0
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func (dev *vxlanDevice) processNeighMsg(msg syscall.NetlinkMessage, misses chan *netlink.Neigh) {
|
|
|
|
- neigh, err := netlink.NeighDeserialize(msg.Data)
|
|
|
|
- if err != nil {
|
|
|
|
- log.Error("Failed to deserialize netlink ndmsg: %v", err)
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if neigh.LinkIndex != dev.link.Index {
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if !isNeighResolving(neigh.State) {
|
|
|
|
- // misses come with NUD_STALE bit set
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- misses <- neigh
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
func vxlanLinksIncompat(l1, l2 netlink.Link) string {
|
|
func vxlanLinksIncompat(l1, l2 netlink.Link) string {
|
|
if l1.Type() != l2.Type() {
|
|
if l1.Type() != l2.Type() {
|
|
return fmt.Sprintf("link type: %v vs %v", l1.Type(), l2.Type())
|
|
return fmt.Sprintf("link type: %v vs %v", l1.Type(), l2.Type())
|
|
@@ -297,17 +234,3 @@ func vxlanLinksIncompat(l1, l2 netlink.Link) string {
|
|
|
|
|
|
return ""
|
|
return ""
|
|
}
|
|
}
|
|
-
|
|
|
|
-// sets IP4 addr on link
|
|
|
|
-func setAddr4(link *netlink.Vxlan, ipn *net.IPNet) error {
|
|
|
|
- // Ensure that the device has a /32 address so that no broadcast routes are created.
|
|
|
|
- // This IP is just used as a source address for host to workload traffic (so
|
|
|
|
- // the return path for the traffic has a decent address to use as the destination)
|
|
|
|
- ipn.Mask = net.CIDRMask(32, 32)
|
|
|
|
- addr := netlink.Addr{IPNet: ipn, Label: ""}
|
|
|
|
- if err := netlink.AddrAdd(link, &addr); err != nil {
|
|
|
|
- return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), link.Attrs().Name, err)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return nil
|
|
|
|
-}
|
|
|