فهرست منبع

backend/vxlan: Add support for "direct routing"

This skips vxlan encapsulation if the hosts are on the same subnet
Tom Denham 7 سال پیش
والد
کامیت
bd001e1c39
5 فایلهای تغییر یافته به همراه 94 افزوده شده و 50 حذف شده
  1. 1 0
      Documentation/backends.md
  2. 2 1
      backend/vxlan/device.go
  3. 11 3
      backend/vxlan/vxlan.go
  4. 78 44
      backend/vxlan/vxlan_network.go
  5. 2 2
      dist/functional-test.sh

+ 1 - 0
Documentation/backends.md

@@ -22,6 +22,7 @@ Type and options:
 * `VNI` (number): VXLAN Identifier (VNI) to be used. Defaults to 1.
 * `Port` (number): UDP port to use for sending encapsulated packets. Defaults to kernel default, currently 8472.
 * `GBP` (Boolean): Enable [VXLAN Group Based Policy](https://github.com/torvalds/linux/commit/3511494ce2f3d3b77544c79b87511a4ddb61dc89).  Defaults to `false`.
+* `DirectRouting` (Boolean): Enable direct routes (like `host-gw`) when the hosts are on the same subnet. VXLAN will only be used to encapsulate packets to hosts on different subnets. Defaults to `false`.
 
 ### host-gw
 

+ 2 - 1
backend/vxlan/device.go

@@ -35,7 +35,8 @@ type vxlanDeviceAttrs struct {
 }
 
 type vxlanDevice struct {
-	link *netlink.Vxlan
+	link          *netlink.Vxlan
+	directRouting bool
 }
 
 func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {

+ 11 - 3
backend/vxlan/vxlan.go

@@ -47,12 +47,17 @@ package vxlan
 // 3) Create an FDB entry with the VTEP MAC and the public IP of the remote flannel daemon.
 //
 // In this scheme the scaling of table entries is linear to the number of remote hosts - 1 route, 1 arp entry and 1 FDB entry per host
+//
+// In this newest scheme, there is also the option of skipping the use of vxlan for hosts that are on the same subnet.
+// This is called "directRouting".
 
 import (
 	"encoding/json"
 	"fmt"
 	"net"
 
+	log "github.com/golang/glog"
+
 	"golang.org/x/net/context"
 
 	"github.com/coreos/flannel/backend"
@@ -98,9 +103,10 @@ func newSubnetAttrs(publicIP net.IP, mac net.HardwareAddr) (*subnet.LeaseAttrs,
 func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Config) (backend.Network, error) {
 	// Parse our configuration
 	cfg := struct {
-		VNI  int
-		Port int
-		GBP  bool
+		VNI           int
+		Port          int
+		GBP           bool
+		DirectRouting bool
 	}{
 		VNI: defaultVNI,
 	}
@@ -110,6 +116,7 @@ func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Conf
 			return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err)
 		}
 	}
+	log.Infof("VXLAN config: VNI=%d Port=%d GBP=%v DirectRouting=%v", cfg.VNI, cfg.Port, cfg.GBP, cfg.DirectRouting)
 
 	devAttrs := vxlanDeviceAttrs{
 		vni:       uint32(cfg.VNI),
@@ -124,6 +131,7 @@ func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Conf
 	if err != nil {
 		return nil, err
 	}
+	dev.directRouting = cfg.DirectRouting
 
 	subnetAttrs, err := newSubnetAttrs(be.extIface.ExtAddr, dev.MACAddr())
 	if err != nil {

+ 78 - 44
backend/vxlan/vxlan_network.go

@@ -85,78 +85,112 @@ type vxlanLeaseAttrs struct {
 
 func (nw *network) handleSubnetEvents(batch []subnet.Event) {
 	for _, event := range batch {
-		if event.Lease.Attrs.BackendType != "vxlan" {
-			log.Warningf("ignoring non-vxlan subnet(%s): type=%v", event.Lease.Subnet, event.Lease.Attrs.BackendType)
+		sn := event.Lease.Subnet
+		attrs := event.Lease.Attrs
+		if attrs.BackendType != "vxlan" {
+			log.Warningf("ignoring non-vxlan subnet(%s): type=%v", sn, attrs.BackendType)
 			continue
 		}
 
-		var attrs vxlanLeaseAttrs
-		if err := json.Unmarshal(event.Lease.Attrs.BackendData, &attrs); err != nil {
+		var vxlanAttrs vxlanLeaseAttrs
+		if err := json.Unmarshal(attrs.BackendData, &vxlanAttrs); err != nil {
 			log.Error("error decoding subnet lease JSON: ", err)
 			continue
 		}
 
-		route := netlink.Route{
+		// This route is used when traffic should be vxlan encapsulated
+		vxlanRoute := netlink.Route{
 			LinkIndex: nw.dev.link.Attrs().Index,
 			Scope:     netlink.SCOPE_UNIVERSE,
-			Dst:       event.Lease.Subnet.ToIPNet(),
-			Gw:        event.Lease.Subnet.IP.ToIP(),
+			Dst:       sn.ToIPNet(),
+			Gw:        sn.IP.ToIP(),
 		}
-		route.SetFlag(syscall.RTNH_F_ONLINK)
+		vxlanRoute.SetFlag(syscall.RTNH_F_ONLINK)
 
-		switch event.Type {
-		case subnet.EventAdded:
-			log.V(2).Infof("adding subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC))
-
-			if err := nw.dev.AddARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-				log.Error("AddARP failed: ", err)
+		// directRouting is where the remote host is on the same subnet so vxlan isn't required.
+		directRoute := netlink.Route{
+			Dst: sn.ToIPNet(),
+			Gw:  attrs.PublicIP.ToIP(),
+		}
+		var directRoutingOK = false
+		if nw.dev.directRouting {
+			routes, err := netlink.RouteGet(attrs.PublicIP.ToIP())
+			if err != nil {
+				log.Errorf("Couldn't lookup route to %v: %v", attrs.PublicIP, err)
 				continue
 			}
+			if len(routes) == 1 && routes[0].Gw == nil {
+				// There is only a single route and there's no gateway (i.e. it's directly connected)
+				directRoutingOK = true
+			}
+		}
 
-			if err := nw.dev.AddFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-				log.Error("AddFDB failed: ", err)
+		switch event.Type {
+		case subnet.EventAdded:
+			if directRoutingOK {
+				log.V(2).Infof("Adding direct route to subnet: %s PublicIP: %s", sn, attrs.PublicIP)
 
-				// Try to clean up the ARP entry then continue
-				if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-					log.Error("DelARP failed: ", err)
+				if err := netlink.RouteReplace(&directRoute); err != nil {
+					log.Errorf("Error adding route to %v via %v: %v", sn, attrs.PublicIP, err)
+					continue
+				}
+			} else {
+				log.V(2).Infof("adding subnet: %s PublicIP: %s VtepMAC: %s", sn, attrs.PublicIP, net.HardwareAddr(vxlanAttrs.VtepMAC))
+				if err := nw.dev.AddARP(neighbor{IP: sn.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+					log.Error("AddARP failed: ", err)
+					continue
 				}
 
-				continue
-			}
+				if err := nw.dev.AddFDB(neighbor{IP: attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+					log.Error("AddFDB failed: ", err)
 
-			// Set the route - the kernel would ARP for the Gw IP address if it hadn't already been set above so make sure
-			// this is done last.
-			if err := netlink.RouteReplace(&route); err != nil {
-				log.Errorf("failed to add route (%s -> %s): %v", route.Dst, route.Gw, err)
+					// Try to clean up the ARP entry then continue
+					if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+						log.Error("DelARP failed: ", err)
+					}
 
-				// Try to clean up both the ARP and FDB entries then continue
-				if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-					log.Error("DelARP failed: ", err)
+					continue
 				}
 
-				if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-					log.Error("DelFDB failed: ", err)
-				}
+				// Set the route - the kernel would ARP for the Gw IP address if it hadn't already been set above so make sure
+				// this is done last.
+				if err := netlink.RouteReplace(&vxlanRoute); err != nil {
+					log.Errorf("failed to add vxlanRoute (%s -> %s): %v", vxlanRoute.Dst, vxlanRoute.Gw, err)
 
-				continue
-			}
+					// Try to clean up both the ARP and FDB entries then continue
+					if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+						log.Error("DelARP failed: ", err)
+					}
 
-		case subnet.EventRemoved:
-			log.V(2).Infof("removing subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC))
+					if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+						log.Error("DelFDB failed: ", err)
+					}
 
-			// Try to remove all entries - don't bail out if one of them fails.
-			if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-				log.Error("DelARP failed: ", err)
+					continue
+				}
 			}
+		case subnet.EventRemoved:
+			if directRoutingOK {
+				log.V(2).Infof("Removing direct route to subnet: %s PublicIP: %s", sn, attrs.PublicIP)
+				if err := netlink.RouteDel(&directRoute); err != nil {
+					log.Errorf("Error deleting route to %v via %v: %v", sn, attrs.PublicIP, err)
+				}
+			} else {
+				log.V(2).Infof("removing subnet: %s PublicIP: %s VtepMAC: %s", sn, attrs.PublicIP, net.HardwareAddr(vxlanAttrs.VtepMAC))
 
-			if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
-				log.Error("DelFDB failed: ", err)
-			}
+				// Try to remove all entries - don't bail out if one of them fails.
+				if err := nw.dev.DelARP(neighbor{IP: sn.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+					log.Error("DelARP failed: ", err)
+				}
 
-			if err := netlink.RouteDel(&route); err != nil {
-				log.Errorf("failed to delete route (%s -> %s): %v", route.Dst, route.Gw, err)
-			}
+				if err := nw.dev.DelFDB(neighbor{IP: attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
+					log.Error("DelFDB failed: ", err)
+				}
 
+				if err := netlink.RouteDel(&vxlanRoute); err != nil {
+					log.Errorf("failed to delete vxlanRoute (%s -> %s): %v", vxlanRoute.Dst, vxlanRoute.Gw, err)
+				}
+			}
 		default:
 			log.Error("internal error: unknown event type: ", int(event.Type))
 		}

+ 2 - 2
dist/functional-test.sh

@@ -143,8 +143,8 @@ multi_test() {
         docker run --name=flannel-host$host -d -it --privileged --entrypoint /bin/sh $flannel_img
 
         # Start two flanneld instances
-        docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -subnet-file /vxlan.env -etcd-prefix=/vxlan/network --etcd-endpoints=$etcd_endpt 2>vxlan.log"
-        docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -subnet-file /hostgw.env -etcd-prefix=/hostgw/network --etcd-endpoints=$etcd_endpt 2>hostgw.log"
+        docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -v 10 -subnet-file /vxlan.env -etcd-prefix=/vxlan/network --etcd-endpoints=$etcd_endpt 2>vxlan.log"
+        docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -v 10 -subnet-file /hostgw.env -etcd-prefix=/hostgw/network --etcd-endpoints=$etcd_endpt 2>hostgw.log"
     done
 
 	echo flannels running