Browse Source

Basic overlay network using UDP encapsulation

Eugene Yakubovich 10 years ago
parent
commit
e5dd141b95
17 changed files with 1641 additions and 16 deletions
  1. 51 16
      README.md
  2. 7 0
      backend/common.go
  3. 151 0
      main.go
  4. 28 0
      pkg/endianess.go
  5. 60 0
      pkg/iface.go
  6. 157 0
      pkg/ipnet.go
  7. 107 0
      pkg/ipnet_test.go
  8. 17 0
      pkg/rand.go
  9. 51 0
      pkg/tun.go
  10. 52 0
      subnet/config.go
  11. 57 0
      subnet/config_test.go
  12. 410 0
      subnet/subnet.go
  13. BIN
      subnet/subnet.test
  14. 202 0
      subnet/subnet_test.go
  15. 103 0
      udp/router.go
  16. 185 0
      udp/run.go
  17. 3 0
      version.go

+ 51 - 16
README.md

@@ -1,6 +1,6 @@
 # kolach
 
-kolach is a point to point VPN that assigns a subnet to each machine for use with
+kolach is an overlay network that gives a subnet to each machine for use with
 k8s.
 
 In k8s every machine in the cluster is assigned a full subnet. The machine A
@@ -12,25 +12,60 @@ disadvantage is that the only cloud provider that can do this is GCE.
 
 To emulate the Kubernetes model from GCE on other platforms we need to create
 an overlay network on top of the network that we are given from cloud
-providers. Not a fun task but certainly doable.
+providers. Kolach uses the Universal TUN/TAP device and creates an overlay network
+using UDP to encapsulate IP packets. The subnet allocation is done with the help
+of etcd which maintains the overlay to actual IP mappings.
 
-There are few prototype steps we need to explore to bring this together:
+## Configuration
 
-1) Get openvpn (or some similar product) working inside of a container and
-bridging a subnet between CoreOS machines.
+Kolach reads its configuration from etcd. By default, it will read the configuration
+from ```/coreos.com/network/config``` (can be overridden via --etcd-prefix).
+The value of the config should be a JSON dictionary with the following keys:
 
-This blog post outline a configuration that can probably work for openvpn:
-http://blog.wains.be/2008/06/07/routed-openvpn-between-two-subnets-behind-nat-gateways/
+* ```Network``` (string): IPv4 network in CIDR format to use for the entire overlay network. This
+is the only mandatory key.
 
-2) Get two containers connected via this overlay network. The simplest place to
-start would be to createa an interface alias for the openvpn tap device, give
-the container the host networking namespace and then have it bind on that interface.
+* ```HostSubnet``` (number): The prefix length of the subnet allocated to each host. Defaults to 24 (i.e. /24) unless
+the Network is a /24 or smaller, in which case it is one bit longer than the Network prefix (i.e. half of the Network).
 
-3) Write a thing that uses etcd to register machines preferred network ip for
-every machine in the cluster to connect to. Machines in the network should
-create a new openvpn connection for every registered machine and ensure it is
-up.
+* ```FirstIP``` (string): The beginning of the IP range from which subnets are allocated. Defaults
+to the value of Network.
 
-4) Configure this whole thing using etcd and hold the network keys in etcd too.
+* ```LastIP``` (string): The end of the IP range from which subnets are allocated. Defaults to
+one host-sized subnet prior to the end of the Network range.
 
-5) Ship it!
+## Running
+
+Once you have pushed configuration JSON to etcd, you can start kolach. If you published your
+config at the default location, you can start kolach with no arguments. Kolach will acquire a
+subnet lease, configure its routes based on other leases in the overlay network and start
+routing packets. Additionally it will monitor etcd for new members of the network and adjust
+its routing table accordingly.
+
+After kolach has acquired the subnet and configured the TUN device, it will write out an
+environment variable file (```/run/kolach/subnet.env``` by default) with subnet address and
+MTU that it supports.
+
+## Key command line options
+
+```
+-etcd-endpoint="http://127.0.0.1:4001": etcd endpoint
+-etcd-prefix="/coreos.com/network": etcd prefix
+-iface="": interface to use (IP or name) for inter-host communication. Defaults to the interface for the default route on the machine.
+-port=4242: port to use for inter-node communications
+-subnet-file="/run/kolach/subnet.env": filename where env variables (subnet and MTU values) will be written to
+-v=0: log level for V logs. Set to 1 to see messages related to data path
+```
+
+## Docker integration
+
+Docker daemon accepts ```--bip``` argument to configure the subnet of the docker0 bridge. It also accepts ```--mtu``` to set the MTU
+for docker0 and veth devices that it will be creating. Since kolach writes out the acquired subnet and MTU values into
+a file, the script starting Docker daemon can source in the values and pass them to Docker daemon:
+
+```bash
+source /run/kolach/subnet.env
+docker -d --bip=${KOLACH_SUBNET} --mtu=${KOLACH_MTU}
+```
+
+Systemd users can use ```EnvironmentFile``` directive in the .service file to pull in ```/run/kolach/subnet.env```

+ 7 - 0
backend/common.go

@@ -0,0 +1,7 @@
+package backend
+
+import (
+	"github.com/coreos-inc/kolach/pkg"
+)
+
+// ReadyFunc is a callback that receives a subnet and an MTU value.
+// NOTE(review): presumably invoked by a backend once its subnet and device
+// are set up -- confirm against the caller.
+type ReadyFunc func(sn pkg.IP4Net, mtu int)

+ 151 - 0
main.go

@@ -0,0 +1,151 @@
+package main
+
+import (
+	"fmt"
+	"net"
+	"os"
+	"time"
+	"flag"
+	"path"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
+	log "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/golang/glog"
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/coreos/go-systemd/daemon"
+
+	"github.com/coreos-inc/kolach/pkg"
+	"github.com/coreos-inc/kolach/subnet"
+	"github.com/coreos-inc/kolach/udp"
+)
+
+const (
+	// defaultPort is the default value of the -port flag.
+	defaultPort = 4242
+)
+
+// CmdLineOpts holds the values of the command line flags registered in init.
+type CmdLineOpts struct {
+	etcdEndpoint string // -etcd-endpoint
+	etcdPrefix   string // -etcd-prefix
+	help         bool   // -help
+	version      bool   // -version
+	port         int    // -port
+	subnetFile   string // -subnet-file
+	iface        string // -iface
+}
+
+// opts is the package-level set of command line options filled in by flag.Parse.
+var opts CmdLineOpts
+
+// init registers every command line flag on the default flag set, binding
+// each one to the matching field of opts.
+func init() {
+	// informational switches
+	flag.BoolVar(&opts.help, "help", false, "print this message")
+	flag.BoolVar(&opts.version, "version", false, "print version and exit")
+
+	// etcd location
+	flag.StringVar(&opts.etcdEndpoint, "etcd-endpoint", "http://127.0.0.1:4001", "etcd endpoint")
+	flag.StringVar(&opts.etcdPrefix, "etcd-prefix", "/coreos.com/network", "etcd prefix")
+
+	// data path and output
+	flag.StringVar(&opts.iface, "iface", "", "interface to use (IP or name) for inter-host communication")
+	flag.IntVar(&opts.port, "port", defaultPort, "port to use for inter-node communications")
+	flag.StringVar(&opts.subnetFile, "subnet-file", "/run/kolach/subnet.env", "filename where env variables (subnet and MTU values) will be written to")
+}
+
+// writeSubnet writes the KOLACH_SUBNET and KOLACH_MTU variables to
+// opts.subnetFile, creating the parent directory if needed.
+//
+// The subnet is written with its address incremented by one, i.e. the first
+// usable IP of the subnet. Any failure to create the directory, create the
+// file, write to it or close it is returned to the caller.
+func writeSubnet(sn pkg.IP4Net, mtu int) error {
+	// Write out the first usable IP by incrementing
+	// sn.IP by one
+	sn.IP += 1
+
+	// A bare filename has no directory component; nothing to create then.
+	if dir, _ := path.Split(opts.subnetFile); dir != "" {
+		if err := os.MkdirAll(dir, 0755); err != nil {
+			return err
+		}
+	}
+
+	f, err := os.Create(opts.subnetFile)
+	if err != nil {
+		return err
+	}
+
+	if _, err = fmt.Fprintf(f, "KOLACH_SUBNET=%s\nKOLACH_MTU=%d\n", sn, mtu); err != nil {
+		f.Close()
+		return err
+	}
+
+	// Close explicitly so that a failed flush is reported too.
+	return f.Close()
+}
+
+// lookupIface resolves the interface and IP address used for inter-host
+// traffic.
+//
+// When -iface is empty it polls once a second until an interface carrying the
+// default route is found. Otherwise -iface is treated as an IP address if it
+// parses as one, or as an interface name if it does not. It returns nil, nil
+// when the requested interface cannot be found; the returned IP is nil when
+// the interface has no IPv4 address.
+func lookupIface() (*net.Interface, net.IP) {
+	var (
+		iface *net.Interface
+		ip    net.IP
+		err   error
+	)
+
+	if opts.iface == "" {
+		log.Info("Determining IP address of default interface")
+		iface, err = pkg.GetDefaultGatewayIface()
+		for err != nil {
+			log.Error("Failed to get default interface: ", err)
+			time.Sleep(time.Second)
+			iface, err = pkg.GetDefaultGatewayIface()
+		}
+	} else {
+		ip = net.ParseIP(opts.iface)
+		if ip != nil {
+			iface, err = pkg.GetInterfaceByIP(ip)
+		} else {
+			iface, err = net.InterfaceByName(opts.iface)
+		}
+		if err != nil {
+			log.Errorf("Error looking up interface %s: %s", opts.iface, err)
+			return nil, nil
+		}
+	}
+
+	// An explicitly supplied IP is used as-is.
+	if ip != nil {
+		return iface, ip
+	}
+
+	ip, err = pkg.GetIfaceIP4Addr(iface)
+	if err != nil {
+		log.Error("Failed to find IPv4 address for interface ", iface.Name)
+	}
+	return iface, ip
+}
+
+// makeSubnetManager builds a SubnetManager on top of an etcd client for
+// opts.etcdEndpoint, retrying once a second until construction succeeds.
+func makeSubnetManager() *subnet.SubnetManager {
+	client := etcd.NewClient([]string{opts.etcdEndpoint})
+
+	sm, err := subnet.NewSubnetManager(client, opts.etcdPrefix)
+	for err != nil {
+		log.Error("Failed to create SubnetManager: ", err)
+		time.Sleep(time.Second)
+		sm, err = subnet.NewSubnetManager(client, opts.etcdPrefix)
+	}
+
+	return sm
+}
+
+// main parses the command line, resolves the interface used for tunnelling,
+// creates the subnet manager and hands control to udp.Run. The callback passed
+// to udp.Run writes the subnet file and notifies systemd with READY=1.
+func main() {
+	// glog will log to tmp files by default. override so all entries
+	// can flow into journald (if running under systemd)
+	flag.Set("logtostderr", "true")
+
+	// now parse command line args
+	flag.Parse()
+
+	if opts.help {
+		fmt.Fprintf(os.Stderr, "Usage: %s [OPTION]...\n", os.Args[0])
+		flag.PrintDefaults()
+		os.Exit(0)
+	}
+
+	if opts.version {
+		fmt.Fprintln(os.Stderr, Version)
+		os.Exit(0)
+	}
+
+	// lookupIface has already logged the reason when either value is nil.
+	iface, ip := lookupIface()
+	if iface == nil || ip == nil {
+		return
+	}
+
+	log.Infof("Using %s to tunnel", ip)
+
+	sm := makeSubnetManager()
+
+	udp.Run(sm, iface, ip, opts.port, func(sn pkg.IP4Net, mtu int) {
+		// Do not swallow the error: consumers of the subnet file would
+		// otherwise fail later with no hint as to why.
+		if err := writeSubnet(sn, mtu); err != nil {
+			log.Errorf("Failed to write subnet file %s: %s", opts.subnetFile, err)
+		}
+		daemon.SdNotify("READY=1")
+	})
+}

+ 28 - 0
pkg/endianess.go

@@ -0,0 +1,28 @@
+package pkg
+
+// Taken from a patch by David Anderson who submitted it
+// but got rejected by the golang team
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+// NativeEndian is the ByteOrder of the current system.
+var NativeEndian binary.ByteOrder
+
+// init detects the byte order by inspecting the first byte in memory of an
+// int16 holding the value 1.
+func init() {
+	var probe int16 = 1
+	firstByte := *(*byte)(unsafe.Pointer(&probe))
+
+	// A non-zero first byte means the least significant byte is stored first.
+	if firstByte != 0 {
+		NativeEndian = binary.LittleEndian
+		return
+	}
+	NativeEndian = binary.BigEndian
+}
+
+// NativelyLittle reports whether NativeEndian is binary.LittleEndian.
+func NativelyLittle() bool {
+	switch NativeEndian {
+	case binary.LittleEndian:
+		return true
+	default:
+		return false
+	}
+}

+ 60 - 0
pkg/iface.go

@@ -0,0 +1,60 @@
+package pkg
+
+import (
+	"errors"
+	"net"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/docker/libcontainer/netlink"
+)
+
+// GetIfaceIP4Addr returns the first address of iface that parses as CIDR and
+// is IPv4, in 4-byte form. It returns an error when listing the addresses
+// fails or when no such address exists.
+func GetIfaceIP4Addr(iface *net.Interface) (net.IP, error) {
+	addrs, err := iface.Addrs()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, a := range addrs {
+		candidate, _, parseErr := net.ParseCIDR(a.String())
+		if parseErr != nil {
+			// not in CIDR notation, try the next address
+			continue
+		}
+		if v4 := candidate.To4(); v4 != nil {
+			return v4, nil
+		}
+	}
+
+	return nil, errors.New("No IPv4 address found for given interface")
+}
+
+// GetDefaultGatewayIface returns the interface of the first route flagged as
+// default among those reported by netlink. It fails when the routes cannot be
+// listed, when no default route exists, or when that route has no interface.
+func GetDefaultGatewayIface() (*net.Interface, error) {
+	routes, err := netlink.NetworkGetRoutes()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, r := range routes {
+		if !r.Default {
+			continue
+		}
+		if r.Iface != nil {
+			return r.Iface, nil
+		}
+		return nil, errors.New("Found default route but could not determine interface")
+	}
+
+	return nil, errors.New("Unable to find default route")
+}
+
+// GetInterfaceByIP returns the interface whose IPv4 address (as reported by
+// GetIfaceIP4Addr) equals ip. It returns an error when the interfaces cannot
+// be listed or when none of them carries the address.
+func GetInterfaceByIP(ip net.IP) (*net.Interface, error) {
+	ifaces, err := net.Interfaces()
+	if err != nil {
+		return nil, err
+	}
+
+	for i := range ifaces {
+		// Interfaces without an IPv4 address are skipped; the address is
+		// only meaningful when the lookup succeeded.
+		addr, err := GetIfaceIP4Addr(&ifaces[i])
+		if err == nil && ip.Equal(addr) {
+			// Point into the slice rather than at a loop variable.
+			return &ifaces[i], nil
+		}
+	}
+
+	return nil, errors.New("No interface with given IP found")
+}

+ 157 - 0
pkg/ipnet.go

@@ -0,0 +1,157 @@
+package pkg
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"net"
+)
+
+// IP4 is an IPv4 address stored in a uint32.
+type IP4 uint32
+
+// FromBytes packs the first four bytes of ip into an IP4. Which byte lands in
+// the most significant position depends on NativelyLittle. ip must hold at
+// least four bytes.
+func FromBytes(ip []byte) IP4 {
+	if NativelyLittle() {
+		d, c, b, a := uint32(ip[3]), uint32(ip[2]), uint32(ip[1]), uint32(ip[0])
+		return IP4(d | c<<8 | b<<16 | a<<24)
+	}
+
+	a, b, c, d := uint32(ip[0]), uint32(ip[1]), uint32(ip[2]), uint32(ip[3])
+	return IP4(a | b<<8 | c<<16 | d<<24)
+}
+
+// FromIP converts a net.IP holding an IPv4 address into an IP4. The address
+// is reduced with To4 first, so ip must be an IPv4 (or IPv4-mapped) address.
+func FromIP(ip net.IP) IP4 {
+	return FromBytes(ip.To4())
+}
+
+// ParseIP4 parses a dotted-decimal IPv4 address. It returns an error when s
+// is not an IP address at all, and also when it is a valid address that has
+// no IPv4 form (e.g. "::1"), instead of panicking on the latter.
+func ParseIP4(s string) (IP4, error) {
+	// To4 on a nil net.IP yields nil, so one check covers both failures.
+	ip := net.ParseIP(s).To4()
+	if ip == nil {
+		return IP4(0), errors.New("Invalid IP address format")
+	}
+	return FromBytes(ip), nil
+}
+
+// Octets splits ip into its four bytes, in the order that mirrors FromBytes:
+// a is the byte FromBytes read from index 0 and d the one from index 3.
+func (ip IP4) Octets() (a, b, c, d byte) {
+	b0, b1, b2, b3 := byte(ip), byte(ip>>8), byte(ip>>16), byte(ip>>24)
+	if NativelyLittle() {
+		return b3, b2, b1, b0
+	}
+	return b0, b1, b2, b3
+}
+
+// ToIP converts ip to a net.IP built from its four octets.
+func (ip IP4) ToIP() net.IP {
+	a, b, c, d := ip.Octets()
+	return net.IPv4(a, b, c, d)
+}
+
+// String returns the textual form produced by net.IP for this address.
+func (ip IP4) String() string {
+	asNetIP := ip.ToIP()
+	return asNetIP.String()
+}
+
+// StringSep formats the four octets in decimal, joined by sep.
+func (ip IP4) StringSep(sep string) string {
+	a, b, c, d := ip.Octets()
+	return fmt.Sprint(a) + sep + fmt.Sprint(b) + sep + fmt.Sprint(c) + sep + fmt.Sprint(d)
+}
+
+// MarshalJSON implements json.Marshaler; the address is encoded as a JSON
+// string holding its String form.
+func (ip IP4) MarshalJSON() ([]byte, error) {
+	quoted := `"` + ip.String() + `"`
+	return []byte(quoted), nil
+}
+
+// UnmarshalJSON implements json.Unmarshaler; surrounding double quotes are
+// stripped and the remainder is parsed with ParseIP4.
+func (ip *IP4) UnmarshalJSON(j []byte) error {
+	text := string(bytes.Trim(j, "\""))
+
+	parsed, err := ParseIP4(text)
+	if err != nil {
+		return err
+	}
+
+	*ip = parsed
+	return nil
+}
+
+// IP4Net is similar to net.IPNet but uses an unsigned integer representation
+// for both the address and the prefix length.
+type IP4Net struct {
+	IP        IP4
+	PrefixLen uint
+}
+
+// String formats the network as "<ip>/<prefix length>".
+func (n IP4Net) String() string {
+	return fmt.Sprintf("%v/%d", n.IP, n.PrefixLen)
+}
+
+// StringSep formats the network with octetSep between the octets and
+// prefixSep between the address and the prefix length.
+func (n IP4Net) StringSep(octetSep, prefixSep string) string {
+	addr := n.IP.StringSep(octetSep)
+	return fmt.Sprintf("%s%s%d", addr, prefixSep, n.PrefixLen)
+}
+
+// Network returns n with the bits outside the mask cleared from the address.
+func (n IP4Net) Network() IP4Net {
+	masked := n.IP & IP4(n.Mask())
+	return IP4Net{IP: masked, PrefixLen: n.PrefixLen}
+}
+
+// Next returns the network of the same prefix length that starts one network
+// size after n.IP.
+func (n IP4Net) Next() IP4Net {
+	following := n
+	following.IP = n.IP + (1 << (32 - n.PrefixLen))
+	return following
+}
+
+func FromIPNet(n *net.IPNet) IP4Net {
+	prefixLen, _ := n.Mask.Size()
+	return IP4Net{
+		FromIP(n.IP),
+		uint(prefixLen),
+	}
+}
+
+func (n IP4Net) ToIPNet() *net.IPNet {
+	return &net.IPNet{
+		IP:   n.IP.ToIP(),
+		Mask: net.CIDRMask(int(n.PrefixLen), 32),
+	}
+}
+
+func (n IP4Net) Overlaps(other IP4Net) bool {
+	var mask uint32
+	if n.PrefixLen < other.PrefixLen {
+		mask = n.Mask()
+	} else {
+		mask = other.Mask()
+	}
+	return (uint32(n.IP) & mask) == (uint32(other.IP) & mask)
+}
+
+func (n IP4Net) Equal(other IP4Net) bool {
+	return n.IP == other.IP && n.PrefixLen == other.PrefixLen
+}
+
+func (n IP4Net) Mask() uint32 {
+	var ones uint32 = 0xFFFFFFFF
+	return ones << (32 - n.PrefixLen)
+}
+
+func (n IP4Net) Contains(ip IP4) bool {
+	return (uint32(n.IP) & n.Mask()) == (uint32(ip) & n.Mask())
+}
+
+// MarshalJSON implements json.Marshaler; the network is encoded as a JSON
+// string holding its String form.
+func (n IP4Net) MarshalJSON() ([]byte, error) {
+	return []byte(fmt.Sprintf(`"%s"`, n)), nil
+}
+
+// UnmarshalJSON implements json.Unmarshaler. Surrounding double quotes are
+// stripped and the remainder must be an IPv4 network in CIDR notation.
+// Malformed input and non-IPv4 networks are reported as errors; nothing is
+// printed to stdout.
+func (n *IP4Net) UnmarshalJSON(j []byte) error {
+	_, val, err := net.ParseCIDR(string(bytes.Trim(j, "\"")))
+	if err != nil {
+		return err
+	}
+
+	// An IPv6 network has no 4-byte form and its prefix length is counted
+	// out of 128 bits, so it cannot be represented by IP4Net.
+	if val.IP.To4() == nil || len(val.Mask) != net.IPv4len {
+		return errors.New("Invalid IPv4 network format")
+	}
+
+	*n = FromIPNet(val)
+	return nil
+}

+ 107 - 0
pkg/ipnet_test.go

@@ -0,0 +1,107 @@
+package pkg
+
+import (
+	"encoding/json"
+	"net"
+	"testing"
+)
+
+// mkIP4Net builds an IP4Net from an address string and a prefix length,
+// panicking when the address does not parse.
+func mkIP4Net(s string, plen uint) IP4Net {
+	addr, parseErr := ParseIP4(s)
+	if parseErr != nil {
+		panic(parseErr)
+	}
+	return IP4Net{IP: addr, PrefixLen: plen}
+}
+
+// mkIP4 parses an address string, panicking when it does not parse.
+func mkIP4(s string) IP4 {
+	addr, parseErr := ParseIP4(s)
+	if parseErr == nil {
+		return addr
+	}
+	panic(parseErr)
+}
+
+// TestIP4 covers conversion of IP4 from net.IP and from text, its string
+// renderings and its JSON encoding.
+func TestIP4(t *testing.T) {
+	nip := net.ParseIP("1.2.3.4")
+	ip := FromIP(nip)
+	a, b, c, d := ip.Octets()
+	if a != 1 || b != 2 || c != 3 || d != 4 {
+		t.Error("FromIP failed")
+	}
+
+	ip, err := ParseIP4("1.2.3.4")
+	if err != nil {
+		t.Error("ParseIP4 failed with: ", err)
+	} else {
+		a, b, c, d := ip.Octets()
+		if a != 1 || b != 2 || c != 3 || d != 4 {
+			t.Error("ParseIP4 failed")
+		}
+	}
+
+	if ip.ToIP().String() != "1.2.3.4" {
+		t.Error("ToIP failed")
+	}
+
+	if ip.String() != "1.2.3.4" {
+		t.Error("String failed")
+	}
+
+	if ip.StringSep("*") != "1*2*3*4" {
+		t.Error("StringSep failed")
+	}
+
+	j, err := json.Marshal(ip)
+	if err != nil {
+		t.Error("Marshal of IP4 failed: ", err)
+	} else if string(j) != `"1.2.3.4"` {
+		// Convert to string so the failure shows text, not byte values.
+		t.Error("Marshal of IP4 failed with unexpected value: ", string(j))
+	}
+}
+
+// TestIP4Net covers conversion to net.IPNet, the Overlaps and Contains
+// predicates and the JSON encoding of IP4Net.
+func TestIP4Net(t *testing.T) {
+	n1 := mkIP4Net("1.2.3.0", 24)
+
+	if n1.ToIPNet().String() != "1.2.3.0/24" {
+		t.Error("ToIPNet failed")
+	}
+
+	overlapCases := []struct {
+		other IP4Net
+		want  bool
+	}{
+		{n1, true},
+		{mkIP4Net("1.2.0.0", 16), true},
+		{mkIP4Net("1.2.4.0", 24), false},
+		{mkIP4Net("7.2.4.0", 22), false},
+	}
+	for _, tc := range overlapCases {
+		got := n1.Overlaps(tc.other)
+		switch {
+		case tc.want && !got:
+			t.Errorf("%s does not overlap %s", n1, tc.other)
+		case !tc.want && got:
+			t.Errorf("%s overlaps %s", n1, tc.other)
+		}
+	}
+
+	containsCases := []struct {
+		addr string
+		want bool
+	}{
+		{"1.2.3.0", true},
+		{"1.2.3.4", true},
+		{"1.2.4.0", false},
+	}
+	for _, tc := range containsCases {
+		if n1.Contains(mkIP4(tc.addr)) != tc.want {
+			t.Error("Contains failed")
+		}
+	}
+
+	j, err := json.Marshal(n1)
+	if err != nil {
+		t.Error("Marshal of IP4Net failed: ", err)
+	} else if string(j) != `"1.2.3.0/24"` {
+		// Convert to string so the failure shows text, not byte values.
+		t.Error("Marshal of IP4Net failed with unexpected value: ", string(j))
+	}
+}

+ 17 - 0
pkg/rand.go

@@ -0,0 +1,17 @@
+package pkg
+
+import (
+	"math/rand"
+	"time"
+)
+
+// rnd is the package-private random source, seeded in init.
+var rnd *rand.Rand
+
+// init seeds rnd from the current time in nanoseconds.
+func init() {
+	src := rand.NewSource(time.Now().UnixNano())
+	rnd = rand.New(src)
+}
+
+// RandInt returns a random integer in the half-open interval [lo, hi).
+// hi must be greater than lo; Int31n panics on a non-positive argument.
+func RandInt(lo, hi int) int {
+	span := int32(hi - lo)
+	offset := rnd.Int31n(span)
+	return lo + int(offset)
+}

+ 51 - 0
pkg/tun.go

@@ -0,0 +1,51 @@
+package pkg
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/docker/libcontainer/netlink"
+)
+
+const (
+	// tunDevice is the path opened by OpenTun.
+	tunDevice = "/dev/net/tun"
+)
+
+// ifreqFlags is the request structure OpenTun passes to the TUNSETIFF ioctl:
+// an interface name followed by a 16-bit flags field.
+type ifreqFlags struct {
+	IfrnName  [netlink.IFNAMSIZ]byte
+	IfruFlags uint16
+}
+
+// ioctl issues the ioctl system call on fd with the given request and
+// argument pointer, turning a non-zero errno into an error.
+func ioctl(fd int, request, argp uintptr) error {
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), request, argp); errno != 0 {
+		return fmt.Errorf("ioctl failed with '%s'", errno)
+	}
+	return nil
+}
+
+// fromZeroTerm converts s to a string after dropping all trailing NUL bytes.
+func fromZeroTerm(s []byte) string {
+	trimmed := bytes.TrimRight(s, "\000")
+	return string(trimmed)
+}
+
+// OpenTun opens tunDevice and attaches it to a TUN interface via TUNSETIFF
+// with the IFF_TUN and IFF_NO_PI flags. name is the requested interface name.
+//
+// It returns the open device file together with the interface name read back
+// from the request structure after the ioctl. On failure no file descriptor
+// is left open.
+func OpenTun(name string) (*os.File, string, error) {
+	tun, err := os.OpenFile(tunDevice, os.O_RDWR, 0)
+	if err != nil {
+		return nil, "", err
+	}
+
+	var ifr ifreqFlags
+	// Leave the last byte untouched so the name stays NUL terminated.
+	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name+"\000"))
+	ifr.IfruFlags = syscall.IFF_TUN | syscall.IFF_NO_PI
+
+	err = ioctl(int(tun.Fd()), syscall.TUNSETIFF, uintptr(unsafe.Pointer(&ifr)))
+	if err != nil {
+		// Do not leak the descriptor when the device cannot be configured.
+		tun.Close()
+		return nil, "", err
+	}
+
+	ifname := fromZeroTerm(ifr.IfrnName[:netlink.IFNAMSIZ])
+	return tun, ifname, nil
+}

+ 52 - 0
subnet/config.go

@@ -0,0 +1,52 @@
+package subnet
+
+import (
+	"encoding/json"
+	"errors"
+
+	"github.com/coreos-inc/kolach/pkg"
+)
+
+// Config is the overlay network configuration decoded from JSON.
+type Config struct {
+	Network    pkg.IP4Net // the whole overlay network
+	FirstIP    pkg.IP4    // first address of the allocation range
+	LastIP     pkg.IP4    // last address of the allocation range
+	HostSubnet uint       // prefix length of each host's subnet
+}
+
+// ParseConfig decodes s as JSON into a Config, fills in defaults for the
+// fields left at zero and validates the result.
+//
+// Defaults: HostSubnet is 24, or one bit longer than the Network prefix when
+// the Network is a /24 or smaller; FirstIP is the Network address; LastIP is
+// one host-sized subnet before the end of the Network.
+//
+// It returns an error when the JSON is invalid, when HostSubnet is shorter
+// than the Network prefix or longer than 32 bits, or when FirstIP or LastIP
+// lies outside the Network.
+func ParseConfig(s string) (*Config, error) {
+	cfg := new(Config)
+	err := json.Unmarshal([]byte(s), cfg)
+	if err != nil {
+		return nil, err
+	}
+
+	if cfg.HostSubnet > 0 {
+		if cfg.HostSubnet < cfg.Network.PrefixLen {
+			return nil, errors.New("HostSubnet is larger network than Network")
+		}
+	} else {
+		// try to give each host a /24 but if the whole network
+		// is /24 or smaller, half the network
+		if cfg.Network.PrefixLen < 24 {
+			cfg.HostSubnet = 24
+		} else {
+			cfg.HostSubnet = cfg.Network.PrefixLen + 1
+		}
+	}
+
+	// A prefix longer than 32 bits would make the unsigned shift counts
+	// derived from (32 - HostSubnet) wrap around.
+	if cfg.HostSubnet > 32 {
+		return nil, errors.New("HostSubnet must not be longer than 32 bits")
+	}
+
+	if cfg.FirstIP == pkg.IP4(0) {
+		cfg.FirstIP = cfg.Network.IP
+	} else if !cfg.Network.Contains(cfg.FirstIP) {
+		return nil, errors.New("FirstIP is not in the range of the Network")
+	}
+
+	if cfg.LastIP == pkg.IP4(0) {
+		cfg.LastIP = cfg.Network.Next().IP
+		cfg.LastIP -= (1 << (32 - cfg.HostSubnet))
+	} else if !cfg.Network.Contains(cfg.LastIP) {
+		return nil, errors.New("LastIP is not in the range of the Network")
+	}
+
+	return cfg, nil
+}

+ 57 - 0
subnet/config_test.go

@@ -0,0 +1,57 @@
+package subnet
+
+import (
+	"testing"
+)
+
+// TestConfigDefaults checks the values ParseConfig fills in when only the
+// network is given.
+func TestConfigDefaults(t *testing.T) {
+	cfg, err := ParseConfig(`{ "network": "10.3.0.0/16" }`)
+	if err != nil {
+		t.Fatalf("ParseConfig failed: %s", err)
+	}
+
+	if want := "10.3.0.0/16"; cfg.Network.String() != want {
+		t.Errorf("Network mismatch: expected %s, got %s", want, cfg.Network)
+	}
+
+	if first := cfg.FirstIP.String(); first != "10.3.0.0" {
+		t.Errorf("FirstIP mismatch, expected 10.3.0.0, got %s", cfg.FirstIP)
+	}
+
+	if last := cfg.LastIP.String(); last != "10.3.255.0" {
+		t.Errorf("LastIP mismatch, expected 10.3.255.0, got %s", cfg.LastIP)
+	}
+
+	if cfg.HostSubnet != 24 {
+		t.Errorf("HostSubnet mismatch: expected 24, got %d", cfg.HostSubnet)
+	}
+}
+
+// TestConfigOverrides checks that explicitly configured values are kept as
+// given.
+func TestConfigOverrides(t *testing.T) {
+	cfg, err := ParseConfig(`{ "Network": "10.3.0.0/16", "FirstIP": "10.3.5.0", "LastIP": "10.3.8.0", "HostSubnet": 28 }`)
+	if err != nil {
+		t.Fatalf("ParseConfig failed: %s", err)
+	}
+
+	if want := "10.3.0.0/16"; cfg.Network.String() != want {
+		t.Errorf("Network mismatch: expected %s, got %s", want, cfg.Network)
+	}
+
+	if first := cfg.FirstIP.String(); first != "10.3.5.0" {
+		t.Errorf("FirstIP mismatch: expected 10.3.5.0, got %s", cfg.FirstIP)
+	}
+
+	if last := cfg.LastIP.String(); last != "10.3.8.0" {
+		t.Errorf("LastIP mismatch: expected 10.3.8.0, got %s", cfg.LastIP)
+	}
+
+	if cfg.HostSubnet != 28 {
+		t.Errorf("HostSubnet mismatch: expected 28, got %d", cfg.HostSubnet)
+	}
+}

+ 410 - 0
subnet/subnet.go

@@ -0,0 +1,410 @@
+package subnet
+
+import (
+	"encoding/json"
+	"errors"
+	"net"
+	"regexp"
+	"strconv"
+	"time"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
+	log "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/golang/glog"
+
+	"github.com/coreos-inc/kolach/pkg"
+)
+
+const (
+	// registerRetries bounds the number of attempts made by AcquireLease.
+	registerRetries = 10
+	// subnetTTL is the TTL passed to the registry for a lease (24 * 3600).
+	subnetTTL = 24 * 3600
+	// renewMargin is how long before expiration leaseRenewer renews.
+	renewMargin = time.Hour
+)
+
+// etcd error codes
+const (
+	// etcdEventIndexCleared is the code watchLeases treats as "watch index
+	// is outside the history window".
+	etcdEventIndexCleared = 401
+)
+
+// Values used as the Type of an Event.
+const (
+	SubnetAdded = iota
+	SubnetRemoved
+)
+
+var (
+	// subnetRegex matches a subnet key of the form "a.b.c.d-prefixLen",
+	// capturing the address and the prefix length. Every dot is escaped so
+	// that only a literal '.' separates the octets.
+	subnetRegex *regexp.Regexp = regexp.MustCompile(`(\d+\.\d+\.\d+\.\d+)-(\d+)`)
+)
+
+// SubnetLease pairs a leased subnet with the data string stored alongside it.
+type SubnetLease struct {
+	Network pkg.IP4Net
+	Data    string
+}
+
+// SubnetManager acquires and renews this host's subnet lease and tracks the
+// leases known from the registry.
+type SubnetManager struct {
+	registry  subnetRegistry // backing store for config and leases
+	config    *Config        // configuration parsed at construction
+	myLease   SubnetLease    // the lease held by this manager
+	leaseExp  time.Time      // expiration reported for myLease
+	lastIndex uint64         // last registry index seen
+	leases    []SubnetLease  // cached view of the leases
+	stop      chan bool      // signalled by Stop
+}
+
+// EventType is the kind of an Event (see SubnetAdded and SubnetRemoved).
+type EventType int
+
+// Event describes a change concerning a single lease.
+type Event struct {
+	Type  EventType
+	Lease SubnetLease
+}
+
+// EventBatch is a group of events delivered together.
+type EventBatch []Event
+
+// NewSubnetManager creates a SubnetManager backed by etcd, using prefix as
+// the root of its keys. It fails when the configuration cannot be read or
+// parsed.
+func NewSubnetManager(etcdCli *etcd.Client, prefix string) (*SubnetManager, error) {
+	registry := &etcdSubnetRegistry{etcdCli, prefix}
+	return newSubnetManager(registry)
+}
+
+// AcquireLease obtains a subnet lease for the host with public address ip and
+// stores data alongside it.
+//
+// An existing lease whose data names ip as PublicIP is reused; otherwise a
+// free subnet is picked and created in the registry. When creation fails
+// because the key already exists (etcd error code 105) the whole procedure is
+// retried, up to registerRetries times. Every other failure, including a
+// failed update of a reused lease, is returned as an error.
+//
+// On success the lease network, its data and its expiration are recorded on
+// the manager so that later renewals rewrite the same data.
+func (sm *SubnetManager) AcquireLease(ip pkg.IP4, data string) (pkg.IP4Net, error) {
+	for i := 0; i < registerRetries; i++ {
+		var err error
+		sm.leases, err = sm.getLeases()
+		if err != nil {
+			return pkg.IP4Net{}, err
+		}
+
+		// try to reuse a subnet if there's one that match our IP
+		for _, l := range sm.leases {
+			var ba BaseAttrs
+			if err := json.Unmarshal([]byte(l.Data), &ba); err != nil {
+				log.Error("Error parsing subnet lease JSON: ", err)
+				continue
+			}
+			if ip != ba.PublicIP {
+				continue
+			}
+
+			resp, err := sm.registry.updateSubnet(l.Network.StringSep(".", "-"), data, subnetTTL)
+			if err != nil {
+				// Report the failure; a nil error here would make the
+				// caller use the zero network as if it were leased.
+				return pkg.IP4Net{}, err
+			}
+
+			sm.myLease = SubnetLease{l.Network, data}
+			sm.leaseExp = *(resp.Node.Expiration)
+			return l.Network, nil
+		}
+
+		// no existing match, grab a new one
+		sn, err := sm.allocateSubnet()
+		if err != nil {
+			return pkg.IP4Net{}, err
+		}
+
+		resp, err := sm.registry.createSubnet(sn.StringSep(".", "-"), data, subnetTTL)
+		if err == nil {
+			sm.myLease = SubnetLease{sn, data}
+			sm.leaseExp = *(resp.Node.Expiration)
+			return sn, nil
+		}
+
+		// if etcd returned Key Already Exists, try again. The checked
+		// assertion keeps non-etcd errors from causing a panic.
+		if etcdErr, ok := err.(*etcd.EtcdError); ok && etcdErr.ErrorCode == 105 {
+			continue
+		}
+
+		return pkg.IP4Net{}, err
+	}
+
+	return pkg.IP4Net{}, errors.New("Max retries reached trying to acquire a subnet")
+}
+
+// UpdateSubnet rewrites the data stored with this manager's lease and resets
+// its TTL. On success the new data and expiration are recorded on the
+// manager; on failure the error is returned and the manager is unchanged.
+func (sm *SubnetManager) UpdateSubnet(data string) error {
+	resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), data, subnetTTL)
+	if err != nil {
+		// resp must not be touched when the update failed.
+		return err
+	}
+
+	sm.myLease.Data = data
+	sm.leaseExp = *(resp.Node.Expiration)
+	return nil
+}
+
+// Start launches two goroutines: one running watchLeases with receiver and
+// one running leaseRenewer.
+func (sm *SubnetManager) Start(receiver chan EventBatch) {
+	go sm.watchLeases(receiver)
+	go sm.leaseRenewer()
+}
+
+// Stop sends two values on the stop channel, one per goroutine started by
+// Start.
+func (sm *SubnetManager) Stop() {
+	// once for each goroutine
+	sm.stop <- true
+	sm.stop <- true
+}
+
+// GetConfig returns the configuration parsed when the manager was created.
+func (sm *SubnetManager) GetConfig() *Config {
+	return sm.config
+}
+
+/// Implementation
+
+// parseSubnetKey extracts a subnet from a key containing "a.b.c.d-prefixLen".
+// It returns an error when the key does not match subnetRegex, when the
+// address is not valid IPv4 or when the prefix length is not within 0..32.
+func parseSubnetKey(s string) (pkg.IP4Net, error) {
+	if parts := subnetRegex.FindStringSubmatch(s); len(parts) == 3 {
+		ip := net.ParseIP(parts[1]).To4()
+		// Parse with room to spare and range-check explicitly: a 5 bit
+		// limit would reject the valid prefix length 32.
+		prefixLen, err := strconv.ParseUint(parts[2], 10, 8)
+		if ip != nil && err == nil && prefixLen <= 32 {
+			return pkg.IP4Net{pkg.FromIP(ip), uint(prefixLen)}, nil
+		}
+	}
+
+	return pkg.IP4Net{}, errors.New("Error parsing IP Subnet")
+}
+
+// subnetRegistry is the storage interface used by SubnetManager. It is
+// implemented by etcdSubnetRegistry.
+type subnetRegistry interface {
+	getConfig() (*etcd.Response, error)
+	getSubnets() (*etcd.Response, error)
+	createSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
+	updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
+	watchSubnets(since uint64, stop chan bool) (*etcd.Response, error)
+}
+
+// etcdSubnetRegistry implements subnetRegistry on top of an etcd client,
+// keeping all of its keys under prefix.
+type etcdSubnetRegistry struct {
+	cli    *etcd.Client
+	prefix string
+}
+
+// getConfig fetches the "<prefix>/config" key. The response is nil whenever
+// an error is returned.
+func (esr *etcdSubnetRegistry) getConfig() (*etcd.Response, error) {
+	key := esr.prefix + "/config"
+	if resp, err := esr.cli.Get(key, false, false); err == nil {
+		return resp, nil
+	} else {
+		return nil, err
+	}
+}
+
+// getSubnets fetches "<prefix>/subnets" recursively.
+func (esr *etcdSubnetRegistry) getSubnets() (*etcd.Response, error) {
+	key := esr.prefix + "/subnets"
+	return esr.cli.Get(key, false, true)
+}
+
+// createSubnet creates "<prefix>/subnets/<sn>" holding data with the given ttl.
+func (esr *etcdSubnetRegistry) createSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
+	key := esr.prefix + "/subnets/" + sn
+	return esr.cli.Create(key, data, ttl)
+}
+
+// updateSubnet sets "<prefix>/subnets/<sn>" to data with the given ttl.
+func (esr *etcdSubnetRegistry) updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
+	key := esr.prefix + "/subnets/" + sn
+	return esr.cli.Set(key, data, ttl)
+}
+
+// watchSubnets watches "<prefix>/subnets" recursively starting at index
+// since, passing stop on to the client.
+func (esr *etcdSubnetRegistry) watchSubnets(since uint64, stop chan bool) (*etcd.Response, error) {
+	key := esr.prefix + "/subnets"
+	return esr.cli.Watch(key, since, true, nil, stop)
+}
+
+// newSubnetManager reads and parses the configuration from r and returns a
+// manager bound to it. The stop channel is buffered for two signals.
+func newSubnetManager(r subnetRegistry) (*SubnetManager, error) {
+	resp, err := r.getConfig()
+	if err != nil {
+		return nil, err
+	}
+
+	parsed, err := ParseConfig(resp.Node.Value)
+	if err != nil {
+		return nil, err
+	}
+
+	sm := new(SubnetManager)
+	sm.registry = r
+	sm.config = parsed
+	sm.stop = make(chan bool, 2)
+	return sm, nil
+}
+
+// getLeases fetches all subnet leases from the registry and records the
+// registry index in sm.lastIndex. Nodes whose key does not parse as a subnet
+// are skipped. An etcd "key not found" error (code 100) is treated as an
+// empty set of leases; every other error is returned to the caller.
+func (sm *SubnetManager) getLeases() ([]SubnetLease, error) {
+	resp, err := sm.registry.getSubnets()
+	if err != nil {
+		// Checked assertion: errors that are not *etcd.EtcdError (e.g.
+		// connection failures) must be returned, not cause a panic.
+		etcdErr, ok := err.(*etcd.EtcdError)
+		if !ok || etcdErr.ErrorCode != 100 {
+			return nil, err
+		}
+
+		// key not found: treat it as empty set
+		sm.lastIndex = etcdErr.Index
+		return nil, nil
+	}
+
+	var leases []SubnetLease
+	for _, node := range resp.Node.Nodes {
+		sn, err := parseSubnetKey(node.Key)
+		if err == nil {
+			leases = append(leases, SubnetLease{sn, node.Value})
+		}
+	}
+	sm.lastIndex = resp.EtcdIndex
+
+	return leases, nil
+}
+
+// deleteLease removes element i by overwriting it with the last element and
+// returning the slice shortened by one. Order is not preserved and the
+// backing array of l is modified.
+func deleteLease(l []SubnetLease, i int) []SubnetLease {
+	last := len(l) - 1
+	l[i] = l[last]
+	return l[:last]
+}
+
+// applyLeases diffs newLeases against the cached leases and returns the
+// resulting events: SubnetAdded for each lease not yet cached (the manager's
+// own lease is skipped) and SubnetRemoved for each cached lease missing from
+// newLeases. Afterwards newLeases becomes the cache.
+func (sm *SubnetManager) applyLeases(newLeases []SubnetLease) EventBatch {
+	var events EventBatch
+
+	for _, fresh := range newLeases {
+		// skip self
+		if fresh.Network.Equal(sm.myLease.Network) {
+			continue
+		}
+
+		known := -1
+		for i := range sm.leases {
+			if sm.leases[i].Network.Equal(fresh.Network) {
+				known = i
+				break
+			}
+		}
+
+		if known < 0 {
+			// new subnet
+			events = append(events, Event{SubnetAdded, fresh})
+			continue
+		}
+
+		// still present: drop it so only vanished leases remain cached
+		sm.leases = deleteLease(sm.leases, known)
+	}
+
+	// everything left in sm.leases has been deleted
+	for _, gone := range sm.leases {
+		events = append(events, Event{SubnetRemoved, gone})
+	}
+
+	sm.leases = newLeases
+
+	return events
+}
+
+// applySubnetChange applies a single watched change to the lease cache and
+// returns the matching event.
+//
+// For the actions "delete" and "expire" the lease is removed from the cache
+// and a SubnetRemoved event is returned (with empty data when the lease was
+// not cached). For any other action the lease is updated in place or
+// appended, and a SubnetAdded event is returned.
+func (sm *SubnetManager) applySubnetChange(action string, ipn pkg.IP4Net, data string) Event {
+	switch action {
+	case "delete", "expire":
+		for i, l := range sm.leases {
+			if l.Network.Equal(ipn) {
+				// Keep the shortened slice; discarding it would leave
+				// a stale duplicate of the last lease in the cache.
+				sm.leases = deleteLease(sm.leases, i)
+				return Event{SubnetRemoved, l}
+			}
+		}
+
+		log.Errorf("Removed subnet (%s) was not found", ipn)
+		return Event{
+			SubnetRemoved,
+			SubnetLease{ipn, ""},
+		}
+
+	default:
+		for i, l := range sm.leases {
+			if l.Network.Equal(ipn) {
+				sm.leases[i] = SubnetLease{ipn, data}
+				return Event{SubnetAdded, sm.leases[i]}
+			}
+		}
+
+		sm.leases = append(sm.leases, SubnetLease{ipn, data})
+		return Event{SubnetAdded, sm.leases[len(sm.leases)-1]}
+	}
+}
+
+// BaseAttrs is the part of a lease's JSON data inspected by AcquireLease.
+type BaseAttrs struct {
+	PublicIP pkg.IP4
+}
+
+// allocateSubnet picks a random free subnet. It walks host-sized subnets from
+// FirstIP through LastIP, collects up to 100 that overlap no cached lease and
+// returns one of them at random, or an error when none is free.
+func (sm *SubnetManager) allocateSubnet() (pkg.IP4Net, error) {
+	log.Infof("Picking subnet in range %s ... %s", sm.config.FirstIP, sm.config.LastIP)
+
+	inUse := func(candidate pkg.IP4Net) bool {
+		for _, l := range sm.leases {
+			if candidate.Overlaps(l.Network) {
+				return true
+			}
+		}
+		return false
+	}
+
+	var free []pkg.IP4
+	cur := pkg.IP4Net{IP: sm.config.FirstIP, PrefixLen: sm.config.HostSubnet}
+	for cur.IP <= sm.config.LastIP && len(free) < 100 {
+		if !inUse(cur) {
+			free = append(free, cur.IP)
+		}
+		cur = cur.Next()
+	}
+
+	if len(free) == 0 {
+		return pkg.IP4Net{}, errors.New("out of subnets")
+	}
+
+	pick := pkg.RandInt(0, len(free))
+	return pkg.IP4Net{IP: free[pick], PrefixLen: sm.config.HostSubnet}, nil
+}
+
+// watchLeases streams lease changes to receiver until the watch is stopped.
+//
+// It first replays the leases discovered during AcquireLease (except the
+// manager's own) as one batch of SubnetAdded events, then watches the
+// registry from sm.lastIndex+1. Each change not concerning the manager's own
+// lease is sent as a single-event batch. When etcd reports that the index is
+// outside its history window, the full lease list is fetched and diffed
+// against the cache instead. Other watch errors are logged and retried after
+// one second.
+func (sm *SubnetManager) watchLeases(receiver chan EventBatch) {
+	// "catch up" by replaying all the leases we discovered during
+	// AcquireLease
+	var batch EventBatch
+	for _, l := range sm.leases {
+		if !sm.myLease.Network.Equal(l.Network) {
+			batch = append(batch, Event{SubnetAdded, l})
+		}
+	}
+	if len(batch) > 0 {
+		receiver <- batch
+	}
+
+	for {
+		resp, err := sm.registry.watchSubnets(sm.lastIndex+1, sm.stop)
+
+		if err == nil {
+			if resp == nil {
+				// watchSubnets exited by stop chan being signaled
+				return
+			}
+			sm.lastIndex = resp.EtcdIndex
+
+			sn, err := parseSubnetKey(resp.Node.Key)
+			if err != nil {
+				log.Error("Error parsing subnet IP: ", resp.Node.Key)
+				time.Sleep(time.Second)
+				continue
+			}
+
+			// Don't process our own changes
+			if !sm.myLease.Network.Equal(sn) {
+				evt := sm.applySubnetChange(resp.Action, sn, resp.Node.Value)
+				receiver <- EventBatch{evt}
+			}
+
+		} else if etcdErr, ok := err.(*etcd.EtcdError); ok && etcdErr.ErrorCode == etcdEventIndexCleared {
+			// etcd maintains a history window for events and it's possible to fall behind.
+			// to recover, get the current state and then "diff" against our cache to generate
+			// events for the caller
+			log.Warning("Watch of subnet leases failed b/c index outside history window")
+			leases, err := sm.getLeases()
+			if err != nil {
+				// Error, not Errorf: the message carries no format verb.
+				log.Error("Failed to retrieve subnet leases: ", err)
+				time.Sleep(time.Second)
+				continue
+			}
+
+			batch = sm.applyLeases(leases)
+			receiver <- batch
+
+		} else {
+			log.Error("Watch of subnet leases failed: ", err)
+			// Back off so a persistent failure does not spin the CPU
+			// and flood the log.
+			time.Sleep(time.Second)
+		}
+	}
+}
+
+// leaseRenewer renews the manager's lease renewMargin before it expires and
+// returns once a value arrives on the stop channel. A failed renewal is
+// retried after one minute.
+func (sm *SubnetManager) leaseRenewer() {
+	untilRenewal := func() time.Duration {
+		return sm.leaseExp.Sub(time.Now()) - renewMargin
+	}
+
+	wait := untilRenewal()
+	for {
+		select {
+		case <-sm.stop:
+			return
+		case <-time.After(wait):
+		}
+
+		resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), sm.myLease.Data, subnetTTL)
+		if err != nil {
+			log.Error("Error renewing lease (trying again in 1 min): ", err)
+			wait = time.Minute
+			continue
+		}
+
+		sm.leaseExp = *(resp.Node.Expiration)
+		log.Info("Lease renewed, new expiration: ", sm.leaseExp)
+		wait = untilRenewal()
+	}
+}

BIN
subnet/subnet.test


+ 202 - 0
subnet/subnet_test.go

@@ -0,0 +1,202 @@
+package subnet
+
+import (
+	"fmt"
+	"net"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
+
+	"github.com/coreos-inc/kolach/pkg"
+)
+
+// mockSubnetRegistry is an in-memory registry that the tests in this file
+// pass to newSubnetManager.
+type mockSubnetRegistry struct {
+	// subnets is the parent node; its Nodes slice holds one node per subnet.
+	subnets *etcd.Node
+	// ch feeds watchSubnets: each key received on it is reported as a change.
+	ch      chan string
+	// index is returned as EtcdIndex and stamped on nodes as ModifiedIndex;
+	// createSubnet and updateSubnet increment it.
+	index   uint64
+}
+
+// newMockSubnetRegistry builds a mock registry pre-populated with four /24
+// subnets (10.3.1.0, 10.3.2.0, 10.3.4.0 and 10.3.5.0), all carrying the same
+// PublicIP payload and consecutive modified indices starting at 10. The
+// registry index starts at 14. ch is the channel later read by watchSubnets.
+func newMockSubnetRegistry(ch chan string) *mockSubnetRegistry {
+	keys := []string{"10.3.1.0-24", "10.3.2.0-24", "10.3.4.0-24", "10.3.5.0-24"}
+
+	nodes := make([]*etcd.Node, 0, len(keys))
+	for i, key := range keys {
+		nodes = append(nodes, &etcd.Node{
+			Key:           key,
+			Value:         `{ "PublicIP": "1.1.1.1" }`,
+			ModifiedIndex: uint64(10 + i),
+		})
+	}
+
+	msr := &mockSubnetRegistry{ch: ch, index: 14}
+	msr.subnets = &etcd.Node{Nodes: nodes}
+	return msr
+}
+
+// getConfig returns a fixed network configuration (10.3.0.0/16 with the
+// FirstIP/LastIP bounds 10.3.1.0 and 10.3.5.0) tagged with the current
+// mock index. It never fails.
+func (msr *mockSubnetRegistry) getConfig() (*etcd.Response, error) {
+	cfg := &etcd.Node{
+		Value: `{ "Network": "10.3.0.0/16", "FirstIP": "10.3.1.0", "LastIP": "10.3.5.0" }`,
+	}
+	resp := &etcd.Response{Node: cfg, EtcdIndex: msr.index}
+	return resp, nil
+}
+
+// getSubnets returns the parent node holding every subnet currently known
+// to the mock, tagged with the current mock index. It never fails.
+func (msr *mockSubnetRegistry) getSubnets() (*etcd.Response, error) {
+	resp := new(etcd.Response)
+	resp.EtcdIndex = msr.index
+	resp.Node = msr.subnets
+	return resp, nil
+}
+
+// createSubnet records a new subnet node under key sn with payload data and
+// an expiration ttl seconds from now. It bumps the mock index, appends the
+// node to the stored subnets and returns it. It never fails.
+func (msr *mockSubnetRegistry) createSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
+	msr.index++
+
+	// ttl is a count of seconds; scale it into a time.Duration
+	expiry := time.Now().Add(time.Second * time.Duration(ttl))
+
+	created := &etcd.Node{
+		Key:           sn,
+		Value:         data,
+		ModifiedIndex: msr.index,
+		Expiration:    &expiry,
+	}
+	msr.subnets.Nodes = append(msr.subnets.Nodes, created)
+
+	resp := &etcd.Response{Node: created, EtcdIndex: msr.index}
+	return resp, nil
+}
+
+// updateSubnet bumps the mock index and returns a freshly built node for
+// key sn with payload data and an expiration ttl seconds from now. The
+// stored subnet list is not touched. It never fails.
+func (msr *mockSubnetRegistry) updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
+	msr.index++
+
+	// ttl is a count of seconds; scale it into a time.Duration
+	expiry := time.Now().Add(time.Second * time.Duration(ttl))
+
+	updated := &etcd.Node{
+		Key:           sn,
+		Value:         data,
+		ModifiedIndex: msr.index,
+		Expiration:    &expiry,
+	}
+
+	resp := &etcd.Response{Node: updated, EtcdIndex: msr.index}
+	return resp, nil
+}
+
+func (msr *mockSubnetRegistry) watchSubnets(since uint64, stop chan bool) (*etcd.Response, error) {
+	for {
+		var sn string
+		select {
+		case <-stop:
+			return nil, nil
+		case sn = <-msr.ch:
+			n := etcd.Node{
+				Key:           sn,
+				ModifiedIndex: msr.index,
+			}
+			msr.subnets.Nodes = append(msr.subnets.Nodes, &n)
+			return &etcd.Response{Node: &n}, nil
+		}
+	}
+}
+
+// hasSubnet reports whether a node whose key equals sn is stored in the mock.
+func (msr *mockSubnetRegistry) hasSubnet(sn string) bool {
+	nodes := msr.subnets.Nodes
+	for i := 0; i < len(nodes); i++ {
+		if nodes[i].Key == sn {
+			return true
+		}
+	}
+	return false
+}
+
+// netIPNetToString formats n like n.String() but with the first "/"
+// replaced by "-", e.g. "10.3.1.0/24" becomes "10.3.1.0-24".
+func netIPNetToString(n *net.IPNet) string {
+	cidr := n.String()
+	slash := strings.Index(cidr, "/")
+	if slash < 0 {
+		return cidr
+	}
+	return cidr[:slash] + "-" + cidr[slash+1:]
+}
+
+// TestAcquireLease checks that, against the mock registry, AcquireLease
+// hands out 10.3.3.0/24 and that asking a second time with the same
+// arguments yields that same subnet again.
+func TestAcquireLease(t *testing.T) {
+	sm, err := newSubnetManager(newMockSubnetRegistry(nil))
+	if err != nil {
+		t.Fatalf("Failed to create subnet manager: %s", err)
+	}
+
+	ip, _ := pkg.ParseIP4("1.2.3.4")
+	data := `{ "PublicIP": "1.2.3.4" }`
+
+	// First pass acquires; second pass should reuse the existing lease.
+	for attempt := 0; attempt < 2; attempt++ {
+		sn, err := sm.AcquireLease(ip, data)
+		if err != nil {
+			t.Fatal("AcquireLease failed: ", err)
+		}
+
+		if sn.String() != "10.3.3.0/24" {
+			t.Fatal("Subnet mismatch: expected 10.3.3.0/24, got: ", sn)
+		}
+	}
+}
+
+// TestWatchLeases checks that a subnet key pushed through the mock
+// registry's watch channel is published by the subnet manager as a
+// single-event batch of type SubnetAdded for that same subnet.
+func TestWatchLeases(t *testing.T) {
+	msr := newMockSubnetRegistry(make(chan string))
+	sm, err := newSubnetManager(msr)
+	if err != nil {
+		t.Fatalf("Failed to create subnet manager: %s", err)
+	}
+
+	ip, _ := pkg.ParseIP4("1.2.3.4")
+	data := `{ "PublicIP": "1.2.3.4" }`
+
+	_, err = sm.AcquireLease(ip, data)
+	if err != nil {
+		t.Fatalf("RegisterSubnet failed: %s", err)
+	}
+
+	events := make(chan EventBatch)
+	sm.Start(events)
+
+	// Discard the first batch; only the batch triggered below is inspected.
+	<-events
+
+	// Pick the first 10.3.N.0-24 key the mock does not hold yet and feed it
+	// to the watcher through the mock's channel.
+	var expected string
+	for i := 1; i <= 9; i++ {
+		expected = fmt.Sprintf("10.3.%d.0-24", i)
+		if !msr.hasSubnet(expected) {
+			msr.ch <- expected
+			break
+		}
+	}
+
+	evtBatch, ok := <-events
+	if !ok {
+		t.Fatalf("WatchSubnets did not publish")
+	}
+
+	if len(evtBatch) != 1 {
+		t.Fatalf("WatchSubnets produced wrong sized event batch")
+	}
+
+	evt := evtBatch[0]
+
+	if evt.Type != SubnetAdded {
+		t.Fatalf("WatchSubnets produced wrong event type")
+	}
+
+	// Compare in the same "a.b.c.d-len" form that was sent on the channel.
+	actual := evt.Lease.Network.StringSep(".", "-")
+	if actual != expected {
+		t.Errorf("WatchSubnet produced wrong subnet: expected %s, got %s", expected, actual)
+	}
+
+	sm.Stop()
+}

+ 103 - 0
udp/router.go

@@ -0,0 +1,103 @@
+package udp
+
+import (
+	"net"
+	"sync"
+
+	log "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/golang/glog"
+
+	"github.com/coreos-inc/kolach/pkg"
+)
+
+const (
+	// minIP4HdrSize is the shortest packet, in bytes, that routePacket will
+	// attempt to route; anything smaller is dropped.
+	minIP4HdrSize = 20
+)
+
+// routeEntry maps one subnet to the UDP address packets for it are sent to.
+type routeEntry struct {
+	// sn is the subnet matched against a packet's destination IP.
+	sn   pkg.IP4Net
+	// addr is the UDP endpoint packets for sn are written to.
+	addr *net.UDPAddr
+}
+
+// Router holds the subnet-to-UDP-endpoint table used by routePacket.
+// SetRoute, DelRoute and routePacket all take mux before touching routes.
+type Router struct {
+	// mux guards routes.
+	mux    sync.Mutex
+	// port is the UDP port used for every route's destination address.
+	port   int
+	// routes is the table itself; routePacket scans it linearly.
+	routes []routeEntry
+}
+
+// NewRouter returns a Router with an empty route table whose routes will
+// all target the given UDP port.
+func NewRouter(port int) *Router {
+	rtr := new(Router)
+	rtr.port = port
+	return rtr
+}
+
+// SetRoute installs or replaces the route for subnet sn so that packets
+// destined for it are sent to dst on the router's UDP port.
+//
+// If a route for sn already exists its address is updated in place,
+// otherwise a new entry is appended.
+func (r *Router) SetRoute(sn pkg.IP4Net, dst pkg.IP4) {
+	r.mux.Lock()
+	defer r.mux.Unlock()
+
+	addr := &net.UDPAddr{
+		IP:   dst.ToIP(),
+		Port: r.port,
+	}
+
+	// Iterate by index: ranging by value would hand us a copy of each
+	// routeEntry and the address update would be silently lost.
+	for i := range r.routes {
+		if r.routes[i].sn.Equal(sn) {
+			r.routes[i].addr = addr
+			return
+		}
+	}
+
+	r.routes = append(r.routes, routeEntry{sn: sn, addr: addr})
+}
+
+// DelRoute removes the first route whose subnet equals sn, if there is one.
+// The removed slot is filled with the last entry, so the order of the
+// remaining routes is not preserved.
+func (r *Router) DelRoute(sn pkg.IP4Net) {
+	r.mux.Lock()
+	defer r.mux.Unlock()
+
+	for idx := range r.routes {
+		if !r.routes[idx].sn.Equal(sn) {
+			continue
+		}
+
+		last := len(r.routes) - 1
+		r.routes[idx] = r.routes[last]
+		r.routes = r.routes[:last]
+		return
+	}
+}
+
+// routePacket forwards pkt over conn to the UDP endpoint of the first route
+// whose subnet contains the packet's destination address, read from bytes
+// 16..19 of pkt. Packets shorter than minIP4HdrSize, and packets with no
+// matching route, are dropped with a verbose-level log message. Write
+// failures and short writes are only logged.
+func (r *Router) routePacket(pkt []byte, conn *net.UDPConn) {
+	if len(pkt) < minIP4HdrSize {
+		log.V(1).Infof("Packet too small (%d bytes), unable to route", len(pkt))
+		return
+	}
+
+	r.mux.Lock()
+	defer r.mux.Unlock()
+
+	dstIP := pkg.FromBytes(pkt[16:20])
+
+	for idx := range r.routes {
+		entry := r.routes[idx]
+		if !entry.sn.Contains(dstIP) {
+			continue
+		}
+
+		sent, err := conn.WriteToUDP(pkt, entry.addr)
+		switch {
+		case err != nil:
+			log.V(1).Info("UDP write failed with: ", err)
+		case sent != len(pkt):
+			log.V(1).Infof("Was only able to send %d out of %d bytes to %s: ", sent, len(pkt), entry.addr.IP)
+		}
+
+		// traffic to one destination tends to be bursty, so move the route
+		// that just matched to the front for the packets that follow
+		if idx != 0 {
+			r.routes[0], r.routes[idx] = r.routes[idx], r.routes[0]
+		}
+		return
+	}
+
+	log.V(1).Info("No route found for ", dstIP)
+}
+

+ 185 - 0
udp/run.go

@@ -0,0 +1,185 @@
+package udp
+
+import (
+	"os"
+	"net"
+	"time"
+	"encoding/json"
+
+	"github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/docker/libcontainer/netlink"
+	log "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/golang/glog"
+
+	"github.com/coreos-inc/kolach/pkg"
+	"github.com/coreos-inc/kolach/subnet"
+	"github.com/coreos-inc/kolach/backend"
+)
+
+const (
+	// encapOverhead is the number of bytes encapsulation adds to each
+	// packet; Run subtracts it from the host interface MTU.
+	encapOverhead = 28 // 20 bytes IP hdr + 8 bytes UDP hdr
+	// defaultMTU is the TUN MTU Run falls back to when the host interface
+	// reports no MTU: 1500 less the encapsulation overhead.
+	defaultMTU    = 1500 - encapOverhead
+)
+
+// configureIface prepares the network interface named ifname for use: it
+// assigns it the address and prefix from ipn, sets its MTU to mtu and
+// brings the link up, in that order.
+//
+// The first step that fails is logged and its error returned; later steps
+// are not attempted.
+func configureIface(ifname string, ipn pkg.IP4Net, mtu int) error {
+	iface, err := net.InterfaceByName(ifname)
+	if err != nil {
+		log.Error("Failed to lookup interface ", ifname)
+		return err
+	}
+
+	n := ipn.ToIPNet()
+	err = netlink.NetworkLinkAddIp(iface, n.IP, n)
+	if err != nil {
+		log.Errorf("Failed to add IP address %s to %s: %s", n.IP, ifname, err)
+		return err
+	}
+
+	err = netlink.NetworkSetMTU(iface, mtu)
+	if err != nil {
+		// the format needs a verb for err, otherwise it is rendered as
+		// a %!(EXTRA ...) suffix instead of a readable message
+		log.Errorf("Failed to set MTU for %s: %s", ifname, err)
+		return err
+	}
+
+	err = netlink.NetworkLinkUp(iface)
+	if err != nil {
+		log.Errorf("Failed set interface %s to UP state: %s", ifname, err)
+		return err
+	}
+
+	return nil
+}
+
+// proxyTunToUdp loops forever reading packets from tun and handing each one
+// to r.routePacket for delivery over conn. Read errors are logged at
+// verbose level and the loop carries on. A single 1600-byte buffer is
+// reused for every read.
+func proxyTunToUdp(r *Router, tun *os.File, conn *net.UDPConn) {
+	buf := make([]byte, 1600)
+	for {
+		n, err := tun.Read(buf)
+		if err != nil {
+			log.V(1).Info("Error reading from TUN device: ", err)
+			continue
+		}
+		r.routePacket(buf[:n], conn)
+	}
+}
+
+// proxyUdpToTun loops forever reading datagrams from conn and writing each
+// payload to tun. Read errors, write errors and short writes are logged at
+// verbose level and the loop carries on. A single 1600-byte buffer is
+// reused for every read.
+func proxyUdpToTun(conn *net.UDPConn, tun *os.File) {
+	buf := make([]byte, 1600)
+	for {
+		got, err := conn.Read(buf)
+		if err != nil {
+			log.V(1).Info("Error reading from socket: ", err)
+			continue
+		}
+
+		wrote, err := tun.Write(buf[:got])
+		if err != nil {
+			log.V(1).Info("Error writing to TUN device: ", err)
+		} else if wrote != got {
+			log.V(1).Infof("Was only able to write %d out of %d bytes to TUN device: ", wrote, got)
+		}
+	}
+}
+
+// acquireLease obtains a subnet lease for the host whose public address is
+// pubIP. The lease payload is the JSON encoding of a subnet.BaseAttrs
+// carrying that address. Acquisition is retried once per second until it
+// succeeds, so the only error ever returned is a JSON encoding failure.
+func acquireLease(sm *subnet.SubnetManager, pubIP net.IP) (pkg.IP4Net, error) {
+	attrs := subnet.BaseAttrs{PublicIP: pkg.FromIP(pubIP)}
+
+	payload, err := json.Marshal(&attrs)
+	if err != nil {
+		return pkg.IP4Net{}, err
+	}
+
+	var sn pkg.IP4Net
+	for {
+		if sn, err = sm.AcquireLease(attrs.PublicIP, string(payload)); err == nil {
+			log.Info("Subnet lease acquired: ", sn)
+			return sn, nil
+		}
+
+		log.Error("Failed to acquire subnet: ", err)
+		time.Sleep(time.Second)
+	}
+}
+
+// monitorEvents starts the subnet manager's watcher and keeps rtr's route
+// table in step with the lease events it publishes: SubnetAdded installs a
+// route to the PublicIP found in the lease's JSON data, SubnetRemoved
+// deletes the route. It returns only when the event channel is closed.
+//
+// A lease whose data cannot be decoded is logged and skipped.
+func monitorEvents(sm *subnet.SubnetManager, rtr *Router) {
+	evts := make(chan subnet.EventBatch)
+	sm.Start(evts)
+
+	for evtBatch := range evts {
+		for _, evt := range evtBatch {
+			switch evt.Type {
+			case subnet.SubnetAdded:
+				log.Info("Subnet added: ", evt.Lease.Network)
+				var attrs subnet.BaseAttrs
+				if err := json.Unmarshal([]byte(evt.Lease.Data), &attrs); err != nil {
+					log.Error("Error decoding subnet lease JSON: ", err)
+					continue
+				}
+				rtr.SetRoute(evt.Lease.Network, attrs.PublicIP)
+
+			case subnet.SubnetRemoved:
+				// log.Info does not interpret format verbs, so pass the
+				// network as a plain argument like the "added" case does
+				log.Info("Subnet removed: ", evt.Lease.Network)
+				rtr.DelRoute(evt.Lease.Network)
+
+			default:
+				log.Errorf("Internal error: unknown event type: %d", int(evt.Type))
+			}
+		}
+	}
+}
+
+// Run brings up the UDP-encapsulated overlay on this host and then blocks
+// processing subnet lease events.
+//
+// It acquires a subnet lease for ip, opens a TUN device, starts listening
+// for UDP on port, configures the TUN device with the leased address and an
+// MTU reduced by the encapsulation overhead, calls ready with the leased
+// subnet and that MTU, starts the two proxy goroutines and finally hands
+// control to monitorEvents.
+//
+// Any initialization failure is logged and Run returns early; the TUN
+// device and UDP socket opened up to that point are closed first.
+func Run(sm *subnet.SubnetManager, iface *net.Interface, ip net.IP, port int, ready backend.ReadyFunc) {
+	sn, err := acquireLease(sm, ip)
+	if err != nil {
+		log.Error("Failed to acquire lease: ", err)
+		return
+	}
+
+	tun, tunName, err := pkg.OpenTun("kolach%d")
+	if err != nil {
+		log.Error("Failed to open TUN device: ", err)
+		return
+	}
+
+	localAddr := net.UDPAddr{
+		Port: port,
+	}
+
+	conn, err := net.ListenUDP("udp4", &localAddr)
+	if err != nil {
+		log.Error("Failed to start listening on UDP socket: ", err)
+		// don't leak the TUN device we already opened
+		tun.Close()
+		return
+	}
+
+	// Interface's subnet is that of the whole overlay network (e.g. /16)
+	// and not that of the individual host (e.g. /24)
+	ipn := pkg.IP4Net{
+		IP:        sn.IP,
+		PrefixLen: sm.GetConfig().Network.PrefixLen,
+	}
+
+	// TUN MTU will be smaller b/c of encap (IP+UDP hdrs)
+	var mtu int
+	if iface.MTU > 0 {
+		mtu = iface.MTU - encapOverhead
+	} else {
+		mtu = defaultMTU
+	}
+
+	err = configureIface(tunName, ipn, mtu)
+	if err != nil {
+		// configureIface has already logged the cause; release what we hold
+		conn.Close()
+		tun.Close()
+		return
+	}
+
+	rtr := NewRouter(port)
+
+	// all initialized and ready for business
+	log.Info("UDP encapsulation initialized")
+	ready(sn, mtu)
+
+	log.Info("Dispatching to run the proxy loop")
+	go proxyTunToUdp(rtr, tun, conn)
+	go proxyUdpToTun(conn, tun)
+
+	log.Info("Watching for new subnet leases")
+	monitorEvents(sm, rtr)
+}

+ 3 - 0
version.go

@@ -0,0 +1,3 @@
+package main
+
+const Version = "0.1"