Browse Source

Revendor netlink library

This is primarily to pick up kernel filtering in NeighList function.
The fix was previously vendored off a fork:
https://github.com/coreos/flannel/commit/c5ba18b863788d701d9f8fd9faefa423f1e63a96
but now the fix is upstreamed.
Eugene Yakubovich 9 years ago
parent
commit
0e8bc717d7
30 changed files with 3852 additions and 119 deletions
  1. 1 1
      Godeps/Godeps.json
  2. 7 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/README.md
  3. 2 2
      Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go
  4. 1 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go
  5. 110 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/class.go
  6. 168 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go
  7. 406 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go
  8. 140 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go
  9. 322 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go
  10. 248 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go
  11. 60 6
      Godeps/_workspace/src/github.com/vishvananda/netlink/link.go
  12. 110 29
      Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go
  13. 184 4
      Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go
  14. 14 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/link_tuntap_linux.go
  15. 4 3
      Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go
  16. 9 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go
  17. 20 13
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go
  18. 9 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go
  19. 627 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go
  20. 173 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go
  21. 3 4
      Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go
  22. 1 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go
  23. 290 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go
  24. 415 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go
  25. 345 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go
  26. 44 2
      Godeps/_workspace/src/github.com/vishvananda/netlink/route.go
  27. 75 49
      Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go
  28. 62 0
      Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go
  29. 1 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go
  30. 1 1
      Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go

+ 1 - 1
Godeps/Godeps.json

@@ -125,7 +125,7 @@
 		},
 		{
 			"ImportPath": "github.com/vishvananda/netlink",
-			"Rev": "991a7a2fa7c073968fb27f36669df199b1fdf412"
+			"Rev": "a57a12c1b1d8aa37cf2f9da4fa7657b1f1fed88c"
 		},
 		{
 			"ImportPath": "golang.org/x/net/context",

+ 7 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/README.md

@@ -43,13 +43,19 @@ import (
 )
 
 func main() {
-    mybridge := &netlink.Bridge{netlink.LinkAttrs{Name: "foo"}}
+    la := netlink.NewLinkAttrs()
+    la.Name = "foo"
+    mybridge := &netlink.Bridge{la}
     _ := netlink.LinkAdd(mybridge)
     eth1, _ := netlink.LinkByName("eth1")
     netlink.LinkSetMaster(eth1, mybridge)
 }
 
 ```
+Note the `NewLinkAttrs` constructor: it sets default values in the structure. For
+now it only sets `TxQLen` to `-1`, so the kernel will choose the default itself. If
+you use a plain struct literal (`LinkAttrs{Name: "foo"}`), `TxQLen` will be set to
+`0` unless you specify it explicitly, e.g. `LinkAttrs{Name: "foo", TxQLen: 1000}`.
 
 Add a new ip address to loopback:
 

+ 2 - 2
Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go

@@ -14,8 +14,8 @@ type Addr struct {
 }
 
 // String returns $ip/$netmask $label
-func (addr Addr) String() string {
-	return fmt.Sprintf("%s %s", addr.IPNet, addr.Label)
+func (a Addr) String() string {
+	return strings.TrimSpace(fmt.Sprintf("%s %s", a.IPNet, a.Label))
 }
 
 // ParseAddr parses the string representation of an address in the

+ 1 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go

@@ -81,7 +81,7 @@ func AddrList(link Link, family int) ([]Addr, error) {
 		index = base.Index
 	}
 
-	res := make([]Addr, 0)
+	var res []Addr
 	for _, m := range msgs {
 		msg := nl.DeserializeIfAddrmsg(m)
 

+ 110 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/class.go

@@ -0,0 +1,110 @@
+package netlink
+
+import (
+	"fmt"
+)
+
+type Class interface {
+	Attrs() *ClassAttrs
+	Type() string
+}
+
+// ClassAttrs represents a netlink class. A class is associated with a link,
+// has a handle and a parent. The root class of a device should have a
+// parent == HANDLE_ROOT.
+type ClassAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Leaf      uint32
+}
+
+// String renders the class attributes in a compact, human-readable form.
+// Handle and Parent are formatted with HandleStr; LinkIndex and Leaf are
+// printed as decimal numbers (Leaf is a uint32, so it must not use %s).
+func (q ClassAttrs) String() string {
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
+}
+
+type HtbClassAttrs struct {
+	// TODO handle all attributes
+	Rate    uint64
+	Ceil    uint64
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func (q HtbClassAttrs) String() string {
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
+}
+
+// Htb class
+type HtbClass struct {
+	ClassAttrs
+	Rate    uint64
+	Ceil    uint64
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
+	mtu := 1600
+	rate := cattrs.Rate / 8
+	ceil := cattrs.Ceil / 8
+	buffer := cattrs.Buffer
+	cbuffer := cattrs.Cbuffer
+	if ceil == 0 {
+		ceil = rate
+	}
+
+	if buffer == 0 {
+		buffer = uint32(float64(rate)/Hz() + float64(mtu))
+	}
+	buffer = uint32(Xmittime(rate, buffer))
+
+	if cbuffer == 0 {
+		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
+	}
+	cbuffer = uint32(Xmittime(ceil, cbuffer))
+
+	return &HtbClass{
+		ClassAttrs: attrs,
+		Rate:       rate,
+		Ceil:       ceil,
+		Buffer:     buffer,
+		Cbuffer:    cbuffer,
+		Quantum:    10,
+		Level:      0,
+		Prio:       0,
+	}
+}
+
+func (q HtbClass) String() string {
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
+}
+
+func (class *HtbClass) Attrs() *ClassAttrs {
+	return &class.ClassAttrs
+}
+
+func (class *HtbClass) Type() string {
+	return "htb"
+}
+
+// GenericClass classes represent types that are not currently understood
+// by this netlink library.
+type GenericClass struct {
+	ClassAttrs
+	ClassType string
+}
+
+func (class *GenericClass) Attrs() *ClassAttrs {
+	return &class.ClassAttrs
+}
+
+func (class *GenericClass) Type() string {
+	return class.ClassType
+}

+ 168 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go

@@ -0,0 +1,168 @@
+package netlink
+
+import (
+	"syscall"
+
+	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// ClassDel will delete a class from the system.
+// Equivalent to: `tc class del $class`
+func ClassDel(class Class) error {
+	return classModify(syscall.RTM_DELTCLASS, 0, class)
+}
+
+// ClassChange will change a class in place.
+// Equivalent to: `tc class change $class`
+// The parent and handle MUST NOT be changed.
+func ClassChange(class Class) error {
+	return classModify(syscall.RTM_NEWTCLASS, 0, class)
+}
+
+// ClassReplace will replace a class in the system.
+// Equivalent to: `tc class replace $class`
+// The handle MAY be changed.
+// If a class already exists with this parent/handle pair, the class is changed.
+// If a class does not already exist with this parent/handle, a new class is created.
+func ClassReplace(class Class) error {
+	return classModify(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE, class)
+}
+
+// ClassAdd will add a class to the system.
+// Equivalent to: `tc class add $class`
+func ClassAdd(class Class) error {
+	return classModify(
+		syscall.RTM_NEWTCLASS,
+		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
+		class,
+	)
+}
+
+func classModify(cmd, flags int, class Class) error {
+	req := nl.NewNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
+	base := class.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+
+	if cmd != syscall.RTM_DELTCLASS {
+		if err := classPayload(req, class); err != nil {
+			return err
+		}
+	}
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+func classPayload(req *nl.NetlinkRequest, class Class) error {
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if htb, ok := class.(*HtbClass); ok {
+		opt := nl.TcHtbCopt{}
+		opt.Rate.Rate = uint32(htb.Rate)
+		opt.Ceil.Rate = uint32(htb.Ceil)
+		opt.Buffer = htb.Buffer
+		opt.Cbuffer = htb.Cbuffer
+		opt.Quantum = htb.Quantum
+		opt.Level = htb.Level
+		opt.Prio = htb.Prio
+		// TODO: Handle Debug properly. For now default to 0
+		nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
+	}
+	req.AddData(options)
+	return nil
+}
+
+// ClassList gets a list of classes in the system.
+// Equivalent to: `tc class show`.
+// Generally returns nothing if link and parent are not specified.
+//
+// Each reply message is decoded into an *HtbClass when its TCA_KIND is "htb"
+// and into a *GenericClass otherwise. A message that carries no TCA_KIND
+// attribute is returned as a *GenericClass with an empty ClassType rather
+// than dereferencing a nil Class. Any netlink or attribute-parsing error
+// aborts the listing and is returned with a nil slice.
+func ClassList(link Link, parent uint32) ([]Class, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
+	msg := &nl.TcMsg{
+		Family: nl.FAMILY_ALL,
+		Parent: parent,
+	}
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		msg.Ifindex = int32(base.Index)
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Class
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		base := ClassAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+		}
+
+		var class Class
+		classType := ""
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				// Drop the trailing terminator byte, tolerating an empty
+				// payload instead of slicing with a negative bound.
+				kind := attr.Value
+				if n := len(kind); n > 0 {
+					kind = kind[:n-1]
+				}
+				classType = string(kind)
+				switch classType {
+				case "htb":
+					class = &HtbClass{}
+				default:
+					class = &GenericClass{ClassType: classType}
+				}
+			case nl.TCA_OPTIONS:
+				// Options are only understood for htb; classType is still
+				// empty here if TCA_OPTIONS arrived before TCA_KIND.
+				if classType == "htb" {
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					_, err = parseHtbClassData(class, data)
+					if err != nil {
+						return nil, err
+					}
+				}
+			}
+		}
+		if class == nil {
+			// No TCA_KIND was present: fall back to a generic class so the
+			// base attributes can still be reported.
+			class = &GenericClass{ClassType: classType}
+		}
+		*class.Attrs() = base
+		res = append(res, class)
+	}
+
+	return res, nil
+}
+
+// parseHtbClassData copies the fields of every TCA_HTB_PARMS attribute in
+// data into class, which must hold an *HtbClass (the type assertion panics
+// otherwise). Other attribute types are ignored. It always reports
+// (false, nil).
+func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
+	target := class.(*HtbClass)
+	for _, attr := range data {
+		if attr.Attr.Type != nl.TCA_HTB_PARMS {
+			continue
+		}
+		parms := nl.DeserializeTcHtbCopt(attr.Value)
+		target.Rate = uint64(parms.Rate.Rate)
+		target.Ceil = uint64(parms.Ceil.Rate)
+		target.Buffer = parms.Buffer
+		target.Cbuffer = parms.Cbuffer
+		target.Quantum = parms.Quantum
+		target.Level = parms.Level
+		target.Prio = parms.Prio
+	}
+	return false, nil
+}

+ 406 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go

@@ -0,0 +1,406 @@
+package netlink
+
+import (
+	"testing"
+)
+
+func TestClassAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(0xffff, 0),
+		Parent:    HANDLE_ROOT,
+	}
+	qdisc := NewHtb(attrs)
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok := qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	classattrs := ClassAttrs{
+		LinkIndex: link.Attrs().Index,
+		Parent:    MakeHandle(0xffff, 0),
+		Handle:    MakeHandle(0xffff, 2),
+	}
+
+	htbclassattrs := HtbClassAttrs{
+		Rate:    1234000,
+		Cbuffer: 1690,
+	}
+	class := NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassAdd(class); err != nil {
+		t.Fatal(err)
+	}
+	classes, err := ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 1 {
+		t.Fatal("Failed to add class")
+	}
+
+	htb, ok := classes[0].(*HtbClass)
+	if !ok {
+		t.Fatal("Class is the wrong type")
+	}
+	if htb.Rate != class.Rate {
+		t.Fatal("Rate doesn't match")
+	}
+	if htb.Ceil != class.Ceil {
+		t.Fatal("Ceil doesn't match")
+	}
+	if htb.Buffer != class.Buffer {
+		t.Fatal("Buffer doesn't match")
+	}
+	if htb.Cbuffer != class.Cbuffer {
+		t.Fatal("Cbuffer doesn't match")
+	}
+
+	qattrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(0x2, 0),
+		Parent:    MakeHandle(0xffff, 2),
+	}
+	nattrs := NetemQdiscAttrs{
+		Latency:     20000,
+		Loss:        23.4,
+		Duplicate:   14.3,
+		LossCorr:    8.34,
+		Jitter:      1000,
+		DelayCorr:   12.3,
+		ReorderProb: 23.4,
+		CorruptProb: 10.0,
+		CorruptCorr: 10,
+	}
+	qdiscnetem := NewNetem(qattrs, nattrs)
+	if err := QdiscAdd(qdiscnetem); err != nil {
+		t.Fatal(err)
+	}
+
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 2 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok = qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	netem, ok := qdiscs[1].(*Netem)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	// Compare the record we got from the list with the one we created
+	if netem.Loss != qdiscnetem.Loss {
+		t.Fatal("Loss does not match")
+	}
+	if netem.Latency != qdiscnetem.Latency {
+		t.Fatal("Latency does not match")
+	}
+	if netem.CorruptProb != qdiscnetem.CorruptProb {
+		t.Fatal("CorruptProb does not match")
+	}
+	if netem.Jitter != qdiscnetem.Jitter {
+		t.Fatal("Jitter does not match")
+	}
+	if netem.LossCorr != qdiscnetem.LossCorr {
+		t.Fatal("Loss does not match")
+	}
+	if netem.DuplicateCorr != qdiscnetem.DuplicateCorr {
+		t.Fatal("DuplicateCorr does not match")
+	}
+
+	// Deletion
+	if err := ClassDel(class); err != nil {
+		t.Fatal(err)
+	}
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 0 {
+		t.Fatal("Failed to remove class")
+	}
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+func TestHtbClassAddHtbClassChangeDel(t *testing.T) {
+	/**
+	This test first sets up an interface and an Htb qdisc.
+	An HTB class is attached to it and a Netem qdisc is attached to that class.
+	Next, we test changing the HTB class in place and confirm the Netem qdisc is
+	still attached. We also check that invoking ClassChange with a non-existing
+	class will fail.
+	Finally, we test ClassReplace. We confirm it correctly behaves like
+	ClassChange when the parent/handle pair exists and that it will create a
+	new class if the handle is modified.
+	*/
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(0xffff, 0),
+		Parent:    HANDLE_ROOT,
+	}
+	qdisc := NewHtb(attrs)
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok := qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	classattrs := ClassAttrs{
+		LinkIndex: link.Attrs().Index,
+		Parent:    MakeHandle(0xffff, 0),
+		Handle:    MakeHandle(0xffff, 2),
+	}
+
+	htbclassattrs := HtbClassAttrs{
+		Rate:    1234000,
+		Cbuffer: 1690,
+	}
+	class := NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassAdd(class); err != nil {
+		t.Fatal(err)
+	}
+	classes, err := ClassList(link, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 1 {
+		t.Fatal("Failed to add class")
+	}
+
+	htb, ok := classes[0].(*HtbClass)
+	if !ok {
+		t.Fatal("Class is the wrong type")
+	}
+
+	qattrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(0x2, 0),
+		Parent:    MakeHandle(0xffff, 2),
+	}
+	nattrs := NetemQdiscAttrs{
+		Latency:     20000,
+		Loss:        23.4,
+		Duplicate:   14.3,
+		LossCorr:    8.34,
+		Jitter:      1000,
+		DelayCorr:   12.3,
+		ReorderProb: 23.4,
+		CorruptProb: 10.0,
+		CorruptCorr: 10,
+	}
+	qdiscnetem := NewNetem(qattrs, nattrs)
+	if err := QdiscAdd(qdiscnetem); err != nil {
+		t.Fatal(err)
+	}
+
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 2 {
+		t.Fatal("Failed to add qdisc")
+	}
+
+	_, ok = qdiscs[1].(*Netem)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	// Change
+	// For change to work, the handle and parent cannot be changed.
+
+	// First, test it fails if we change the Handle.
+	old_handle := classattrs.Handle
+	classattrs.Handle = MakeHandle(0xffff, 3)
+	class = NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassChange(class); err == nil {
+		t.Fatal("ClassChange should not work when using a different handle.")
+	}
+	// It should work with the same handle
+	classattrs.Handle = old_handle
+	htbclassattrs.Rate = 4321000
+	class = NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassChange(class); err != nil {
+		t.Fatal(err)
+	}
+
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 1 {
+		t.Fatalf(
+			"1 class expected, %d found",
+			len(classes),
+		)
+	}
+
+	htb, ok = classes[0].(*HtbClass)
+	if !ok {
+		t.Fatal("Class is the wrong type")
+	}
+	// Verify that the rate value has changed.
+	if htb.Rate != class.Rate {
+		t.Fatal("Rate did not get changed while changing the class.")
+	}
+
+	// Check that we still have the netem child qdisc
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(qdiscs) != 2 {
+		t.Fatalf("2 qdisc expected, %d found", len(qdiscs))
+	}
+	_, ok = qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	_, ok = qdiscs[1].(*Netem)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	// Replace
+	// First replace by keeping the same handle, class will be changed.
+	// Then, replace by providing a new handle, a new class will be created.
+
+	// Replace acting as Change
+	class = NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassReplace(class); err != nil {
+		t.Fatal("Failed to replace class that is existing.")
+	}
+
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 1 {
+		t.Fatalf(
+			"1 class expected, %d found",
+			len(classes),
+		)
+	}
+
+	htb, ok = classes[0].(*HtbClass)
+	if !ok {
+		t.Fatal("Class is the wrong type")
+	}
+	// Verify that the rate value has changed.
+	if htb.Rate != class.Rate {
+		t.Fatal("Rate did not get changed while changing the class.")
+	}
+
+	// Replace with a new handle: a second class should be created.
+	classattrs.Handle = MakeHandle(0xffff, 3)
+	class = NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassReplace(class); err != nil {
+		t.Fatal(err)
+	}
+
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 2 {
+		t.Fatalf(
+			"2 classes expected, %d found",
+			len(classes),
+		)
+	}
+
+	htb, ok = classes[1].(*HtbClass)
+	if !ok {
+		t.Fatal("Class is the wrong type")
+	}
+	// Verify that the rate value has changed.
+	if htb.Rate != class.Rate {
+		t.Fatal("Rate did not get changed while changing the class.")
+	}
+
+	// Deletion
+	for _, class := range classes {
+		if err := ClassDel(class); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 0 {
+		t.Fatal("Failed to remove class")
+	}
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}

+ 140 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go

@@ -0,0 +1,140 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+type Filter interface {
+	Attrs() *FilterAttrs
+	Type() string
+}
+
+// FilterAttrs represents a netlink filter. A filter is associated with a link,
+// has a handle and a parent. The root filter of a device should have a
+// parent == HANDLE_ROOT.
+type FilterAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Priority  uint16 // lower is higher priority
+	Protocol  uint16 // syscall.ETH_P_*
+}
+
+func (q FilterAttrs) String() string {
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
+}
+
+// U32 filters on many packet related properties
+type U32 struct {
+	FilterAttrs
+	// Currently only supports redirecting to another interface
+	RedirIndex int
+}
+
+func (filter *U32) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *U32) Type() string {
+	return "u32"
+}
+
+type FilterFwAttrs struct {
+	ClassId   uint32
+	InDev     string
+	Mask      uint32
+	Index     uint32
+	Buffer    uint32
+	Mtu       uint32
+	Mpu       uint16
+	Rate      uint32
+	AvRate    uint32
+	PeakRate  uint32
+	Action    int
+	Overhead  uint16
+	LinkLayer int
+}
+
+// Fw filters on firewall marks
+type Fw struct {
+	FilterAttrs
+	ClassId uint32
+	Police  nl.TcPolice
+	InDev   string
+	// TODO Action
+	Mask   uint32
+	AvRate uint32
+	Rtab   [256]uint32
+	Ptab   [256]uint32
+}
+
+// NewFw builds a firewall-mark filter from the generic filter attributes and
+// the fw-specific settings in fattrs.
+//
+// Rate, PeakRate and AvRate from fattrs are divided by 8 before being stored.
+// When a rate (or peak rate) is non-zero, the matching rate spec is completed
+// through CalcRtable and the corresponding 256-entry table (Rtab / Ptab) is
+// filled. A LinkLayer of LINKLAYER_UNSPEC is treated as LINKLAYER_ETHERNET.
+//
+// It returns an error if CalcRtable reports a negative cell log.
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
+	var rtab [256]uint32
+	var ptab [256]uint32
+	rcellLog := -1
+	pcellLog := -1
+	avrate := fattrs.AvRate / 8
+	police := nl.TcPolice{}
+	police.Rate.Rate = fattrs.Rate / 8
+	police.PeakRate.Rate = fattrs.PeakRate / 8
+	buffer := fattrs.Buffer
+	linklayer := nl.LINKLAYER_ETHERNET
+
+	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
+		linklayer = fattrs.LinkLayer
+	}
+
+	police.Action = int32(fattrs.Action)
+	if police.Rate.Rate != 0 {
+		police.Rate.Mpu = fattrs.Mpu
+		police.Rate.Overhead = fattrs.Overhead
+		cellLog := CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer)
+		if cellLog < 0 {
+			return nil, errors.New("TBF: failed to calculate rate table.")
+		}
+		// CalcRtable receives the array by value, so the entries it computes
+		// never reach rtab. Fill the table here from the cell log it returns.
+		fillRateTable(&rtab, police.Rate, cellLog, linklayer)
+		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
+	}
+	police.Mtu = fattrs.Mtu
+	if police.PeakRate.Rate != 0 {
+		police.PeakRate.Mpu = fattrs.Mpu
+		police.PeakRate.Overhead = fattrs.Overhead
+		cellLog := CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer)
+		if cellLog < 0 {
+			return nil, errors.New("POLICE: failed to calculate peak rate table.")
+		}
+		// Same by-value issue as above: populate ptab explicitly.
+		fillRateTable(&ptab, police.PeakRate, cellLog, linklayer)
+	}
+
+	return &Fw{
+		FilterAttrs: attrs,
+		ClassId:     fattrs.ClassId,
+		InDev:       fattrs.InDev,
+		Mask:        fattrs.Mask,
+		Police:      police,
+		AvRate:      avrate,
+		Rtab:        rtab,
+		Ptab:        ptab,
+	}, nil
+}
+
+// fillRateTable writes into tab the same per-slot values that CalcRtable
+// computes: for slot i, the Xmittime at rate.Rate of a packet of
+// (i+1)<<cellLog bytes after AdjustSize has applied rate.Mpu and linklayer.
+func fillRateTable(tab *[256]uint32, rate nl.TcRateSpec, cellLog int, linklayer int) {
+	for i := range tab {
+		sz := AdjustSize(uint((i+1)<<uint(cellLog)), uint(rate.Mpu), linklayer)
+		tab[i] = uint32(Xmittime(uint64(rate.Rate), uint32(sz)))
+	}
+}
+
+func (filter *Fw) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *Fw) Type() string {
+	return "fw"
+}
+
+// GenericFilter filters represent types that are not currently understood
+// by this netlink library.
+type GenericFilter struct {
+	FilterAttrs
+	FilterType string
+}
+
+func (filter *GenericFilter) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *GenericFilter) Type() string {
+	return filter.FilterType
+}

+ 322 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go

@@ -0,0 +1,322 @@
+package netlink
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"syscall"
+
+	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// FilterDel will delete a filter from the system.
+// Equivalent to: `tc filter del $filter`
+func FilterDel(filter Filter) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
+	base := filter.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
+	}
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// FilterAdd will add a filter to the system.
+// Equivalent to: `tc filter add $filter`
+func FilterAdd(filter Filter) error {
+	native = nl.NativeEndian()
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	base := filter.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if u32, ok := filter.(*U32); ok {
+		// match all
+		sel := nl.TcU32Sel{
+			Nkeys: 1,
+			Flags: nl.TC_U32_TERMINAL,
+		}
+		sel.Keys = append(sel.Keys, nl.TcU32Key{})
+		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
+		actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
+		table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
+		nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
+		// redirect to other interface
+		mir := nl.TcMirred{
+			Action:  nl.TC_ACT_STOLEN,
+			Eaction: nl.TCA_EGRESS_REDIR,
+			Ifindex: uint32(u32.RedirIndex),
+		}
+		aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
+		nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
+	} else if fw, ok := filter.(*Fw); ok {
+		if fw.Mask != 0 {
+			b := make([]byte, 4)
+			native.PutUint32(b, fw.Mask)
+			nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
+		}
+		if fw.InDev != "" {
+			nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
+		}
+		if (fw.Police != nl.TcPolice{}) {
+
+			police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
+			nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
+			if (fw.Police.Rate != nl.TcRateSpec{}) {
+				payload := SerializeRtab(fw.Rtab)
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
+			}
+			if (fw.Police.PeakRate != nl.TcRateSpec{}) {
+				payload := SerializeRtab(fw.Ptab)
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
+			}
+		}
+		if fw.ClassId != 0 {
+			b := make([]byte, 4)
+			native.PutUint32(b, fw.ClassId)
+			nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
+		}
+	}
+
+	req.AddData(options)
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// FilterList gets a list of filters in the system.
+// Equivalent to: `tc filter show`.
+// Generally returns nothing if link and parent are not specified.
+//
+// Only filters whose options were parsed in detail are returned: "u32"
+// filters for which parseU32Data reports detail, and "fw" filters. Messages
+// of any other kind are decoded but dropped from the result. Any netlink or
+// attribute-parsing error aborts the listing and is returned with a nil
+// slice.
+func FilterList(link Link, parent uint32) ([]Filter, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
+	msg := &nl.TcMsg{
+		Family: nl.FAMILY_ALL,
+		Parent: parent,
+	}
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		msg.Ifindex = int32(base.Index)
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Filter
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		base := FilterAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+		}
+		base.Priority, base.Protocol = MajorMinor(msg.Info)
+		base.Protocol = nl.Swap16(base.Protocol)
+
+		var filter Filter
+		filterType := ""
+		detailed := false
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				// Drop the trailing terminator byte, tolerating an empty
+				// payload instead of slicing with a negative bound.
+				kind := attr.Value
+				if n := len(kind); n > 0 {
+					kind = kind[:n-1]
+				}
+				filterType = string(kind)
+				switch filterType {
+				case "u32":
+					filter = &U32{}
+				case "fw":
+					filter = &Fw{}
+				default:
+					filter = &GenericFilter{FilterType: filterType}
+				}
+			case nl.TCA_OPTIONS:
+				switch filterType {
+				case "u32":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					detailed, err = parseU32Data(filter, data)
+					if err != nil {
+						return nil, err
+					}
+				case "fw":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					detailed, err = parseFwData(filter, data)
+					if err != nil {
+						return nil, err
+					}
+				}
+			}
+		}
+		// only return the detailed version of the filter
+		if detailed {
+			*filter.Attrs() = base
+			res = append(res, filter)
+		}
+	}
+
+	return res, nil
+}
+
+// parseU32Data decodes the nested TCA_OPTIONS attributes of a u32 filter into
+// filter, which must hold a *U32 (the type assertion panics otherwise).
+//
+// The returned bool is true once a TCA_U32_SEL attribute has been seen.
+// Parsing stops early, without error, when the selector is not a single-key
+// terminal match or when the action kind is not "mirred". For a mirred
+// action, RedirIndex is set from the TCA_MIRRED_PARMS interface index.
+// An error is returned when any nested attribute list cannot be parsed or
+// the action table is malformed.
+func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	u32 := filter.(*U32)
+	detailed := false
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_U32_SEL:
+			detailed = true
+			sel := nl.DeserializeTcU32Sel(datum.Value)
+			// only parse if we have a very basic redirect
+			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
+				return detailed, nil
+			}
+		case nl.TCA_U32_ACT:
+			table, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				return detailed, err
+			}
+			if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
+				return detailed, fmt.Errorf("Action table not formed properly")
+			}
+			aattrs, err := nl.ParseRouteAttr(table[0].Value)
+			if err != nil {
+				// Previously this error was silently dropped.
+				return detailed, err
+			}
+			for _, aattr := range aattrs {
+				switch aattr.Attr.Type {
+				case nl.TCA_KIND:
+					// Drop the trailing terminator byte, tolerating an
+					// empty payload.
+					kind := aattr.Value
+					if n := len(kind); n > 0 {
+						kind = kind[:n-1]
+					}
+					// only parse if the action is mirred
+					if string(kind) != "mirred" {
+						return detailed, nil
+					}
+				case nl.TCA_OPTIONS:
+					adata, err := nl.ParseRouteAttr(aattr.Value)
+					if err != nil {
+						return detailed, err
+					}
+					for _, adatum := range adata {
+						switch adatum.Attr.Type {
+						case nl.TCA_MIRRED_PARMS:
+							mir := nl.DeserializeTcMirred(adatum.Value)
+							u32.RedirIndex = int(mir.Ifindex)
+						}
+					}
+				}
+			}
+		}
+	}
+	return detailed, nil
+}
+
+// parseFwData decodes the nested TCA_OPTIONS attributes of a fw filter into
+// filter, which must hold a *Fw (the type assertion panics otherwise).
+//
+// It fills Mask, ClassId, InDev and, from a TCA_FW_POLICE attribute, Police,
+// Rtab and Ptab. The returned bool is always true. An error is returned when
+// a mask or class id attribute is shorter than 4 bytes or when the nested
+// police attributes cannot be parsed.
+func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	// Use a local byte order rather than assigning the package-level variable.
+	native := nl.NativeEndian()
+	fw := filter.(*Fw)
+	detailed := true
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_FW_MASK:
+			if len(datum.Value) < 4 {
+				return detailed, fmt.Errorf("fw filter: mask attribute too short (%d bytes)", len(datum.Value))
+			}
+			fw.Mask = native.Uint32(datum.Value[0:4])
+		case nl.TCA_FW_CLASSID:
+			if len(datum.Value) < 4 {
+				return detailed, fmt.Errorf("fw filter: classid attribute too short (%d bytes)", len(datum.Value))
+			}
+			fw.ClassId = native.Uint32(datum.Value[0:4])
+		case nl.TCA_FW_INDEV:
+			// Drop the trailing terminator byte, tolerating an empty payload.
+			name := datum.Value
+			if n := len(name); n > 0 {
+				name = name[:n-1]
+			}
+			fw.InDev = string(name)
+		case nl.TCA_FW_POLICE:
+			adata, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				// Previously this error was discarded.
+				return detailed, err
+			}
+			for _, aattr := range adata {
+				switch aattr.Attr.Type {
+				case nl.TCA_POLICE_TBF:
+					fw.Police = *nl.DeserializeTcPolice(aattr.Value)
+				case nl.TCA_POLICE_RATE:
+					fw.Rtab = DeserializeRtab(aattr.Value)
+				case nl.TCA_POLICE_PEAKRATE:
+					fw.Ptab = DeserializeRtab(aattr.Value)
+				}
+			}
+		}
+	}
+	return detailed, nil
+}
+
+func AlignToAtm(size uint) uint {
+	var linksize, cells int
+	cells = int(size / nl.ATM_CELL_PAYLOAD)
+	if (size % nl.ATM_CELL_PAYLOAD) > 0 {
+		cells++
+	}
+	linksize = cells * nl.ATM_CELL_SIZE
+	return uint(linksize)
+}
+
+// AdjustSize raises sz to at least mpu and, for LINKLAYER_ATM, converts the
+// result to its ATM cell-aligned size via AlignToAtm. Every other link layer
+// returns the clamped size unchanged.
+func AdjustSize(sz uint, mpu uint, linklayer int) uint {
+	clamped := sz
+	if clamped < mpu {
+		clamped = mpu
+	}
+	if linklayer == nl.LINKLAYER_ATM {
+		return AlignToAtm(clamped)
+	}
+	return clamped
+}
+
+// CalcRtable computes a 256-slot rate table for rate and records the cell
+// parameters (CellAlign, CellLog, Linklayer) on rate. It returns the cell log
+// that was used.
+//
+// A zero mtu is treated as 2047. A negative cell_log asks the function to
+// pick the smallest shift for which mtu>>cell_log fits in 255.
+//
+// NOTE(review): rtab is an array passed by value, so the entries written
+// below only modify this function's copy; the caller's table is left
+// untouched. Fixing that needs a signature change (slice or pointer) —
+// confirm with callers before changing.
+func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int {
+	bps := rate.Rate
+	mpu := rate.Mpu
+	var sz uint
+	if mtu == 0 {
+		mtu = 2047
+	}
+	if cell_log < 0 {
+		cell_log = 0
+		for (mtu >> uint(cell_log)) > 255 {
+			cell_log++
+		}
+	}
+	// Slot i holds the transmit time of a packet of (i+1)<<cell_log bytes,
+	// after the size has been adjusted for mpu and the link layer.
+	for i := 0; i < 256; i++ {
+		sz = AdjustSize(uint((i+1)<<uint32(cell_log)), uint(mpu), linklayer)
+		rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
+	}
+	rate.CellAlign = -1
+	rate.CellLog = uint8(cell_log)
+	rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
+	return cell_log
+}
+
+func DeserializeRtab(b []byte) [256]uint32 {
+	var rtab [256]uint32
+	native := nl.NativeEndian()
+	r := bytes.NewReader(b)
+	_ = binary.Read(r, native, &rtab)
+	return rtab
+}
+
+func SerializeRtab(rtab [256]uint32) []byte {
+	native := nl.NativeEndian()
+	var w bytes.Buffer
+	_ = binary.Write(&w, native, rtab)
+	return w.Bytes()
+}

+ 248 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go

@@ -0,0 +1,248 @@
+package netlink
+
+import (
+	"syscall"
+	"testing"
+
+	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+func TestFilterAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	redir, err := LinkByName("bar")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(redir); err != nil {
+		t.Fatal(err)
+	}
+	qdisc := &Ingress{
+		QdiscAttrs: QdiscAttrs{
+			LinkIndex: link.Attrs().Index,
+			Handle:    MakeHandle(0xffff, 0),
+			Parent:    HANDLE_INGRESS,
+		},
+	}
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok := qdiscs[0].(*Ingress)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	filter := &U32{
+		FilterAttrs: FilterAttrs{
+			LinkIndex: link.Attrs().Index,
+			Parent:    MakeHandle(0xffff, 0),
+			Priority:  1,
+			Protocol:  syscall.ETH_P_IP,
+		},
+		RedirIndex: redir.Attrs().Index,
+	}
+	if err := FilterAdd(filter); err != nil {
+		t.Fatal(err)
+	}
+	filters, err := FilterList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(filters) != 1 {
+		t.Fatal("Failed to add filter")
+	}
+	if err := FilterDel(filter); err != nil {
+		t.Fatal(err)
+	}
+	filters, err = FilterList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(filters) != 0 {
+		t.Fatal("Failed to remove filter")
+	}
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+func TestFilterFwAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	redir, err := LinkByName("bar")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(redir); err != nil {
+		t.Fatal(err)
+	}
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(0xffff, 0),
+		Parent:    HANDLE_ROOT,
+	}
+	qdisc := NewHtb(attrs)
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok := qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+
+	classattrs := ClassAttrs{
+		LinkIndex: link.Attrs().Index,
+		Parent:    MakeHandle(0xffff, 0),
+		Handle:    MakeHandle(0xffff, 2),
+	}
+
+	htbclassattrs := HtbClassAttrs{
+		Rate:    1234000,
+		Cbuffer: 1690,
+	}
+	class := NewHtbClass(classattrs, htbclassattrs)
+	if err := ClassAdd(class); err != nil {
+		t.Fatal(err)
+	}
+	classes, err := ClassList(link, MakeHandle(0xffff, 2))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 1 {
+		t.Fatal("Failed to add class")
+	}
+
+	filterattrs := FilterAttrs{
+		LinkIndex: link.Attrs().Index,
+		Parent:    MakeHandle(0xffff, 0),
+		Handle:    MakeHandle(0, 0x6),
+		Priority:  1,
+		Protocol:  syscall.ETH_P_IP,
+	}
+	fwattrs := FilterFwAttrs{
+		Buffer:   12345,
+		Rate:     1234,
+		PeakRate: 2345,
+		Action:   nl.TC_POLICE_SHOT,
+		ClassId:  MakeHandle(0xffff, 2),
+	}
+
+	filter, err := NewFw(filterattrs, fwattrs)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := FilterAdd(filter); err != nil {
+		t.Fatal(err)
+	}
+
+	filters, err := FilterList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(filters) != 1 {
+		t.Fatal("Failed to add filter")
+	}
+	fw, ok := filters[0].(*Fw)
+	if !ok {
+		t.Fatal("Filter is the wrong type")
+	}
+	if fw.Police.Rate.Rate != filter.Police.Rate.Rate {
+		t.Fatal("Police Rate doesn't match")
+	}
+	for i := range fw.Rtab {
+		if fw.Rtab[i] != filter.Rtab[i] {
+			t.Fatal("Rtab doesn't match")
+		}
+		if fw.Ptab[i] != filter.Ptab[i] {
+			t.Fatal("Ptab doesn't match")
+		}
+	}
+	if fw.ClassId != filter.ClassId {
+		t.Fatal("ClassId doesn't match")
+	}
+	if fw.InDev != filter.InDev {
+		t.Fatal("InDev doesn't match")
+	}
+	if fw.AvRate != filter.AvRate {
+		t.Fatal("AvRate doesn't match")
+	}
+
+	if err := FilterDel(filter); err != nil {
+		t.Fatal(err)
+	}
+	filters, err = FilterList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(filters) != 0 {
+		t.Fatal("Failed to remove filter")
+	}
+	if err := ClassDel(class); err != nil {
+		t.Fatal(err)
+	}
+	classes, err = ClassList(link, MakeHandle(0xffff, 0))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(classes) != 0 {
+		t.Fatal("Failed to remove class")
+	}
+
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}

+ 60 - 6
Godeps/_workspace/src/github.com/vishvananda/netlink/link.go

@@ -1,6 +1,9 @@
 package netlink
 
-import "net"
+import (
+	"net"
+	"syscall"
+)
 
 // Link represents a link device from netlink. Shared link attributes
 // like name may be retrieved using the Attrs() method. Unique data
@@ -19,7 +22,7 @@ type (
 type LinkAttrs struct {
 	Index        int
 	MTU          int
-	TxQLen       uint32 // Transmit Queue Length
+	TxQLen       int // Transmit Queue Length
 	Name         string
 	HardwareAddr net.HardwareAddr
 	Flags        net.Flags
@@ -28,6 +31,13 @@ type LinkAttrs struct {
 	Namespace    interface{} // nil | NsPid | NsFd
 }
 
+// NewLinkAttrs returns LinkAttrs structure filled with default values
+func NewLinkAttrs() LinkAttrs {
+	return LinkAttrs{
+		TxQLen: -1,
+	}
+}
+
 // Device links cannot be created via netlink. These links
 // are links created by udev like 'lo' and 'etho0'
 type Device struct {
@@ -55,6 +65,19 @@ func (dummy *Dummy) Type() string {
 	return "dummy"
 }
 
+// Ifb links are advanced dummy devices for packet filtering
+type Ifb struct {
+	LinkAttrs
+}
+
+func (ifb *Ifb) Attrs() *LinkAttrs {
+	return &ifb.LinkAttrs
+}
+
+func (ifb *Ifb) Type() string {
+	return "ifb"
+}
+
 // Bridge links are simple linux bridges
 type Bridge struct {
 	LinkAttrs
@@ -107,6 +130,36 @@ func (macvlan *Macvlan) Type() string {
 	return "macvlan"
 }
 
+// Macvtap links are virtual interfaces based on macvlan
+type Macvtap struct {
+	Macvlan
+}
+
+func (macvtap Macvtap) Type() string {
+	return "macvtap"
+}
+
+type TuntapMode uint16
+
+const (
+	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
+	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
+)
+
+// Tuntap links are created via /dev/net/tun, but can be destroyed via netlink
+type Tuntap struct {
+	LinkAttrs
+	Mode TuntapMode
+}
+
+func (tuntap *Tuntap) Attrs() *LinkAttrs {
+	return &tuntap.LinkAttrs
+}
+
+func (tuntap *Tuntap) Type() string {
+	return "tuntap"
+}
+
 // Veth devices must specify PeerName on create
 type Veth struct {
 	LinkAttrs
@@ -121,18 +174,18 @@ func (veth *Veth) Type() string {
 	return "veth"
 }
 
-// Generic links represent types that are not currently understood
+// GenericLink links represent types that are not currently understood
 // by this netlink library.
-type Generic struct {
+type GenericLink struct {
 	LinkAttrs
 	LinkType string
 }
 
-func (generic *Generic) Attrs() *LinkAttrs {
+func (generic *GenericLink) Attrs() *LinkAttrs {
 	return &generic.LinkAttrs
 }
 
-func (generic *Generic) Type() string {
+func (generic *GenericLink) Type() string {
 	return generic.LinkType
 }
 
@@ -150,6 +203,7 @@ type Vxlan struct {
 	L2miss       bool
 	L3miss       bool
 	NoAge        bool
+	GBP          bool
 	Age          int
 	Limit        int
 	Port         int

+ 110 - 29
Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go

@@ -5,7 +5,9 @@ import (
 	"encoding/binary"
 	"fmt"
 	"net"
+	"os"
 	"syscall"
+	"unsafe"
 
 	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
 )
@@ -48,7 +50,7 @@ func LinkSetUp(link Link) error {
 	return err
 }
 
-// LinkSetUp disables link device.
+// LinkSetDown disables link device.
 // Equivalent to: `ip link set $link down`
 func LinkSetDown(link Link) error {
 	base := link.Attrs()
@@ -73,10 +75,7 @@ func LinkSetMTU(link Link, mtu int) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_MTU
 	req.AddData(msg)
 
 	b := make([]byte, 4)
@@ -97,10 +96,7 @@ func LinkSetName(link Link, name string) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_IFNAME
 	req.AddData(msg)
 
 	data := nl.NewRtAttr(syscall.IFLA_IFNAME, []byte(name))
@@ -118,10 +114,7 @@ func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_ADDRESS
 	req.AddData(msg)
 
 	data := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(hwaddr))
@@ -151,10 +144,7 @@ func LinkSetMasterByIndex(link Link, masterIndex int) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_MASTER
 	req.AddData(msg)
 
 	b := make([]byte, 4)
@@ -176,10 +166,7 @@ func LinkSetNsPid(link Link, nspid int) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_NET_NS_PID
 	req.AddData(msg)
 
 	b := make([]byte, 4)
@@ -201,10 +188,7 @@ func LinkSetNsFd(link Link, fd int) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = nl.IFLA_NET_NS_FD
 	req.AddData(msg)
 
 	b := make([]byte, 4)
@@ -266,6 +250,10 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
 	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
 	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
 
+	if vxlan.GBP {
+		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP))
+	}
+
 	if vxlan.NoAge {
 		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
 	} else if vxlan.Age > 0 {
@@ -299,6 +287,44 @@ func LinkAdd(link Link) error {
 		return fmt.Errorf("LinkAttrs.Name cannot be empty!")
 	}
 
+	if tuntap, ok := link.(*Tuntap); ok {
+		// TODO: support user
+		// TODO: support group
+		// TODO: support non- one_queue
+		// TODO: support pi | vnet_hdr | multi_queue
+		// TODO: support non- exclusive
+		// TODO: support non- persistent
+		if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
+			return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
+		}
+		file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+		if err != nil {
+			return err
+		}
+		defer file.Close()
+		var req ifReq
+		req.Flags |= syscall.IFF_ONE_QUEUE
+		req.Flags |= syscall.IFF_TUN_EXCL
+		copy(req.Name[:15], base.Name)
+		req.Flags |= uint16(tuntap.Mode)
+		_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
+		if errno != 0 {
+			return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
+		}
+		_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
+		if errno != 0 {
+			return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
+		}
+		ensureIndex(base)
+
+		// can't set master during create, so set it afterwards
+		if base.MasterIndex != 0 {
+			// TODO: verify MasterIndex is actually a bridge?
+			return LinkSetMasterByIndex(link, base.MasterIndex)
+		}
+		return nil
+	}
+
 	req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
@@ -321,6 +347,11 @@ func LinkAdd(link Link) error {
 		req.AddData(mtu)
 	}
 
+	if base.TxQLen >= 0 {
+		qlen := nl.NewRtAttr(syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen)))
+		req.AddData(qlen)
+	}
+
 	if base.Namespace != nil {
 		var attr *nl.RtAttr
 		switch base.Namespace.(type) {
@@ -338,8 +369,6 @@ func LinkAdd(link Link) error {
 	linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil)
 	nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
 
-	nl.NewRtAttrChild(linkInfo, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen))
-
 	if vlan, ok := link.(*Vlan); ok {
 		b := make([]byte, 2)
 		native.PutUint16(b, uint16(vlan.VlanId))
@@ -350,10 +379,13 @@ func LinkAdd(link Link) error {
 		peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil)
 		nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC)
 		nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(veth.PeerName))
-		nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen))
+		if base.TxQLen >= 0 {
+			nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen)))
+		}
 		if base.MTU > 0 {
 			nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
 		}
+
 	} else if vxlan, ok := link.(*Vxlan); ok {
 		addVxlanAttrs(vxlan, linkInfo)
 	} else if ipv, ok := link.(*IPVlan); ok {
@@ -501,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) {
 					switch linkType {
 					case "dummy":
 						link = &Dummy{}
+					case "ifb":
+						link = &Ifb{}
 					case "bridge":
 						link = &Bridge{}
 					case "vlan":
@@ -513,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) {
 						link = &IPVlan{}
 					case "macvlan":
 						link = &Macvlan{}
+					case "macvtap":
+						link = &Macvtap{}
 					default:
-						link = &Generic{LinkType: linkType}
+						link = &GenericLink{LinkType: linkType}
 					}
 				case nl.IFLA_INFO_DATA:
 					data, err := nl.ParseRouteAttr(info.Value)
@@ -530,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) {
 						parseIPVlanData(link, data)
 					case "macvlan":
 						parseMacvlanData(link, data)
+					case "macvtap":
+						parseMacvtapData(link, data)
 					}
 				}
 			}
@@ -552,7 +590,7 @@ func linkDeserialize(m []byte) (Link, error) {
 		case syscall.IFLA_MASTER:
 			base.MasterIndex = int(native.Uint32(attr.Value[0:4]))
 		case syscall.IFLA_TXQLEN:
-			base.TxQLen = native.Uint32(attr.Value[0:4])
+			base.TxQLen = int(native.Uint32(attr.Value[0:4]))
 		}
 	}
 	// Links that don't have IFLA_INFO_KIND are hardware devices
@@ -579,8 +617,7 @@ func LinkList() ([]Link, error) {
 		return nil, err
 	}
 
-	res := make([]Link, 0)
-
+	var res []Link
 	for _, m := range msgs {
 		link, err := linkDeserialize(m)
 		if err != nil {
@@ -592,6 +629,46 @@ func LinkList() ([]Link, error) {
 	return res, nil
 }
 
+// LinkUpdate is used to pass information back from LinkSubscribe()
+type LinkUpdate struct {
+	nl.IfInfomsg
+	Link
+}
+
+// LinkSubscribe takes a chan down which notifications will be sent
+// when links change.  Close the 'done' chan to stop subscription.
+func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
+	if err != nil {
+		return err
+	}
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+	go func() {
+		defer close(ch)
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				return
+			}
+			for _, m := range msgs {
+				ifmsg := nl.DeserializeIfInfomsg(m.Data)
+				link, err := linkDeserialize(m.Data)
+				if err != nil {
+					return
+				}
+				ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
+			}
+		}
+	}()
+
+	return nil
+}
+
 func LinkSetHairpin(link Link, mode bool) error {
 	return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
 }
@@ -622,10 +699,7 @@ func setProtinfoAttr(link Link, mode bool, attr int) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
 	msg.Index = int32(base.Index)
-	msg.Change = syscall.IFLA_PROTINFO | syscall.NLA_F_NESTED
 	req.AddData(msg)
 
 	br := nl.NewRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil)
@@ -678,6 +752,8 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
 			vxlan.L2miss = int8(datum.Value[0]) != 0
 		case nl.IFLA_VXLAN_L3MISS:
 			vxlan.L3miss = int8(datum.Value[0]) != 0
+		case nl.IFLA_VXLAN_GBP:
+			vxlan.GBP = int8(datum.Value[0]) != 0
 		case nl.IFLA_VXLAN_AGEING:
 			vxlan.Age = int(native.Uint32(datum.Value[0:4]))
 			vxlan.NoAge = vxlan.Age == 0
@@ -706,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
+func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
+	macv := link.(*Macvtap)
+	parseMacvlanData(&macv.Macvlan, data)
+}
+
 func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	macv := link.(*Macvlan)
 	for _, datum := range data {

+ 184 - 4
Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go

@@ -3,12 +3,17 @@ package netlink
 import (
 	"bytes"
 	"net"
+	"syscall"
 	"testing"
+	"time"
 
 	"github.com/vishvananda/netns"
 )
 
-const testTxQLen uint32 = 100
+const (
+	testTxQLen    int = 100
+	defaultTxQLen int = 1000
+)
 
 func testLinkAddDel(t *testing.T, link Link) {
 	links, err := LinkList()
@@ -50,9 +55,9 @@ func testLinkAddDel(t *testing.T, link Link) {
 		}
 	}
 
-	if veth, ok := link.(*Veth); ok {
-		if veth.TxQLen != testTxQLen {
-			t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, testTxQLen)
+	if veth, ok := result.(*Veth); ok {
+		if rBase.TxQLen != base.TxQLen {
+			t.Fatalf("qlen is %d, should be %d", rBase.TxQLen, base.TxQLen)
 		}
 		if rBase.MTU != base.MTU {
 			t.Fatalf("MTU is %d, should be %d", rBase.MTU, base.MTU)
@@ -147,6 +152,9 @@ func compareVxlan(t *testing.T, expected, actual *Vxlan) {
 	if actual.L3miss != expected.L3miss {
 		t.Fatal("Vxlan.L3miss doesn't match")
 	}
+	if actual.GBP != expected.GBP {
+		t.Fatal("Vxlan.GBP doesn't match")
+	}
 	if expected.NoAge {
 		if !actual.NoAge {
 			t.Fatal("Vxlan.NoAge doesn't match")
@@ -177,6 +185,13 @@ func TestLinkAddDelDummy(t *testing.T) {
 	testLinkAddDel(t, &Dummy{LinkAttrs{Name: "foo"}})
 }
 
+func TestLinkAddDelIfb(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+
+	testLinkAddDel(t, &Ifb{LinkAttrs{Name: "foo"}})
+}
+
 func TestLinkAddDelBridge(t *testing.T) {
 	tearDown := setUpNetlinkTest(t)
 	defer tearDown()
@@ -219,6 +234,27 @@ func TestLinkAddDelMacvlan(t *testing.T) {
 	}
 }
 
+func TestLinkAddDelMacvtap(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+
+	parent := &Dummy{LinkAttrs{Name: "foo"}}
+	if err := LinkAdd(parent); err != nil {
+		t.Fatal(err)
+	}
+
+	testLinkAddDel(t, &Macvtap{
+		Macvlan: Macvlan{
+			LinkAttrs: LinkAttrs{Name: "bar", ParentIndex: parent.Attrs().Index},
+			Mode:      MACVLAN_MODE_PRIVATE,
+		},
+	})
+
+	if err := LinkDel(parent); err != nil {
+		t.Fatal(err)
+	}
+}
+
 func TestLinkAddDelVeth(t *testing.T) {
 	tearDown := setUpNetlinkTest(t)
 	defer tearDown()
@@ -226,6 +262,99 @@ func TestLinkAddDelVeth(t *testing.T) {
 	testLinkAddDel(t, &Veth{LinkAttrs{Name: "foo", TxQLen: testTxQLen, MTU: 1400}, "bar"})
 }
 
+func TestLinkAddVethWithDefaultTxQLen(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	la := NewLinkAttrs()
+	la.Name = "foo"
+
+	veth := &Veth{LinkAttrs: la, PeerName: "bar"}
+	if err := LinkAdd(veth); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if veth, ok := link.(*Veth); !ok {
+		t.Fatalf("unexpected link type: %T", link)
+	} else {
+		if veth.TxQLen != defaultTxQLen {
+			t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, defaultTxQLen)
+		}
+	}
+	peer, err := LinkByName("bar")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if veth, ok := peer.(*Veth); !ok {
+		t.Fatalf("unexpected link type: %T", link)
+	} else {
+		if veth.TxQLen != defaultTxQLen {
+			t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, defaultTxQLen)
+		}
+	}
+}
+
+func TestLinkAddVethWithZeroTxQLen(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	la := NewLinkAttrs()
+	la.Name = "foo"
+	la.TxQLen = 0
+
+	veth := &Veth{LinkAttrs: la, PeerName: "bar"}
+	if err := LinkAdd(veth); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if veth, ok := link.(*Veth); !ok {
+		t.Fatalf("unexpected link type: %T", link)
+	} else {
+		if veth.TxQLen != 0 {
+			t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, 0)
+		}
+	}
+	peer, err := LinkByName("bar")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if veth, ok := peer.(*Veth); !ok {
+		t.Fatalf("unexpected link type: %T", link)
+	} else {
+		if veth.TxQLen != 0 {
+			t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, 0)
+		}
+	}
+}
+
+func TestLinkAddDummyWithTxQLen(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	la := NewLinkAttrs()
+	la.Name = "foo"
+	la.TxQLen = 1500
+
+	dummy := &Dummy{LinkAttrs: la}
+	if err := LinkAdd(dummy); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if dummy, ok := link.(*Dummy); !ok {
+		t.Fatalf("unexpected link type: %T", link)
+	} else {
+		if dummy.TxQLen != 1500 {
+			t.Fatalf("TxQLen is %d, should be %d", dummy.TxQLen, 1500)
+		}
+	}
+}
+
 func TestLinkAddDelBridgeMaster(t *testing.T) {
 	tearDown := setUpNetlinkTest(t)
 	defer tearDown()
@@ -542,3 +671,54 @@ func TestLinkSet(t *testing.T) {
 		t.Fatalf("hardware address not changed!")
 	}
 }
+
+func expectLinkUpdate(ch <-chan LinkUpdate, ifaceName string, up bool) bool {
+	for {
+		timeout := time.After(time.Minute)
+		select {
+		case update := <-ch:
+			if ifaceName == update.Link.Attrs().Name && (update.IfInfomsg.Flags&syscall.IFF_UP != 0) == up {
+				return true
+			}
+		case <-timeout:
+			return false
+		}
+	}
+}
+
+func TestLinkSubscribe(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+
+	ch := make(chan LinkUpdate)
+	done := make(chan struct{})
+	defer close(done)
+	if err := LinkSubscribe(ch, done); err != nil {
+		t.Fatal(err)
+	}
+
+	link := &Veth{LinkAttrs{Name: "foo", TxQLen: testTxQLen, MTU: 1400}, "bar"}
+	if err := LinkAdd(link); err != nil {
+		t.Fatal(err)
+	}
+
+	if !expectLinkUpdate(ch, "foo", false) {
+		t.Fatal("Add update not received as expected")
+	}
+
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+
+	if !expectLinkUpdate(ch, "foo", true) {
+		t.Fatal("Link Up update not received as expected")
+	}
+
+	if err := LinkDel(link); err != nil {
+		t.Fatal(err)
+	}
+
+	if !expectLinkUpdate(ch, "foo", false) {
+		t.Fatal("Del update not received as expected")
+	}
+}

+ 14 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/link_tuntap_linux.go

@@ -0,0 +1,14 @@
+package netlink
+
+// ideally golang.org/x/sys/unix would define IfReq but it only has
+// IFNAMSIZ, hence this minimalistic implementation
+const (
+	SizeOfIfReq = 40
+	IFNAMSIZ    = 16
+)
+
+type ifReq struct {
+	Name  [IFNAMSIZ]byte
+	Flags uint16
+	pad   [SizeOfIfReq - IFNAMSIZ - 2]byte
+}

+ 4 - 3
Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go

@@ -70,10 +70,10 @@ func NeighAdd(neigh *Neigh) error {
 	return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
 }
 
-// NeighAdd will add or replace an IP to MAC mapping to the ARP table
+// NeighSet will add or replace an IP to MAC mapping to the ARP table
 // Equivalent to: `ip neigh replace....`
 func NeighSet(neigh *Neigh) error {
-	return neighAdd(neigh, syscall.NLM_F_CREATE)
+	return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE)
 }
 
 // NeighAppend will append an entry to FDB
@@ -133,6 +133,7 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
 	req := nl.NewNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
 	msg := Ndmsg{
 		Family: uint8(family),
+		Index:  uint32(linkIndex),
 	}
 	req.AddData(&msg)
 
@@ -141,7 +142,7 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
 		return nil, err
 	}
 
-	res := make([]Neigh, 0)
+	var res []Neigh
 	for _, m := range msgs {
 		ndm := deserializeNdmsg(m)
 		if linkIndex != 0 && int(ndm.Index) != linkIndex {

+ 9 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go

@@ -47,7 +47,15 @@ const (
 	IFLA_VXLAN_PORT
 	IFLA_VXLAN_GROUP6
 	IFLA_VXLAN_LOCAL6
-	IFLA_VXLAN_MAX = IFLA_VXLAN_LOCAL6
+	IFLA_VXLAN_UDP_CSUM
+	IFLA_VXLAN_UDP_ZERO_CSUM6_TX
+	IFLA_VXLAN_UDP_ZERO_CSUM6_RX
+	IFLA_VXLAN_REMCSUM_TX
+	IFLA_VXLAN_REMCSUM_RX
+	IFLA_VXLAN_GBP
+	IFLA_VXLAN_REMCSUM_NOPARTIAL
+	IFLA_VXLAN_FLOWBASED
+	IFLA_VXLAN_MAX = IFLA_VXLAN_FLOWBASED
 )
 
 const (

+ 20 - 13
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -39,8 +39,9 @@ func NativeEndian() binary.ByteOrder {
 		var x uint32 = 0x01020304
 		if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
 			nativeEndian = binary.BigEndian
+		} else {
+			nativeEndian = binary.LittleEndian
 		}
-		nativeEndian = binary.LittleEndian
 	}
 	return nativeEndian
 }
@@ -141,17 +142,19 @@ func (a *RtAttr) Len() int {
 }
 
 // Serialize the RtAttr into a byte array
-// This can't ust unsafe.cast because it must iterate through children.
+// This can't just unsafe.cast because it must iterate through children.
 func (a *RtAttr) Serialize() []byte {
 	native := NativeEndian()
 
 	length := a.Len()
 	buf := make([]byte, rtaAlignOf(length))
 
+	next := 4
 	if a.Data != nil {
-		copy(buf[4:], a.Data)
-	} else {
-		next := 4
+		copy(buf[next:], a.Data)
+		next += rtaAlignOf(len(a.Data))
+	}
+	if len(a.children) > 0 {
 		for _, child := range a.children {
 			childBuf := child.Serialize()
 			copy(buf[next:], childBuf)
@@ -172,16 +175,16 @@ type NetlinkRequest struct {
 }
 
 // Serialize the Netlink Request into a byte array
-func (msg *NetlinkRequest) Serialize() []byte {
+func (req *NetlinkRequest) Serialize() []byte {
 	length := syscall.SizeofNlMsghdr
-	dataBytes := make([][]byte, len(msg.Data))
-	for i, data := range msg.Data {
+	dataBytes := make([][]byte, len(req.Data))
+	for i, data := range req.Data {
 		dataBytes[i] = data.Serialize()
 		length = length + len(dataBytes[i])
 	}
-	msg.Len = uint32(length)
+	req.Len = uint32(length)
 	b := make([]byte, length)
-	hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(msg)))[:]
+	hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
 	next := syscall.SizeofNlMsghdr
 	copy(b[0:next], hdr)
 	for _, data := range dataBytes {
@@ -193,9 +196,9 @@ func (msg *NetlinkRequest) Serialize() []byte {
 	return b
 }
 
-func (msg *NetlinkRequest) AddData(data NetlinkRequestData) {
+func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
 	if data != nil {
-		msg.Data = append(msg.Data, data)
+		req.Data = append(req.Data, data)
 	}
 }
 
@@ -218,7 +221,7 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
 		return nil, err
 	}
 
-	res := make([][]byte, 0)
+	var res [][]byte
 
 done:
 	for {
@@ -322,6 +325,10 @@ func (s *NetlinkSocket) Close() {
 	syscall.Close(s.fd)
 }
 
+func (s *NetlinkSocket) GetFd() int {
+	return s.fd
+}
+
 func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
 	if err := syscall.Sendto(s.fd, request.Serialize(), 0, &s.lsa); err != nil {
 		return err

+ 9 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go

@@ -20,6 +20,15 @@ func NewRtMsg() *RtMsg {
 	}
 }
 
+func NewRtDelMsg() *RtMsg {
+	return &RtMsg{
+		RtMsg: syscall.RtMsg{
+			Table: syscall.RT_TABLE_MAIN,
+			Scope: syscall.RT_SCOPE_NOWHERE,
+		},
+	}
+}
+
 func (msg *RtMsg) Len() int {
 	return syscall.SizeofRtMsg
 }

+ 627 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go

@@ -0,0 +1,627 @@
+package nl
+
+import (
+	"unsafe"
+)
+
+// LinkLayer
+const (
+	LINKLAYER_UNSPEC = iota
+	LINKLAYER_ETHERNET
+	LINKLAYER_ATM
+)
+
+// ATM
+const (
+	ATM_CELL_PAYLOAD = 48
+	ATM_CELL_SIZE    = 53
+)
+
+const TC_LINKLAYER_MASK = 0x0F
+
+// Police
+const (
+	TCA_POLICE_UNSPEC = iota
+	TCA_POLICE_TBF
+	TCA_POLICE_RATE
+	TCA_POLICE_PEAKRATE
+	TCA_POLICE_AVRATE
+	TCA_POLICE_RESULT
+	TCA_POLICE_MAX = TCA_POLICE_RESULT
+)
+
+// Message types
+const (
+	TCA_UNSPEC = iota
+	TCA_KIND
+	TCA_OPTIONS
+	TCA_STATS
+	TCA_XSTATS
+	TCA_RATE
+	TCA_FCNT
+	TCA_STATS2
+	TCA_STAB
+	TCA_MAX = TCA_STAB
+)
+
+const (
+	TCA_ACT_TAB = 1
+	TCAA_MAX    = 1
+)
+
+const (
+	TCA_PRIO_UNSPEC = iota
+	TCA_PRIO_MQ
+	TCA_PRIO_MAX = TCA_PRIO_MQ
+)
+
+const (
+	SizeofTcMsg          = 0x14
+	SizeofTcActionMsg    = 0x04
+	SizeofTcPrioMap      = 0x14
+	SizeofTcRateSpec     = 0x0c
+	SizeofTcNetemQopt    = 0x18
+	SizeofTcNetemCorr    = 0x0c
+	SizeofTcNetemReorder = 0x08
+	SizeofTcNetemCorrupt = 0x08
+	SizeofTcTbfQopt      = 2*SizeofTcRateSpec + 0x0c
+	SizeofTcHtbCopt      = 2*SizeofTcRateSpec + 0x14
+	SizeofTcHtbGlob      = 0x14
+	SizeofTcU32Key       = 0x10
+	SizeofTcU32Sel       = 0x10 // without keys
+	SizeofTcMirred       = 0x1c
+	SizeofTcPolice       = 2*SizeofTcRateSpec + 0x20
+)
+
+// struct tcmsg {
+//   unsigned char tcm_family;
+//   unsigned char tcm__pad1;
+//   unsigned short  tcm__pad2;
+//   int   tcm_ifindex;
+//   __u32   tcm_handle;
+//   __u32   tcm_parent;
+//   __u32   tcm_info;
+// };
+
+type TcMsg struct {
+	Family  uint8
+	Pad     [3]byte
+	Ifindex int32
+	Handle  uint32
+	Parent  uint32
+	Info    uint32
+}
+
+func (msg *TcMsg) Len() int {
+	return SizeofTcMsg
+}
+
+func DeserializeTcMsg(b []byte) *TcMsg {
+	return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0]))
+}
+
+func (x *TcMsg) Serialize() []byte {
+	return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tcamsg {
+//   unsigned char tca_family;
+//   unsigned char tca__pad1;
+//   unsigned short  tca__pad2;
+// };
+
+type TcActionMsg struct {
+	Family uint8
+	Pad    [3]byte
+}
+
+func (msg *TcActionMsg) Len() int {
+	return SizeofTcActionMsg
+}
+
+func DeserializeTcActionMsg(b []byte) *TcActionMsg {
+	return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0]))
+}
+
+func (x *TcActionMsg) Serialize() []byte {
+	return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TC_PRIO_MAX = 15
+)
+
+// struct tc_prio_qopt {
+// 	int bands;      /* Number of bands */
+// 	__u8  priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+// };
+
+type TcPrioMap struct {
+	Bands   int32
+	Priomap [TC_PRIO_MAX + 1]uint8
+}
+
+func (msg *TcPrioMap) Len() int {
+	return SizeofTcPrioMap
+}
+
+func DeserializeTcPrioMap(b []byte) *TcPrioMap {
+	return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0]))
+}
+
+func (x *TcPrioMap) Serialize() []byte {
+	return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_TBF_UNSPEC = iota
+	TCA_TBF_PARMS
+	TCA_TBF_RTAB
+	TCA_TBF_PTAB
+	TCA_TBF_RATE64
+	TCA_TBF_PRATE64
+	TCA_TBF_BURST
+	TCA_TBF_PBURST
+	TCA_TBF_MAX = TCA_TBF_PBURST
+)
+
+// struct tc_ratespec {
+//   unsigned char cell_log;
+//   __u8    linklayer; /* lower 4 bits */
+//   unsigned short  overhead;
+//   short   cell_align;
+//   unsigned short  mpu;
+//   __u32   rate;
+// };
+
+type TcRateSpec struct {
+	CellLog   uint8
+	Linklayer uint8
+	Overhead  uint16
+	CellAlign int16
+	Mpu       uint16
+	Rate      uint32
+}
+
+func (msg *TcRateSpec) Len() int {
+	return SizeofTcRateSpec
+}
+
+func DeserializeTcRateSpec(b []byte) *TcRateSpec {
+	return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
+}
+
+func (x *TcRateSpec) Serialize() []byte {
+	return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
+}
+
+/**
+* NETEM
+ */
+
+const (
+	TCA_NETEM_UNSPEC = iota
+	TCA_NETEM_CORR
+	TCA_NETEM_DELAY_DIST
+	TCA_NETEM_REORDER
+	TCA_NETEM_CORRUPT
+	TCA_NETEM_LOSS
+	TCA_NETEM_RATE
+	TCA_NETEM_ECN
+	TCA_NETEM_RATE64
+	TCA_NETEM_MAX = TCA_NETEM_RATE64
+)
+
+// struct tc_netem_qopt {
+//	__u32	latency;	/* added delay (us) */
+//	__u32   limit;		/* fifo limit (packets) */
+//	__u32	loss;		/* random packet loss (0=none ~0=100%) */
+//	__u32	gap;		/* re-ordering gap (0 for none) */
+//	__u32   duplicate;	/* random packet dup  (0=none ~0=100%) */
+// 	__u32	jitter;		/* random jitter in latency (us) */
+// };
+
+type TcNetemQopt struct {
+	Latency   uint32
+	Limit     uint32
+	Loss      uint32
+	Gap       uint32
+	Duplicate uint32
+	Jitter    uint32
+}
+
+func (msg *TcNetemQopt) Len() int {
+	return SizeofTcNetemQopt
+}
+
+func DeserializeTcNetemQopt(b []byte) *TcNetemQopt {
+	return (*TcNetemQopt)(unsafe.Pointer(&b[0:SizeofTcNetemQopt][0]))
+}
+
+func (x *TcNetemQopt) Serialize() []byte {
+	return (*(*[SizeofTcNetemQopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_corr {
+//  __u32   delay_corr; /* delay correlation */
+//  __u32   loss_corr;  /* packet loss correlation */
+//  __u32   dup_corr;   /* duplicate correlation  */
+// };
+
+type TcNetemCorr struct {
+	DelayCorr uint32
+	LossCorr  uint32
+	DupCorr   uint32
+}
+
+func (msg *TcNetemCorr) Len() int {
+	return SizeofTcNetemCorr
+}
+
+func DeserializeTcNetemCorr(b []byte) *TcNetemCorr {
+	return (*TcNetemCorr)(unsafe.Pointer(&b[0:SizeofTcNetemCorr][0]))
+}
+
+func (x *TcNetemCorr) Serialize() []byte {
+	return (*(*[SizeofTcNetemCorr]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_reorder {
+//  __u32   probability;
+//  __u32   correlation;
+// };
+
+type TcNetemReorder struct {
+	Probability uint32
+	Correlation uint32
+}
+
+func (msg *TcNetemReorder) Len() int {
+	return SizeofTcNetemReorder
+}
+
+func DeserializeTcNetemReorder(b []byte) *TcNetemReorder {
+	return (*TcNetemReorder)(unsafe.Pointer(&b[0:SizeofTcNetemReorder][0]))
+}
+
+func (x *TcNetemReorder) Serialize() []byte {
+	return (*(*[SizeofTcNetemReorder]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_corrupt {
+//  __u32   probability;
+//  __u32   correlation;
+// };
+
+type TcNetemCorrupt struct {
+	Probability uint32
+	Correlation uint32
+}
+
+func (msg *TcNetemCorrupt) Len() int {
+	return SizeofTcNetemCorrupt
+}
+
+func DeserializeTcNetemCorrupt(b []byte) *TcNetemCorrupt {
+	return (*TcNetemCorrupt)(unsafe.Pointer(&b[0:SizeofTcNetemCorrupt][0]))
+}
+
+func (x *TcNetemCorrupt) Serialize() []byte {
+	return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_tbf_qopt {
+//   struct tc_ratespec rate;
+//   struct tc_ratespec peakrate;
+//   __u32   limit;
+//   __u32   buffer;
+//   __u32   mtu;
+// };
+
+type TcTbfQopt struct {
+	Rate     TcRateSpec
+	Peakrate TcRateSpec
+	Limit    uint32
+	Buffer   uint32
+	Mtu      uint32
+}
+
+func (msg *TcTbfQopt) Len() int {
+	return SizeofTcTbfQopt
+}
+
+func DeserializeTcTbfQopt(b []byte) *TcTbfQopt {
+	return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0]))
+}
+
+func (x *TcTbfQopt) Serialize() []byte {
+	return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_HTB_UNSPEC = iota
+	TCA_HTB_PARMS
+	TCA_HTB_INIT
+	TCA_HTB_CTAB
+	TCA_HTB_RTAB
+	TCA_HTB_DIRECT_QLEN
+	TCA_HTB_RATE64
+	TCA_HTB_CEIL64
+	TCA_HTB_MAX = TCA_HTB_CEIL64
+)
+
+//struct tc_htb_opt {
+//	struct tc_ratespec	rate;
+//	struct tc_ratespec	ceil;
+//	__u32	buffer;
+//	__u32	cbuffer;
+//	__u32	quantum;
+//	__u32	level;		/* out only */
+//	__u32	prio;
+//};
+
+type TcHtbCopt struct {
+	Rate    TcRateSpec
+	Ceil    TcRateSpec
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func (msg *TcHtbCopt) Len() int {
+	return SizeofTcHtbCopt
+}
+
+func DeserializeTcHtbCopt(b []byte) *TcHtbCopt {
+	return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0]))
+}
+
+func (x *TcHtbCopt) Serialize() []byte {
+	return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+type TcHtbGlob struct {
+	Version      uint32
+	Rate2Quantum uint32
+	Defcls       uint32
+	Debug        uint32
+	DirectPkts   uint32
+}
+
+func (msg *TcHtbGlob) Len() int {
+	return SizeofTcHtbGlob
+}
+
+func DeserializeTcHtbGlob(b []byte) *TcHtbGlob {
+	return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0]))
+}
+
+func (x *TcHtbGlob) Serialize() []byte {
+	return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_U32_UNSPEC = iota
+	TCA_U32_CLASSID
+	TCA_U32_HASH
+	TCA_U32_LINK
+	TCA_U32_DIVISOR
+	TCA_U32_SEL
+	TCA_U32_POLICE
+	TCA_U32_ACT
+	TCA_U32_INDEV
+	TCA_U32_PCNT
+	TCA_U32_MARK
+	TCA_U32_MAX = TCA_U32_MARK
+)
+
+// struct tc_u32_key {
+//   __be32    mask;
+//   __be32    val;
+//   int   off;
+//   int   offmask;
+// };
+
+type TcU32Key struct {
+	Mask    uint32 // big endian
+	Val     uint32 // big endian
+	Off     int32
+	OffMask int32
+}
+
+func (msg *TcU32Key) Len() int {
+	return SizeofTcU32Key
+}
+
+func DeserializeTcU32Key(b []byte) *TcU32Key {
+	return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0]))
+}
+
+func (x *TcU32Key) Serialize() []byte {
+	return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_u32_sel {
+//   unsigned char   flags;
+//   unsigned char   offshift;
+//   unsigned char   nkeys;
+//
+//   __be16      offmask;
+//   __u16     off;
+//   short     offoff;
+//
+//   short     hoff;
+//   __be32      hmask;
+//   struct tc_u32_key keys[0];
+// };
+
+const (
+	TC_U32_TERMINAL  = 1 << iota
+	TC_U32_OFFSET    = 1 << iota
+	TC_U32_VAROFFSET = 1 << iota
+	TC_U32_EAT       = 1 << iota
+)
+
+type TcU32Sel struct {
+	Flags    uint8
+	Offshift uint8
+	Nkeys    uint8
+	Pad      uint8
+	Offmask  uint16 // big endian
+	Off      uint16
+	Offoff   int16
+	Hoff     int16
+	Hmask    uint32 // big endian
+	Keys     []TcU32Key
+}
+
+func (msg *TcU32Sel) Len() int {
+	return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key
+}
+
+func DeserializeTcU32Sel(b []byte) *TcU32Sel {
+	x := &TcU32Sel{}
+	copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b)
+	next := SizeofTcU32Sel
+	var i uint8
+	for i = 0; i < x.Nkeys; i++ {
+		x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:]))
+		next += SizeofTcU32Key
+	}
+	return x
+}
+
+func (x *TcU32Sel) Serialize() []byte {
+	// This can't just unsafe.cast because it must iterate through keys.
+	buf := make([]byte, x.Len())
+	copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:])
+	next := SizeofTcU32Sel
+	for _, key := range x.Keys {
+		keyBuf := key.Serialize()
+		copy(buf[next:], keyBuf)
+		next += SizeofTcU32Key
+	}
+	return buf
+}
+
+const (
+	TCA_ACT_MIRRED = 8
+)
+
+const (
+	TCA_MIRRED_UNSPEC = iota
+	TCA_MIRRED_TM
+	TCA_MIRRED_PARMS
+	TCA_MIRRED_MAX = TCA_MIRRED_PARMS
+)
+
+const (
+	TCA_EGRESS_REDIR   = 1 /* packet redirect to EGRESS*/
+	TCA_EGRESS_MIRROR  = 2 /* mirror packet to EGRESS */
+	TCA_INGRESS_REDIR  = 3 /* packet redirect to INGRESS*/
+	TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
+)
+
+const (
+	TC_ACT_UNSPEC     = int32(-1)
+	TC_ACT_OK         = 0
+	TC_ACT_RECLASSIFY = 1
+	TC_ACT_SHOT       = 2
+	TC_ACT_PIPE       = 3
+	TC_ACT_STOLEN     = 4
+	TC_ACT_QUEUED     = 5
+	TC_ACT_REPEAT     = 6
+	TC_ACT_JUMP       = 0x10000000
+)
+
+// #define tc_gen \
+//   __u32                 index; \
+//   __u32                 capab; \
+//   int                   action; \
+//   int                   refcnt; \
+//   int                   bindcnt
+// struct tc_mirred {
+// 	tc_gen;
+// 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
+// 	__u32                   ifindex;  /* ifindex of egress port */
+// };
+
+type TcMirred struct {
+	Index   uint32
+	Capab   uint32
+	Action  int32
+	Refcnt  int32
+	Bindcnt int32
+	Eaction int32
+	Ifindex uint32
+}
+
+func (msg *TcMirred) Len() int {
+	return SizeofTcMirred
+}
+
+func DeserializeTcMirred(b []byte) *TcMirred {
+	return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0]))
+}
+
+func (x *TcMirred) Serialize() []byte {
+	return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TC_POLICE_UNSPEC     = TC_ACT_UNSPEC
+	TC_POLICE_OK         = TC_ACT_OK
+	TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY
+	TC_POLICE_SHOT       = TC_ACT_SHOT
+	TC_POLICE_PIPE       = TC_ACT_PIPE
+)
+
+// struct tc_police {
+// 	__u32			index;
+// 	int			action;
+// 	__u32			limit;
+// 	__u32			burst;
+// 	__u32			mtu;
+// 	struct tc_ratespec	rate;
+// 	struct tc_ratespec	peakrate;
+// 	int				refcnt;
+// 	int				bindcnt;
+// 	__u32			capab;
+// };
+
+type TcPolice struct {
+	Index    uint32
+	Action   int32
+	Limit    uint32
+	Burst    uint32
+	Mtu      uint32
+	Rate     TcRateSpec
+	PeakRate TcRateSpec
+	Refcnt   int32
+	Bindcnt  int32
+	Capab    uint32
+}
+
+func (msg *TcPolice) Len() int {
+	return SizeofTcPolice
+}
+
+func DeserializeTcPolice(b []byte) *TcPolice {
+	return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
+}
+
+func (x *TcPolice) Serialize() []byte {
+	return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_FW_UNSPEC = iota
+	TCA_FW_CLASSID
+	TCA_FW_POLICE
+	TCA_FW_INDEV
+	TCA_FW_ACT
+	TCA_FW_MASK
+	TCA_FW_MAX = TCA_FW_MASK
+)

+ 173 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go

@@ -0,0 +1,173 @@
+package nl
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/binary"
+	"testing"
+)
+
+/* TcMsg */
+func (msg *TcMsg) write(b []byte) {
+	native := NativeEndian()
+	b[0] = msg.Family
+	copy(b[1:4], msg.Pad[:])
+	native.PutUint32(b[4:8], uint32(msg.Ifindex))
+	native.PutUint32(b[8:12], msg.Handle)
+	native.PutUint32(b[12:16], msg.Parent)
+	native.PutUint32(b[16:20], msg.Info)
+}
+
+func (msg *TcMsg) serializeSafe() []byte {
+	length := SizeofTcMsg
+	b := make([]byte, length)
+	msg.write(b)
+	return b
+}
+
+func deserializeTcMsgSafe(b []byte) *TcMsg {
+	var msg = TcMsg{}
+	binary.Read(bytes.NewReader(b[0:SizeofTcMsg]), NativeEndian(), &msg)
+	return &msg
+}
+
+func TestTcMsgDeserializeSerialize(t *testing.T) {
+	var orig = make([]byte, SizeofTcMsg)
+	rand.Read(orig)
+	safemsg := deserializeTcMsgSafe(orig)
+	msg := DeserializeTcMsg(orig)
+	testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+/* TcActionMsg */
+func (msg *TcActionMsg) write(b []byte) {
+	b[0] = msg.Family
+	copy(b[1:4], msg.Pad[:])
+}
+
+func (msg *TcActionMsg) serializeSafe() []byte {
+	length := SizeofTcActionMsg
+	b := make([]byte, length)
+	msg.write(b)
+	return b
+}
+
+func deserializeTcActionMsgSafe(b []byte) *TcActionMsg {
+	var msg = TcActionMsg{}
+	binary.Read(bytes.NewReader(b[0:SizeofTcActionMsg]), NativeEndian(), &msg)
+	return &msg
+}
+
+func TestTcActionMsgDeserializeSerialize(t *testing.T) {
+	var orig = make([]byte, SizeofTcActionMsg)
+	rand.Read(orig)
+	safemsg := deserializeTcActionMsgSafe(orig)
+	msg := DeserializeTcActionMsg(orig)
+	testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+/* TcRateSpec */
+func (msg *TcRateSpec) write(b []byte) {
+	native := NativeEndian()
+	b[0] = msg.CellLog
+	b[1] = msg.Linklayer
+	native.PutUint16(b[2:4], msg.Overhead)
+	native.PutUint16(b[4:6], uint16(msg.CellAlign))
+	native.PutUint16(b[6:8], msg.Mpu)
+	native.PutUint32(b[8:12], msg.Rate)
+}
+
+func (msg *TcRateSpec) serializeSafe() []byte {
+	length := SizeofTcRateSpec
+	b := make([]byte, length)
+	msg.write(b)
+	return b
+}
+
+func deserializeTcRateSpecSafe(b []byte) *TcRateSpec {
+	var msg = TcRateSpec{}
+	binary.Read(bytes.NewReader(b[0:SizeofTcRateSpec]), NativeEndian(), &msg)
+	return &msg
+}
+
+func TestTcRateSpecDeserializeSerialize(t *testing.T) {
+	var orig = make([]byte, SizeofTcRateSpec)
+	rand.Read(orig)
+	safemsg := deserializeTcRateSpecSafe(orig)
+	msg := DeserializeTcRateSpec(orig)
+	testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+/* TcTbfQopt */
+func (msg *TcTbfQopt) write(b []byte) {
+	native := NativeEndian()
+	msg.Rate.write(b[0:SizeofTcRateSpec])
+	start := SizeofTcRateSpec
+	msg.Peakrate.write(b[start : start+SizeofTcRateSpec])
+	start += SizeofTcRateSpec
+	native.PutUint32(b[start:start+4], msg.Limit)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Buffer)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Mtu)
+}
+
+func (msg *TcTbfQopt) serializeSafe() []byte {
+	length := SizeofTcTbfQopt
+	b := make([]byte, length)
+	msg.write(b)
+	return b
+}
+
+func deserializeTcTbfQoptSafe(b []byte) *TcTbfQopt {
+	var msg = TcTbfQopt{}
+	binary.Read(bytes.NewReader(b[0:SizeofTcTbfQopt]), NativeEndian(), &msg)
+	return &msg
+}
+
+func TestTcTbfQoptDeserializeSerialize(t *testing.T) {
+	var orig = make([]byte, SizeofTcTbfQopt)
+	rand.Read(orig)
+	safemsg := deserializeTcTbfQoptSafe(orig)
+	msg := DeserializeTcTbfQopt(orig)
+	testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+/* TcHtbCopt */
+func (msg *TcHtbCopt) write(b []byte) {
+	native := NativeEndian()
+	msg.Rate.write(b[0:SizeofTcRateSpec])
+	start := SizeofTcRateSpec
+	msg.Ceil.write(b[start : start+SizeofTcRateSpec])
+	start += SizeofTcRateSpec
+	native.PutUint32(b[start:start+4], msg.Buffer)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Cbuffer)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Quantum)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Level)
+	start += 4
+	native.PutUint32(b[start:start+4], msg.Prio)
+}
+
+func (msg *TcHtbCopt) serializeSafe() []byte {
+	length := SizeofTcHtbCopt
+	b := make([]byte, length)
+	msg.write(b)
+	return b
+}
+
+func deserializeTcHtbCoptSafe(b []byte) *TcHtbCopt {
+	var msg = TcHtbCopt{}
+	binary.Read(bytes.NewReader(b[0:SizeofTcHtbCopt]), NativeEndian(), &msg)
+	return &msg
+}
+
+func TestTcHtbCoptDeserializeSerialize(t *testing.T) {
+	var orig = make([]byte, SizeofTcHtbCopt)
+	rand.Read(orig)
+	safemsg := deserializeTcHtbCoptSafe(orig)
+	msg := DeserializeTcHtbCopt(orig)
+	testDeserializeSerialize(t, orig, safemsg, msg)
+}

+ 3 - 4
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go

@@ -104,9 +104,8 @@ func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet {
 	ip := x.ToIP()
 	if GetIPFamily(ip) == FAMILY_V4 {
 		return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)}
-	} else {
-		return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)}
 	}
+	return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)}
 }
 
 func (x *XfrmAddress) FromIP(ip net.IP) {
@@ -125,8 +124,8 @@ func DeserializeXfrmAddress(b []byte) *XfrmAddress {
 	return (*XfrmAddress)(unsafe.Pointer(&b[0:SizeofXfrmAddress][0]))
 }
 
-func (msg *XfrmAddress) Serialize() []byte {
-	return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(msg)))[:]
+func (x *XfrmAddress) Serialize() []byte {
+	return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(x)))[:]
 }
 
 // struct xfrm_selector {

+ 1 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go

@@ -16,7 +16,7 @@ type Protinfo struct {
 
 // String returns a list of enabled flags
 func (prot *Protinfo) String() string {
-	boolStrings := make([]string, 0)
+	var boolStrings []string
 	if prot.Hairpin {
 		boolStrings = append(boolStrings, "Hairpin")
 	}

+ 290 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go

@@ -0,0 +1,290 @@
+package netlink
+
+import (
+	"fmt"
+	"math"
+)
+
+const (
+	HANDLE_NONE      = 0
+	HANDLE_INGRESS   = 0xFFFFFFF1
+	HANDLE_ROOT      = 0xFFFFFFFF
+	PRIORITY_MAP_LEN = 16
+)
+
+type Qdisc interface {
+	Attrs() *QdiscAttrs
+	Type() string
+}
+
+// QdiscAttrs holds the attributes shared by every netlink qdisc. A qdisc is
+// associated with a link, has a handle, a parent and a refcnt. The root qdisc
+// of a device should have parent == HANDLE_ROOT.
+type QdiscAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Refcnt    uint32 // read only
+}
+
+// String renders the attributes in a human-readable form. Handle and Parent
+// are formatted with HandleStr; LinkIndex and Refcnt are printed as decimal
+// integers.
+func (q QdiscAttrs) String() string {
+	// Refcnt is a uint32, so it must use %d; %s would print "%!s(uint32=...)".
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
+}
+
+func MakeHandle(major, minor uint16) uint32 {
+	return (uint32(major) << 16) | uint32(minor)
+}
+
+// MajorMinor splits a handle into its major (upper 16 bits) and minor
+// (lower 16 bits) parts. It is the inverse of MakeHandle.
+func MajorMinor(handle uint32) (uint16, uint16) {
+	return uint16(handle >> 16), uint16(handle & 0x0000FFFF)
+}
+
+func HandleStr(handle uint32) string {
+	switch handle {
+	case HANDLE_NONE:
+		return "none"
+	case HANDLE_INGRESS:
+		return "ingress"
+	case HANDLE_ROOT:
+		return "root"
+	default:
+		major, minor := MajorMinor(handle)
+		return fmt.Sprintf("%x:%x", major, minor)
+	}
+}
+
+// Percentage2u32 maps a percentage in the range [0, 100] onto the full uint32
+// range: 0 maps to 0 and 100 maps to math.MaxUint32. Inputs outside that
+// range are clamped, and NaN is treated as 0.
+//
+// The scaling is done in float64 because float32 cannot represent
+// math.MaxUint32 exactly (it rounds up to 2^32), so a float32 product for an
+// input just below 100 might not fit in a uint32.
+func Percentage2u32(percentage float32) uint32 {
+	if percentage <= 0 || math.IsNaN(float64(percentage)) {
+		return 0
+	}
+	if percentage >= 100 {
+		return math.MaxUint32
+	}
+	return uint32(math.MaxUint32 * (float64(percentage) / 100))
+}
+
+// PfifoFast is the default qdisc created by the kernel if one has not
+// been defined for the interface
+type PfifoFast struct {
+	QdiscAttrs
+	Bands       uint8
+	PriorityMap [PRIORITY_MAP_LEN]uint8
+}
+
+func (qdisc *PfifoFast) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *PfifoFast) Type() string {
+	return "pfifo_fast"
+}
+
+// Prio is a basic qdisc that works just like PfifoFast
+type Prio struct {
+	QdiscAttrs
+	Bands       uint8
+	PriorityMap [PRIORITY_MAP_LEN]uint8
+}
+
+func NewPrio(attrs QdiscAttrs) *Prio {
+	return &Prio{
+		QdiscAttrs:  attrs,
+		Bands:       3,
+		PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+	}
+}
+
+func (qdisc *Prio) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Prio) Type() string {
+	return "prio"
+}
+
+// Htb is a classful qdisc that rate limits based on tokens
+type Htb struct {
+	QdiscAttrs
+	Version      uint32
+	Rate2Quantum uint32
+	Defcls       uint32
+	Debug        uint32
+	DirectPkts   uint32
+}
+
+func NewHtb(attrs QdiscAttrs) *Htb {
+	return &Htb{
+		QdiscAttrs:   attrs,
+		Version:      3,
+		Defcls:       0,
+		Rate2Quantum: 10,
+		Debug:        0,
+		DirectPkts:   0,
+	}
+}
+
+func (qdisc *Htb) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Htb) Type() string {
+	return "htb"
+}
+
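+// NetemQdiscAttrs holds the user-facing netem parameters (delay, loss,
+// duplication, reordering and corruption) that NewNetem converts into a Netem
+// qdisc. Times are in microseconds and probabilities/correlations in percent.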
+
+type NetemQdiscAttrs struct {
+	Latency       uint32  // in us
+	DelayCorr     float32 // in %
+	Limit         uint32
+	Loss          float32 // in %
+	LossCorr      float32 // in %
+	Gap           uint32
+	Duplicate     float32 // in %
+	DuplicateCorr float32 // in %
+	Jitter        uint32  // in us
+	ReorderProb   float32 // in %
+	ReorderCorr   float32 // in %
+	CorruptProb   float32 // in %
+	CorruptCorr   float32 // in %
+}
+
+// String renders the main netem parameters in a human-readable form.
+// Latency, Limit, Gap and Jitter are integers; Loss and Duplicate are
+// float32 percentages and are therefore printed with %f.
+func (q NetemQdiscAttrs) String() string {
+	return fmt.Sprintf(
+		"{Latency: %d, Limit: %d, Loss: %f, Gap: %d, Duplicate: %f, Jitter: %d}",
+		q.Latency, q.Limit, q.Loss, q.Gap, q.Duplicate, q.Jitter,
+	)
+}
+
+type Netem struct {
+	QdiscAttrs
+	Latency       uint32
+	DelayCorr     uint32
+	Limit         uint32
+	Loss          uint32
+	LossCorr      uint32
+	Gap           uint32
+	Duplicate     uint32
+	DuplicateCorr uint32
+	Jitter        uint32
+	ReorderProb   uint32
+	ReorderCorr   uint32
+	CorruptProb   uint32
+	CorruptCorr   uint32
+}
+
+func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
+	var limit uint32 = 1000
+	var loss_corr, delay_corr, duplicate_corr uint32
+	var reorder_prob, reorder_corr uint32
+	var corrupt_prob, corrupt_corr uint32
+
+	latency := nattrs.Latency
+	loss := Percentage2u32(nattrs.Loss)
+	gap := nattrs.Gap
+	duplicate := Percentage2u32(nattrs.Duplicate)
+	jitter := nattrs.Jitter
+
+	// Correlation
+	if latency > 0 && jitter > 0 {
+		delay_corr = Percentage2u32(nattrs.DelayCorr)
+	}
+	if loss > 0 {
+		loss_corr = Percentage2u32(nattrs.LossCorr)
+	}
+	if duplicate > 0 {
+		duplicate_corr = Percentage2u32(nattrs.DuplicateCorr)
+	}
+	// FIXME should validate values(like loss/duplicate are percentages...)
+	latency = time2Tick(latency)
+
+	if nattrs.Limit != 0 {
+		limit = nattrs.Limit
+	}
+	// Jitter is only valid if latency is > 0
+	if latency > 0 {
+		jitter = time2Tick(jitter)
+	}
+
+	reorder_prob = Percentage2u32(nattrs.ReorderProb)
+	reorder_corr = Percentage2u32(nattrs.ReorderCorr)
+
+	if reorder_prob > 0 {
+		// ERROR if latency == 0
+		if gap == 0 {
+			gap = 1
+		}
+	}
+
+	corrupt_prob = Percentage2u32(nattrs.CorruptProb)
+	corrupt_corr = Percentage2u32(nattrs.CorruptCorr)
+
+	return &Netem{
+		QdiscAttrs:    attrs,
+		Latency:       latency,
+		DelayCorr:     delay_corr,
+		Limit:         limit,
+		Loss:          loss,
+		LossCorr:      loss_corr,
+		Gap:           gap,
+		Duplicate:     duplicate,
+		DuplicateCorr: duplicate_corr,
+		Jitter:        jitter,
+		ReorderProb:   reorder_prob,
+		ReorderCorr:   reorder_corr,
+		CorruptProb:   corrupt_prob,
+		CorruptCorr:   corrupt_corr,
+	}
+}
+
+func (qdisc *Netem) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Netem) Type() string {
+	return "netem"
+}
+
+// Tbf is a classless qdisc that rate limits based on tokens
+type Tbf struct {
+	QdiscAttrs
+	// TODO: handle 64bit rate properly
+	Rate   uint64
+	Limit  uint32
+	Buffer uint32
+	// TODO: handle other settings
+}
+
+func (qdisc *Tbf) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Tbf) Type() string {
+	return "tbf"
+}
+
+// Ingress is a qdisc for adding ingress filters
+type Ingress struct {
+	QdiscAttrs
+}
+
+func (qdisc *Ingress) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Ingress) Type() string {
+	return "ingress"
+}
+
+// GenericQdisc qdiscs represent types that are not currently understood
+// by this netlink library.
+type GenericQdisc struct {
+	QdiscAttrs
+	QdiscType string
+}
+
+func (qdisc *GenericQdisc) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *GenericQdisc) Type() string {
+	return qdisc.QdiscType
+}

+ 415 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go

@@ -0,0 +1,415 @@
+package netlink
+
+import (
+	"fmt"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"syscall"
+
+	"github.com/coreos/flannel/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// QdiscDel will delete a qdisc from the system.
+// Equivalent to: `tc qdisc del $qdisc`
+func QdiscDel(qdisc Qdisc) error {
+	return qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
+}
+
+// QdiscChange will change a qdisc in place
+// Equivalent to: `tc qdisc change $qdisc`
+// The parent and handle MUST NOT be changed.
+func QdiscChange(qdisc Qdisc) error {
+	return qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
+}
+
+// QdiscReplace will replace a qdisc in the system.
+// Equivalent to: `tc qdisc replace $qdisc`
+// The handle MUST change.
+func QdiscReplace(qdisc Qdisc) error {
+	return qdiscModify(
+		syscall.RTM_NEWQDISC,
+		syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
+		qdisc)
+}
+
+// QdiscAdd will add a qdisc to the system.
+// Equivalent to: `tc qdisc add $qdisc`
+func QdiscAdd(qdisc Qdisc) error {
+	return qdiscModify(
+		syscall.RTM_NEWQDISC,
+		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
+		qdisc)
+}
+
+func qdiscModify(cmd, flags int, qdisc Qdisc) error {
+	req := nl.NewNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
+	base := qdisc.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+
+	// When deleting don't bother building the rest of the netlink payload
+	if cmd != syscall.RTM_DELQDISC {
+		if err := qdiscPayload(req, qdisc); err != nil {
+			return err
+		}
+	}
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
+
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if prio, ok := qdisc.(*Prio); ok {
+		tcmap := nl.TcPrioMap{
+			Bands:   int32(prio.Bands),
+			Priomap: prio.PriorityMap,
+		}
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
+	} else if tbf, ok := qdisc.(*Tbf); ok {
+		opt := nl.TcTbfQopt{}
+		// TODO: handle rate > uint32
+		opt.Rate.Rate = uint32(tbf.Rate)
+		opt.Limit = tbf.Limit
+		opt.Buffer = tbf.Buffer
+		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
+	} else if htb, ok := qdisc.(*Htb); ok {
+		opt := nl.TcHtbGlob{}
+		opt.Version = htb.Version
+		opt.Rate2Quantum = htb.Rate2Quantum
+		opt.Defcls = htb.Defcls
+		// TODO: Handle Debug properly. For now default to 0
+		opt.Debug = htb.Debug
+		opt.DirectPkts = htb.DirectPkts
+		nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
+		// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
+	} else if netem, ok := qdisc.(*Netem); ok {
+		opt := nl.TcNetemQopt{}
+		opt.Latency = netem.Latency
+		opt.Limit = netem.Limit
+		opt.Loss = netem.Loss
+		opt.Gap = netem.Gap
+		opt.Duplicate = netem.Duplicate
+		opt.Jitter = netem.Jitter
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
+		// Correlation
+		corr := nl.TcNetemCorr{}
+		corr.DelayCorr = netem.DelayCorr
+		corr.LossCorr = netem.LossCorr
+		corr.DupCorr = netem.DuplicateCorr
+
+		if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
+			nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
+		}
+		// Corruption
+		corruption := nl.TcNetemCorrupt{}
+		corruption.Probability = netem.CorruptProb
+		corruption.Correlation = netem.CorruptCorr
+		if corruption.Probability > 0 {
+			nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
+		}
+		// Reorder
+		reorder := nl.TcNetemReorder{}
+		reorder.Probability = netem.ReorderProb
+		reorder.Correlation = netem.ReorderCorr
+		if reorder.Probability > 0 {
+			nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
+		}
+	} else if _, ok := qdisc.(*Ingress); ok {
+		// ingress filters must use the proper handle
+		if qdisc.Attrs().Parent != HANDLE_INGRESS {
+			return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
+		}
+	}
+
+	req.AddData(options)
+	return nil
+}
+
+// QdiscList gets a list of qdiscs in the system.
+// Equivalent to: `tc qdisc show`.
+// The list can be filtered by link.
+func QdiscList(link Link) ([]Qdisc, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
+	index := int32(0)
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		index = int32(base.Index)
+	}
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: index,
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Qdisc
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		// skip qdiscs from other interfaces
+		if link != nil && msg.Ifindex != index {
+			continue
+		}
+
+		base := QdiscAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+			Refcnt:    msg.Info,
+		}
+		var qdisc Qdisc
+		qdiscType := ""
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				qdiscType = string(attr.Value[:len(attr.Value)-1])
+				switch qdiscType {
+				case "pfifo_fast":
+					qdisc = &PfifoFast{}
+				case "prio":
+					qdisc = &Prio{}
+				case "tbf":
+					qdisc = &Tbf{}
+				case "ingress":
+					qdisc = &Ingress{}
+				case "htb":
+					qdisc = &Htb{}
+				case "netem":
+					qdisc = &Netem{}
+				default:
+					qdisc = &GenericQdisc{QdiscType: qdiscType}
+				}
+			case nl.TCA_OPTIONS:
+				switch qdiscType {
+				case "pfifo_fast":
+					// pfifo returns TcPrioMap directly without wrapping it in rtattr
+					if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
+				case "prio":
+					// prio returns TcPrioMap directly without wrapping it in rtattr
+					if err := parsePrioData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
+				case "tbf":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					if err := parseTbfData(qdisc, data); err != nil {
+						return nil, err
+					}
+				case "htb":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					if err := parseHtbData(qdisc, data); err != nil {
+						return nil, err
+					}
+				case "netem":
+					if err := parseNetemData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
+
+					// no options for ingress
+				}
+			}
+		}
+		*qdisc.Attrs() = base
+		res = append(res, qdisc)
+	}
+
+	return res, nil
+}
+
+// parsePfifoFastData fills a *PfifoFast from a raw TCA_OPTIONS payload.
+// The payload is decoded with nl.DeserializeTcPrioMap; qdisc must hold a
+// *PfifoFast (the type assertion panics otherwise). Always returns nil.
+func parsePfifoFastData(qdisc Qdisc, value []byte) error {
+	target := qdisc.(*PfifoFast)
+	prioMap := nl.DeserializeTcPrioMap(value)
+	target.Bands = uint8(prioMap.Bands)
+	target.PriorityMap = prioMap.Priomap
+	return nil
+}
+
+// parsePrioData fills a *Prio from a raw TCA_OPTIONS payload.
+// The payload is decoded with nl.DeserializeTcPrioMap; qdisc must hold a
+// *Prio (the type assertion panics otherwise). Always returns nil.
+func parsePrioData(qdisc Qdisc, value []byte) error {
+	target := qdisc.(*Prio)
+	prioMap := nl.DeserializeTcPrioMap(value)
+	target.Bands = uint8(prioMap.Bands)
+	target.PriorityMap = prioMap.Priomap
+	return nil
+}
+
+// parseHtbData copies the HTB options found in data into qdisc, which must
+// hold a *Htb (the type assertion panics otherwise).
+//
+// Only TCA_HTB_INIT is decoded; TCA_HTB_DIRECT_QLEN is recognised but not
+// yet stored. Always returns nil.
+//
+// The previous version re-assigned the non-local `native` byte order on
+// every call without ever reading it here; that write was dropped so the
+// function no longer touches shared state.
+func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
+	htb := qdisc.(*Htb)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_HTB_INIT:
+			opt := nl.DeserializeTcHtbGlob(datum.Value)
+			htb.Version = opt.Version
+			htb.Rate2Quantum = opt.Rate2Quantum
+			htb.Defcls = opt.Defcls
+			htb.Debug = opt.Debug
+			htb.DirectPkts = opt.DirectPkts
+		case nl.TCA_HTB_DIRECT_QLEN:
+			// TODO: decode the value into htb.DirectQlen, e.g.
+			//htb.DirectQlen = native.uint32(datum.Value)
+		}
+	}
+	return nil
+}
+
+// parseNetemData fills a *Netem from a raw TCA_OPTIONS payload.
+//
+// value starts with a structure decoded by nl.DeserializeTcNetemQopt; the
+// bytes after nl.SizeofTcNetemQopt are parsed as nested route attributes
+// carrying the correlation, corruption and reorder settings. qdisc must hold
+// a *Netem (the type assertion panics otherwise). The only error returned is
+// the one from nl.ParseRouteAttr.
+func parseNetemData(qdisc Qdisc, value []byte) error {
+	netem := qdisc.(*Netem)
+
+	base := nl.DeserializeTcNetemQopt(value)
+	netem.Latency, netem.Limit = base.Latency, base.Limit
+	netem.Loss, netem.Gap = base.Loss, base.Gap
+	netem.Duplicate, netem.Jitter = base.Duplicate, base.Jitter
+
+	nested, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
+	if err != nil {
+		return err
+	}
+	for i := range nested {
+		payload := nested[i].Value
+		switch nested[i].Attr.Type {
+		case nl.TCA_NETEM_CORR:
+			corr := nl.DeserializeTcNetemCorr(payload)
+			netem.DelayCorr = corr.DelayCorr
+			netem.LossCorr = corr.LossCorr
+			netem.DuplicateCorr = corr.DupCorr
+		case nl.TCA_NETEM_CORRUPT:
+			corrupt := nl.DeserializeTcNetemCorrupt(payload)
+			netem.CorruptProb = corrupt.Probability
+			netem.CorruptCorr = corrupt.Correlation
+		case nl.TCA_NETEM_REORDER:
+			reorder := nl.DeserializeTcNetemReorder(payload)
+			netem.ReorderProb = reorder.Probability
+			netem.ReorderCorr = reorder.Correlation
+		}
+	}
+	return nil
+}
+
+// parseTbfData copies the TBF options found in data into qdisc, which must
+// hold a *Tbf (the type assertion panics otherwise).
+//
+// TCA_TBF_PARMS supplies Rate, Limit and Buffer. TCA_TBF_RATE64, when
+// present, overrides Rate with a 64-bit value decoded in native byte order.
+// Returns syscall.EINVAL if a TCA_TBF_RATE64 payload is shorter than 8
+// bytes; otherwise nil.
+func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
+	// Keep the byte order local so parsing does not write to shared state.
+	native := nl.NativeEndian()
+	tbf := qdisc.(*Tbf)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_TBF_PARMS:
+			opt := nl.DeserializeTcTbfQopt(datum.Value)
+			tbf.Rate = uint64(opt.Rate.Rate)
+			tbf.Limit = opt.Limit
+			tbf.Buffer = opt.Buffer
+		case nl.TCA_TBF_RATE64:
+			// Uint64 needs 8 bytes; the former [0:4] slice made it panic.
+			if len(datum.Value) < 8 {
+				return syscall.EINVAL
+			}
+			tbf.Rate = native.Uint64(datum.Value[0:8])
+		}
+	}
+	return nil
+}
+
+// TIME_UNITS_PER_SEC is the scale constant used by the clock and rate
+// conversion helpers in this file.
+const TIME_UNITS_PER_SEC = 1000000
+
+// Clock parameters filled in by initClock. Each one starts at zero, which the
+// exported accessors treat as "not loaded yet".
+var (
+	tickInUsec  float64
+	clockFactor float64
+	hz          float64
+)
+
+// initClock loads the clock parameters from /proc/net/psched.
+//
+// The first three space-separated fields of the file are parsed as 32-bit
+// hexadecimal numbers and used to set clockFactor, tickInUsec and hz. If the
+// file cannot be read, has fewer than three fields, or a field fails to
+// parse, the function returns without changing any of the three variables.
+func initClock() {
+	raw, err := ioutil.ReadFile("/proc/net/psched")
+	if err != nil {
+		return
+	}
+	fields := strings.Split(strings.TrimSpace(string(raw)), " ")
+	if len(fields) < 3 {
+		return
+	}
+	var first, second, third uint64
+	for i, dst := range []*uint64{&first, &second, &third} {
+		parsed, err := strconv.ParseUint(fields[i], 16, 32)
+		if err != nil {
+			return
+		}
+		*dst = parsed
+	}
+	// compatibility: when the third field is 1000000000 the first field is
+	// replaced by the second before anything is derived from it.
+	if third == 1000000000 {
+		first = second
+	}
+	clockFactor = float64(third) / TIME_UNITS_PER_SEC
+	tickInUsec = float64(first) / float64(second) * clockFactor
+	hz = float64(first)
+}
+
+// TickInUsec returns the cached tickInUsec value, calling initClock first
+// when the cached value is still zero.
+func TickInUsec() float64 {
+	if tickInUsec != 0.0 {
+		return tickInUsec
+	}
+	initClock()
+	return tickInUsec
+}
+
+// ClockFactor returns the cached clockFactor value, calling initClock first
+// when the cached value is still zero.
+func ClockFactor() float64 {
+	if clockFactor != 0.0 {
+		return clockFactor
+	}
+	initClock()
+	return clockFactor
+}
+
+// Hz returns the cached hz value, calling initClock first when the cached
+// value is still zero.
+func Hz() float64 {
+	if hz != 0.0 {
+		return hz
+	}
+	initClock()
+	return hz
+}
+
+// time2Tick multiplies time by TickInUsec() and truncates to uint32.
+func time2Tick(time uint32) uint32 {
+	ticks := float64(time) * TickInUsec()
+	return uint32(ticks)
+}
+
+// tick2Time divides tick by TickInUsec() and truncates to uint32.
+func tick2Time(tick uint32) uint32 {
+	elapsed := float64(tick) / TickInUsec()
+	return uint32(elapsed)
+}
+
+// time2Ktime multiplies time by ClockFactor() and truncates to uint32.
+func time2Ktime(time uint32) uint32 {
+	scaled := float64(time) * ClockFactor()
+	return uint32(scaled)
+}
+
+// ktime2Time divides ktime by ClockFactor() and truncates to uint32.
+func ktime2Time(ktime uint32) uint32 {
+	scaled := float64(ktime) / ClockFactor()
+	return uint32(scaled)
+}
+
+// burst returns rate multiplied by tick2Time(buffer), divided by
+// TIME_UNITS_PER_SEC and truncated to uint32.
+func burst(rate uint64, buffer uint32) uint32 {
+	product := float64(rate) * float64(tick2Time(buffer))
+	return uint32(product / TIME_UNITS_PER_SEC)
+}
+
+// latency returns TIME_UNITS_PER_SEC*(limit/rate) minus tick2Time(buffer),
+// computed in float64. A zero rate is not special-cased.
+func latency(rate uint64, limit, buffer uint32) float64 {
+	return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
+}
+
+// Xmittime returns TickInUsec() * TIME_UNITS_PER_SEC * (size / rate),
+// computed in float64. A zero rate is not special-cased.
+func Xmittime(rate uint64, size uint32) float64 {
+	ratio := float64(size) / float64(rate)
+	return TickInUsec() * TIME_UNITS_PER_SEC * ratio
+}

+ 345 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go

@@ -0,0 +1,345 @@
+package netlink
+
+import (
+	"testing"
+)
+
+// TestTbfAddDel adds a Tbf qdisc to a freshly created ifb link, checks that
+// QdiscList reports a single *Tbf with the same Rate, Limit and Buffer, then
+// deletes it and checks that the list is empty again.
+func TestTbfAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	// Create the link the qdisc will be attached to and bring it up.
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	qdisc := &Tbf{
+		QdiscAttrs: QdiscAttrs{
+			LinkIndex: link.Attrs().Index,
+			Handle:    MakeHandle(1, 0),
+			Parent:    HANDLE_ROOT,
+		},
+		Rate:   131072,
+		Limit:  1220703,
+		Buffer: 16793,
+	}
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	// Read the qdisc back and compare it field by field.
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	tbf, ok := qdiscs[0].(*Tbf)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if tbf.Rate != qdisc.Rate {
+		t.Fatal("Rate doesn't match")
+	}
+	if tbf.Limit != qdisc.Limit {
+		t.Fatal("Limit doesn't match")
+	}
+	if tbf.Buffer != qdisc.Buffer {
+		t.Fatal("Buffer doesn't match")
+	}
+	// Remove the qdisc and make sure nothing is left on the link.
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+// TestHtbAddDel adds an Htb qdisc (built with NewHtb, Rate2Quantum set to 5)
+// to a freshly created ifb link, checks that QdiscList reports a single *Htb
+// with matching Defcls, Rate2Quantum and Debug, then deletes it and checks
+// that the list is empty again.
+func TestHtbAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	// Create the link the qdisc will be attached to and bring it up.
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(1, 0),
+		Parent:    HANDLE_ROOT,
+	}
+
+	qdisc := NewHtb(attrs)
+	qdisc.Rate2Quantum = 5
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the qdisc back and compare it field by field.
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	htb, ok := qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if htb.Defcls != qdisc.Defcls {
+		t.Fatal("Defcls doesn't match")
+	}
+	if htb.Rate2Quantum != qdisc.Rate2Quantum {
+		t.Fatal("Rate2Quantum doesn't match")
+	}
+	if htb.Debug != qdisc.Debug {
+		t.Fatal("Debug doesn't match")
+	}
+	// Remove the qdisc and make sure nothing is left on the link.
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+// TestPrioAddDel adds a Prio qdisc (built with NewPrio) to a freshly created
+// ifb link, checks that QdiscList reports a single *Prio, then deletes it and
+// checks that the list is empty again. Only the type is verified, not the
+// qdisc's fields.
+func TestPrioAddDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	// Create the link the qdisc will be attached to and bring it up.
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+	qdisc := NewPrio(QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(1, 0),
+		Parent:    HANDLE_ROOT,
+	})
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	_, ok := qdiscs[0].(*Prio)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	// Remove the qdisc and make sure nothing is left on the link.
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+// TestTbfAddHtbReplaceDel adds a Tbf qdisc to a freshly created ifb link,
+// replaces it with an Htb qdisc via QdiscReplace, verifies each step through
+// QdiscList, and finally deletes the Htb qdisc and checks the list is empty.
+func TestTbfAddHtbReplaceDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+
+	// Add
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(1, 0),
+		Parent:    HANDLE_ROOT,
+	}
+	qdisc := &Tbf{
+		QdiscAttrs: attrs,
+		Rate:       131072,
+		Limit:      1220703,
+		Buffer:     16793,
+	}
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	tbf, ok := qdiscs[0].(*Tbf)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if tbf.Rate != qdisc.Rate {
+		t.Fatal("Rate doesn't match")
+	}
+	if tbf.Limit != qdisc.Limit {
+		t.Fatal("Limit doesn't match")
+	}
+	if tbf.Buffer != qdisc.Buffer {
+		t.Fatal("Buffer doesn't match")
+	}
+	// Replace
+	// For replace to work, the handle MUST be different than the running one
+	attrs.Handle = MakeHandle(2, 0)
+	qdisc2 := NewHtb(attrs)
+	qdisc2.Rate2Quantum = 5
+	if err := QdiscReplace(qdisc2); err != nil {
+		t.Fatal(err)
+	}
+
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Exactly one qdisc must remain: the Htb that replaced the Tbf.
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to replace qdisc")
+	}
+	htb, ok := qdiscs[0].(*Htb)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if htb.Defcls != qdisc2.Defcls {
+		t.Fatal("Defcls doesn't match")
+	}
+	if htb.Rate2Quantum != qdisc2.Rate2Quantum {
+		t.Fatal("Rate2Quantum doesn't match")
+	}
+	if htb.Debug != qdisc2.Debug {
+		t.Fatal("Debug doesn't match")
+	}
+
+	// Delete
+	if err := QdiscDel(qdisc2); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}
+
+// TestTbfAddTbfChangeDel adds a Tbf qdisc to a freshly created ifb link,
+// changes its Rate in place via QdiscChange, verifies each step through
+// QdiscList, and finally deletes the qdisc and checks the list is empty.
+func TestTbfAddTbfChangeDel(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+	if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil {
+		t.Fatal(err)
+	}
+	link, err := LinkByName("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+
+	// Add
+	attrs := QdiscAttrs{
+		LinkIndex: link.Attrs().Index,
+		Handle:    MakeHandle(1, 0),
+		Parent:    HANDLE_ROOT,
+	}
+	qdisc := &Tbf{
+		QdiscAttrs: attrs,
+		Rate:       131072,
+		Limit:      1220703,
+		Buffer:     16793,
+	}
+	if err := QdiscAdd(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err := QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to add qdisc")
+	}
+	tbf, ok := qdiscs[0].(*Tbf)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if tbf.Rate != qdisc.Rate {
+		t.Fatal("Rate doesn't match")
+	}
+	if tbf.Limit != qdisc.Limit {
+		t.Fatal("Limit doesn't match")
+	}
+	if tbf.Buffer != qdisc.Buffer {
+		t.Fatal("Buffer doesn't match")
+	}
+	// Change
+	// For change to work, the handle MUST not change
+	qdisc.Rate = 23456
+	if err := QdiscChange(qdisc); err != nil {
+		t.Fatal(err)
+	}
+
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The change must update the existing qdisc, not add or drop one.
+	if len(qdiscs) != 1 {
+		t.Fatal("Failed to change qdisc")
+	}
+	tbf, ok = qdiscs[0].(*Tbf)
+	if !ok {
+		t.Fatal("Qdisc is the wrong type")
+	}
+	if tbf.Rate != qdisc.Rate {
+		t.Fatal("Rate doesn't match")
+	}
+	if tbf.Limit != qdisc.Limit {
+		t.Fatal("Limit doesn't match")
+	}
+	if tbf.Buffer != qdisc.Buffer {
+		t.Fatal("Buffer doesn't match")
+	}
+
+	// Delete
+	if err := QdiscDel(qdisc); err != nil {
+		t.Fatal(err)
+	}
+	qdiscs, err = QdiscList(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(qdiscs) != 0 {
+		t.Fatal("Failed to remove qdisc")
+	}
+}

+ 44 - 2
Godeps/_workspace/src/github.com/vishvananda/netlink/route.go

@@ -17,6 +17,13 @@ const (
 	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
 )
 
+// NextHopFlag is a single flag bit that can be set on or cleared from a
+// Route's Flags field.
+type NextHopFlag int
+
+// Flag values, taken directly from the syscall RTNH_F_* constants.
+const (
+	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
+	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
+)
+
 // Route represents a netlink route. A route is associated with a link,
 // has a destination network, an optional source ip, and optional
 // gateway. Advanced route parameters and non-main routing tables are
@@ -27,9 +34,44 @@ type Route struct {
 	Dst       *net.IPNet
 	Src       net.IP
 	Gw        net.IP
+	Flags     int
 }
 
 func (r Route) String() string {
-	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst,
-		r.Src, r.Gw)
+	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst,
+		r.Src, r.Gw, r.ListFlags())
+}
+
+// SetFlag turns on the bits of flag in r.Flags.
+func (r *Route) SetFlag(flag NextHopFlag) {
+	r.Flags = r.Flags | int(flag)
+}
+
+// ClearFlag turns off the bits of flag in r.Flags.
+func (r *Route) ClearFlag(flag NextHopFlag) {
+	r.Flags = r.Flags &^ int(flag)
+}
+
+// flagString pairs a NextHopFlag with the name used for it in string output.
+type flagString struct {
+	f NextHopFlag // flag bit to test for
+	s string      // name reported when the bit is set
+}
+
+// testFlags lists the flags that ListFlags checks, in reporting order.
+var testFlags = []flagString{
+	{f: FLAG_ONLINK, s: "onlink"},
+	{f: FLAG_PERVASIVE, s: "pervasive"},
+}
+
+// ListFlags returns the names of the testFlags entries whose bit is set in
+// r.Flags, in testFlags order. The result is nil when none are set.
+func (r *Route) ListFlags() []string {
+	var names []string
+	for i := range testFlags {
+		if r.Flags&int(testFlags[i].f) == 0 {
+			continue
+		}
+		names = append(names, testFlags[i].s)
+	}
+	return names
+}
+
+// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
+type RouteUpdate struct {
+	Type uint16
+	Route
 }

+ 75 - 49
Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go

@@ -14,23 +14,23 @@ import (
 // Equivalent to: `ip route add $route`
 func RouteAdd(route *Route) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-	return routeHandle(route, req)
+	return routeHandle(route, req, nl.NewRtMsg())
 }
 
-// RouteAdd will delete a route from the system.
+// RouteDel will delete a route from the system.
 // Equivalent to: `ip route del $route`
 func RouteDel(route *Route) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
-	return routeHandle(route, req)
+	return routeHandle(route, req, nl.NewRtDelMsg())
 }
 
-func routeHandle(route *Route, req *nl.NetlinkRequest) error {
+func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
 	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil {
 		return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
 	}
 
-	msg := nl.NewRtMsg()
 	msg.Scope = uint8(route.Scope)
+	msg.Flags = uint32(route.Flags)
 	family := -1
 	var rtAttrs []*nl.RtAttr
 
@@ -118,8 +118,7 @@ func RouteList(link Link, family int) ([]Route, error) {
 		index = base.Index
 	}
 
-	native := nl.NativeEndian()
-	res := make([]Route, 0)
+	var res []Route
 	for _, m := range msgs {
 		msg := nl.DeserializeRtMsg(m)
 
@@ -133,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) {
 			continue
 		}
 
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		route, err := deserializeRoute(m)
 		if err != nil {
 			return nil, err
 		}
 
-		route := Route{Scope: Scope(msg.Scope)}
-		for _, attr := range attrs {
-			switch attr.Attr.Type {
-			case syscall.RTA_GATEWAY:
-				route.Gw = net.IP(attr.Value)
-			case syscall.RTA_PREFSRC:
-				route.Src = net.IP(attr.Value)
-			case syscall.RTA_DST:
-				route.Dst = &net.IPNet{
-					IP:   attr.Value,
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
-				}
-			case syscall.RTA_OIF:
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
-				if link != nil && routeIndex != index {
-					// Ignore routes from other interfaces
-					continue
-				}
-				route.LinkIndex = routeIndex
-			}
+		if link != nil && route.LinkIndex != index {
+			// Ignore routes from other interfaces
+			continue
 		}
 		res = append(res, route)
 	}
@@ -165,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) {
 	return res, nil
 }
 
+// deserializeRoute decodes a binary netlink message into a Route struct
+func deserializeRoute(m []byte) (Route, error) {
+	route := Route{}
+	msg := nl.DeserializeRtMsg(m)
+	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+	if err != nil {
+		return route, err
+	}
+	route.Scope = Scope(msg.Scope)
+	route.Flags = int(msg.Flags)
+
+	native := nl.NativeEndian()
+	for _, attr := range attrs {
+		switch attr.Attr.Type {
+		case syscall.RTA_GATEWAY:
+			route.Gw = net.IP(attr.Value)
+		case syscall.RTA_PREFSRC:
+			route.Src = net.IP(attr.Value)
+		case syscall.RTA_DST:
+			route.Dst = &net.IPNet{
+				IP:   attr.Value,
+				Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+			}
+		case syscall.RTA_OIF:
+			routeIndex := int(native.Uint32(attr.Value[0:4]))
+			route.LinkIndex = routeIndex
+		}
+	}
+	return route, nil
+}
+
 // RouteGet gets a route to a specific destination from the host system.
 // Equivalent to: 'ip route get'.
 func RouteGet(destination net.IP) ([]Route, error) {
@@ -192,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) {
 		return nil, err
 	}
 
-	native := nl.NativeEndian()
-	res := make([]Route, 0)
+	var res []Route
 	for _, m := range msgs {
-		msg := nl.DeserializeRtMsg(m)
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		route, err := deserializeRoute(m)
 		if err != nil {
 			return nil, err
 		}
+		res = append(res, route)
+	}
+	return res, nil
+
+}
 
-		route := Route{}
-		for _, attr := range attrs {
-			switch attr.Attr.Type {
-			case syscall.RTA_GATEWAY:
-				route.Gw = net.IP(attr.Value)
-			case syscall.RTA_PREFSRC:
-				route.Src = net.IP(attr.Value)
-			case syscall.RTA_DST:
-				route.Dst = &net.IPNet{
-					IP:   attr.Value,
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+// RouteSubscribe takes a chan down which notifications will be sent
+// when routes are added or deleted. Close the 'done' chan to stop subscription.
+func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
+	if err != nil {
+		return err
+	}
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+	go func() {
+		defer close(ch)
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				return
+			}
+			for _, m := range msgs {
+				route, err := deserializeRoute(m.Data)
+				if err != nil {
+					return
 				}
-			case syscall.RTA_OIF:
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
-				route.LinkIndex = routeIndex
+				ch <- RouteUpdate{Type: m.Header.Type, Route: route}
 			}
 		}
-		res = append(res, route)
-	}
-	return res, nil
+	}()
 
+	return nil
 }

+ 62 - 0
Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go

@@ -2,7 +2,9 @@ package netlink
 
 import (
 	"net"
+	"syscall"
 	"testing"
+	"time"
 )
 
 func TestRouteAddDel(t *testing.T) {
@@ -82,3 +84,63 @@ func TestRouteAddIncomplete(t *testing.T) {
 		t.Fatal("Adding incomplete route should fail")
 	}
 }
+
+// expectRouteUpdate waits on ch for an update whose Type equals t and whose
+// destination IP equals dst.
+//
+// It returns true as soon as such an update arrives. It returns false if ch
+// is closed or if one minute passes without a match. Updates that carry no
+// destination are skipped.
+func expectRouteUpdate(ch <-chan RouteUpdate, t uint16, dst net.IP) bool {
+	// Create the deadline once: inside the loop it would restart on every
+	// non-matching update and the wait could go on indefinitely.
+	timeout := time.After(time.Minute)
+	for {
+		select {
+		case update, ok := <-ch:
+			if !ok {
+				// Channel closed: no further updates will arrive.
+				return false
+			}
+			// Dst is nil for routes without a destination attribute.
+			if update.Type == t && update.Route.Dst != nil && update.Route.Dst.IP.Equal(dst) {
+				return true
+			}
+		case <-timeout:
+			return false
+		}
+	}
+}
+
+// TestRouteSubscribe subscribes to route updates, then adds and deletes a
+// route on the loopback interface and checks that an RTM_NEWROUTE and an
+// RTM_DELROUTE update for that destination are delivered on the channel.
+func TestRouteSubscribe(t *testing.T) {
+	tearDown := setUpNetlinkTest(t)
+	defer tearDown()
+
+	ch := make(chan RouteUpdate)
+	done := make(chan struct{})
+	// Closing done on exit stops the subscription.
+	defer close(done)
+	if err := RouteSubscribe(ch, done); err != nil {
+		t.Fatal(err)
+	}
+
+	// get loopback interface
+	link, err := LinkByName("lo")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// bring the interface up
+	if err = LinkSetUp(link); err != nil {
+		t.Fatal(err)
+	}
+
+	// add a route on the loopback interface with an explicit source address
+	_, dst, err := net.ParseCIDR("192.168.0.0/24")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	ip := net.ParseIP("127.1.1.1")
+	route := Route{LinkIndex: link.Attrs().Index, Dst: dst, Src: ip}
+	err = RouteAdd(&route)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !expectRouteUpdate(ch, syscall.RTM_NEWROUTE, dst.IP) {
+		t.Fatal("Add update not received as expected")
+	}
+
+	err = RouteDel(&route)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !expectRouteUpdate(ch, syscall.RTM_DELROUTE, dst.IP) {
+		t.Fatal("Del update not received as expected")
+	}
+}

+ 1 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go

@@ -84,7 +84,7 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 		return nil, err
 	}
 
-	res := make([]XfrmPolicy, 0)
+	var res []XfrmPolicy
 	for _, m := range msgs {
 		msg := nl.DeserializeXfrmUserpolicyInfo(m)
 

+ 1 - 1
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go

@@ -118,7 +118,7 @@ func XfrmStateList(family int) ([]XfrmState, error) {
 		return nil, err
 	}
 
-	res := make([]XfrmState, 0)
+	var res []XfrmState
 	for _, m := range msgs {
 		msg := nl.DeserializeXfrmUsersaInfo(m)