diff --git a/filter.go b/filter.go new file mode 100644 index 0000000..83ad700 --- /dev/null +++ b/filter.go @@ -0,0 +1,56 @@ +package netlink + +import ( + "fmt" +) + +// Filter is implemented by every filter type; Type reports the kind name sent as TCA_KIND. +type Filter interface { + Attrs() *FilterAttrs + Type() string +} + +// FilterAttrs represents the common attributes of a netlink filter. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type FilterAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Priority uint16 // lower is higher priority + Protocol uint16 // syscall.ETH_P_* +} + +func (q FilterAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol) +} + +// U32 filters on many packet related properties +type U32 struct { + FilterAttrs + // Currently only supports redirecting to another interface + RedirIndex int +} + +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *U32) Type() string { + return "u32" +} + +// GenericFilter filters represent types that are not currently understood +// by this netlink library. +type GenericFilter struct { + FilterAttrs + FilterType string +} + +func (filter *GenericFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *GenericFilter) Type() string { + return filter.FilterType +} diff --git a/filter_linux.go b/filter_linux.go new file mode 100644 index 0000000..e58a9bd --- /dev/null +++ b/filter_linux.go @@ -0,0 +1,196 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// FilterDel will delete a filter from the system. 
+// Equivalent to: `tc filter del $filter` +func FilterDel(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, base.Protocol), + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterAdd will add a filter to the system. +// Equivalent to: `tc filter add $filter` +func FilterAdd(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, base.Protocol), + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if u32, ok := filter.(*U32); ok { + // match all + sel := nl.TcU32Sel{ + Nkeys: 1, + Flags: nl.TC_U32_TERMINAL, + } + sel.Keys = append(sel.Keys, nl.TcU32Key{}) + nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) + actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) + table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil) + nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred")) + // redirect to other interface + mir := nl.TcMirred{ + Action: nl.TC_ACT_STOLEN, + Eaction: nl.TCA_EGRESS_REDIR, + Ifindex: uint32(u32.RedirIndex), + } + aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil) + nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize()) + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally returns nothing if link and parent are not specified. +// Only u32 filters whose options carry a selector (TCA_U32_SEL) are returned. 
+func FilterList(link Link, parent uint32) ([]Filter, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER) + if err != nil { + return nil, err + } + + var res []Filter + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := FilterAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + base.Priority, base.Protocol = MajorMinor(msg.Info) + var filter Filter + filterType := "" + detailed := false + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + filterType = string(attr.Value[:len(attr.Value)-1]) + switch filterType { + case "u32": + filter = &U32{} + default: + filter = &GenericFilter{FilterType: filterType} + } + case nl.TCA_OPTIONS: + switch filterType { + case "u32": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + detailed, err = parseU32Data(filter, data) + if err != nil { + return nil, err + } + } + } + } + // only return the detailed version of the filter + if detailed { + *filter.Attrs() = base + res = append(res, filter) + } + } + + return res, nil +} + +// parseU32Data reads the u32 options in data into filter, which must be a *U32. +// The bool reports whether a TCA_U32_SEL attribute was seen. Parsing stops early, +// without error, if the selector is not a single-key terminal match or the action is not mirred. +func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + native = nl.NativeEndian() + u32 := filter.(*U32) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_U32_SEL: + detailed = true + sel := nl.DeserializeTcU32Sel(datum.Value) + // only parse if we have a very basic redirect + if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 { + return detailed, nil + } + case nl.TCA_U32_ACT: + table, err := nl.ParseRouteAttr(datum.Value) + if err != nil { 
+ return detailed, err + } + if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB { + return detailed, fmt.Errorf("Action table not formed properly") + } + aattrs, err := nl.ParseRouteAttr(table[0].Value) + if err != nil { + return detailed, err + } + for _, aattr := range aattrs { + switch aattr.Attr.Type { + case nl.TCA_KIND: + actionType := string(aattr.Value[:len(aattr.Value)-1]) + // only parse if the action is mirred + if actionType != "mirred" { + return detailed, nil + } + case nl.TCA_OPTIONS: + adata, err := nl.ParseRouteAttr(aattr.Value) + if err != nil { + return detailed, err + } + for _, adatum := range adata { + switch adatum.Attr.Type { + case nl.TCA_MIRRED_PARMS: + mir := nl.DeserializeTcMirred(adatum.Value) + u32.RedirIndex = int(mir.Ifindex) + } + } + } + } + } + } + return detailed, nil +} diff --git a/filter_test.go b/filter_test.go new file mode 100644 index 0000000..206699c --- /dev/null +++ b/filter_test.go @@ -0,0 +1,91 @@ +package netlink + +import ( + "syscall" + "testing" +) + +func TestFilterAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + redir, err := LinkByName("bar") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(redir); err != nil { + t.Fatal(err) + } + qdisc := &Ingress{ + QdiscAttrs: QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(0xffff, 0), + Parent: HANDLE_INGRESS, + }, + } + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + _, ok := qdiscs[0].(*Ingress) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + filter := &U32{ + FilterAttrs: FilterAttrs{ + LinkIndex: 
+ link.Attrs().Index, + Parent: MakeHandle(0xffff, 0), + Priority: 1, + Protocol: syscall.ETH_P_IP, + }, + RedirIndex: redir.Attrs().Index, + } + if err := FilterAdd(filter); err != nil { + t.Fatal(err) + } + filters, err := FilterList(link, MakeHandle(0xffff, 0)) + if err != nil { + t.Fatal(err) + } + if len(filters) != 1 { + t.Fatal("Failed to add filter") + } + if err := FilterDel(filter); err != nil { + t.Fatal(err) + } + filters, err = FilterList(link, MakeHandle(0xffff, 0)) + if err != nil { + t.Fatal(err) + } + if len(filters) != 0 { + t.Fatal("Failed to remove filter") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} diff --git a/nl/nl_linux.go b/nl/nl_linux.go index 3cb137d..8dbd92b 100644 --- a/nl/nl_linux.go +++ b/nl/nl_linux.go @@ -142,7 +142,7 @@ func (a *RtAttr) Len() int { } // Serialize the RtAttr into a byte array -// This can't ust unsafe.cast because it must iterate through children. +// This can't just unsafe.cast because it must iterate through children. 
func (a *RtAttr) Serialize() []byte { native := NativeEndian() diff --git a/nl/tc_linux.go b/nl/tc_linux.go index 6e74c52..8790b4c 100644 --- a/nl/tc_linux.go +++ b/nl/tc_linux.go @@ -35,6 +35,9 @@ const ( SizeofTcPrioMap = 0x14 SizeofTcRateSpec = 0x0c SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c + SizeofTcU32Key = 0x10 + SizeofTcU32Sel = 0x10 // without keys + SizeofTcMirred = 0x1c ) // struct tcmsg { @@ -105,6 +108,10 @@ type TcPrioMap struct { Priomap [TC_PRIO_MAX + 1]uint8 } +func (msg *TcPrioMap) Len() int { + return SizeofTcPrioMap +} + func DeserializeTcPrioMap(b []byte) *TcPrioMap { return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0])) } @@ -143,6 +150,10 @@ type TcRateSpec struct { Rate uint32 } +func (msg *TcRateSpec) Len() int { + return SizeofTcRateSpec +} + func DeserializeTcRateSpec(b []byte) *TcRateSpec { return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) } @@ -167,6 +178,10 @@ type TcTbfQopt struct { Mtu uint32 } +func (msg *TcTbfQopt) Len() int { + return SizeofTcTbfQopt +} + func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0])) } @@ -174,3 +189,172 @@ func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { func (x *TcTbfQopt) Serialize() []byte { return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:] } + +const ( + TCA_U32_UNSPEC = iota + TCA_U32_CLASSID + TCA_U32_HASH + TCA_U32_LINK + TCA_U32_DIVISOR + TCA_U32_SEL + TCA_U32_POLICE + TCA_U32_ACT + TCA_U32_INDEV + TCA_U32_PCNT + TCA_U32_MARK + TCA_U32_MAX = TCA_U32_MARK +) + +// struct tc_u32_key { +// __be32 mask; +// __be32 val; +// int off; +// int offmask; +// }; + +type TcU32Key struct { + Mask uint32 // big endian + Val uint32 // big endian + Off int32 + OffMask int32 +} + +func (msg *TcU32Key) Len() int { + return SizeofTcU32Key +} + +func DeserializeTcU32Key(b []byte) *TcU32Key { + return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0])) +} + +func (x *TcU32Key) Serialize() []byte { + return 
+ (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_u32_sel { +// unsigned char flags; +// unsigned char offshift; +// unsigned char nkeys; +// +// __be16 offmask; +// __u16 off; +// short offoff; +// +// short hoff; +// __be32 hmask; +// struct tc_u32_key keys[0]; +// }; + +const ( + TC_U32_TERMINAL = 1 << iota + TC_U32_OFFSET = 1 << iota + TC_U32_VAROFFSET = 1 << iota + TC_U32_EAT = 1 << iota +) + +type TcU32Sel struct { + Flags uint8 + Offshift uint8 + Nkeys uint8 + Pad uint8 + Offmask uint16 // big endian + Off uint16 + Offoff int16 + Hoff int16 + Hmask uint32 // big endian + Keys []TcU32Key +} + +func (msg *TcU32Sel) Len() int { + return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key +} + +func DeserializeTcU32Sel(b []byte) *TcU32Sel { + x := &TcU32Sel{} + copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b) + next := SizeofTcU32Sel + var i uint8 + for i = 0; i < x.Nkeys; i++ { + x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:])) + next += SizeofTcU32Key + } + return x +} + +func (x *TcU32Sel) Serialize() []byte { + // This can't just unsafe.cast because it must iterate through keys. 
+ buf := make([]byte, x.Len()) + copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:]) + next := SizeofTcU32Sel + for _, key := range x.Keys { + keyBuf := key.Serialize() + copy(buf[next:], keyBuf) + next += SizeofTcU32Key + } + return buf +} + +const ( + TCA_ACT_MIRRED = 8 +) + +const ( + TCA_MIRRED_UNSPEC = iota + TCA_MIRRED_TM + TCA_MIRRED_PARMS + TCA_MIRRED_MAX = TCA_MIRRED_PARMS +) + +const ( + TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/ + TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */ + TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/ + TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */ +) + +// TC_ACT_* verdicts, numbered explicitly as in the kernel's linux/pkt_cls.h (iota would start TC_ACT_OK at 1). +const ( + TC_ACT_UNSPEC = int32(-1) + TC_ACT_OK = 0 + TC_ACT_RECLASSIFY = 1 + TC_ACT_SHOT = 2 + TC_ACT_PIPE = 3 + TC_ACT_STOLEN = 4 + TC_ACT_QUEUED = 5 + TC_ACT_REPEAT = 6 + TC_ACT_JUMP = 0x10000000 +) + +// #define tc_gen \ +// __u32 index; \ +// __u32 capab; \ +// int action; \ +// int refcnt; \ +// int bindcnt +// struct tc_mirred { +// tc_gen; +// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ +// __u32 ifindex; /* ifindex of egress port */ +// }; + +type TcMirred struct { + Index uint32 + Capab uint32 + Action int32 + Refcnt int32 + Bindcnt int32 + Eaction int32 + Ifindex uint32 +} + +func (msg *TcMirred) Len() int { + return SizeofTcMirred +} + +func DeserializeTcMirred(b []byte) *TcMirred { + return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0])) +} + +func (x *TcMirred) Serialize() []byte { + return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] +} diff --git a/qdisc.go b/qdisc.go index c8dcf33..960a477 100644 --- a/qdisc.go +++ b/qdisc.go @@ -17,16 +17,17 @@ type Qdisc interface { } // Qdisc represents a netlink qdisc. A qdisc is associated with a link, -// has a handle and a parent. The root qdisc of a device should have a -// parent == HANDLE_ROOT. +// has a handle, a parent and a refcnt. The root qdisc of a device should +// have parent == HANDLE_ROOT. 
type QdiscAttrs struct { LinkIndex int Handle uint32 Parent uint32 + Refcnt uint32 // read only; filled in by QdiscList } func (q QdiscAttrs) String() string { - return fmt.Sprintf("{Ifindex: %d, Handle: %s Parent: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent)) + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt) } func MakeHandle(major, minor uint16) uint32 { @@ -67,8 +68,8 @@ func (qdisc *PfifoFast) Type() string { return "pfifo_fast" } -// TokenBucketFilter is a classful qdisc that rate limits based on tokens -type TokenBucketFilter struct { +// Tbf is a classful qdisc that rate limits based on tokens +type Tbf struct { QdiscAttrs // TODO: handle 64bit rate properly Rate uint64 @@ -77,14 +78,27 @@ type TokenBucketFilter struct { // TODO: handle other settings } -func (qdisc *TokenBucketFilter) Attrs() *QdiscAttrs { +func (qdisc *Tbf) Attrs() *QdiscAttrs { return &qdisc.QdiscAttrs } -func (qdisc *TokenBucketFilter) Type() string { +func (qdisc *Tbf) Type() string { return "tbf" } +// Ingress is a qdisc for adding ingress filters +type Ingress struct { + QdiscAttrs +} + +func (qdisc *Ingress) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Ingress) Type() string { + return "ingress" +} + // GenericQdisc qdiscs represent types that are not currently understood // by this netlink library. 
type GenericQdisc struct { diff --git a/qdisc_linux.go b/qdisc_linux.go index 6d5ef7e..25669cb 100644 --- a/qdisc_linux.go +++ b/qdisc_linux.go @@ -1,6 +1,7 @@ package netlink import ( + "fmt" "io/ioutil" "strconv" "strings" @@ -38,16 +39,21 @@ func QdiscAdd(qdisc Qdisc) error { Parent: base.Parent, } req.AddData(msg) - req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.NonZeroTerminated(qdisc.Type()))) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if tbf, ok := qdisc.(*TokenBucketFilter); ok { + if tbf, ok := qdisc.(*Tbf); ok { opt := nl.TcTbfQopt{} // TODO: handle rate > uint32 opt.Rate.Rate = uint32(tbf.Rate) opt.Limit = tbf.Limit opt.Buffer = tbf.Buffer nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) + } else if _, ok := qdisc.(*Ingress); ok { + // an Ingress qdisc is rejected before sending unless Parent == HANDLE_INGRESS + if msg.Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } } req.AddData(options) _, err := req.Execute(syscall.NETLINK_ROUTE, 0) @@ -88,6 +94,7 @@ func QdiscList(link Link) ([]Qdisc, error) { LinkIndex: int(msg.Ifindex), Handle: msg.Handle, Parent: msg.Parent, + Refcnt: msg.Info, // the tcmsg Info field is interpreted as the qdisc refcnt here } var qdisc Qdisc qdiscType := "" @@ -99,7 +106,9 @@ func QdiscList(link Link) ([]Qdisc, error) { case "pfifo_fast": qdisc = &PfifoFast{} case "tbf": - qdisc = &TokenBucketFilter{} + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} default: qdisc = &GenericQdisc{QdiscType: qdiscType} } @@ -115,9 +124,10 @@ func QdiscList(link Link) ([]Qdisc, error) { if err != nil { return nil, err } - if err := parseTokenBucketFilterData(qdisc, data); err != nil { + if err := parseTbfData(qdisc, data); err != nil { return nil, err } + // no options for ingress } } } @@ -136,9 +146,9 @@ func parsePfifoFastData(qdisc Qdisc, value []byte) error { return nil } -func parseTokenBucketFilterData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { +func parseTbfData(qdisc Qdisc, data 
[]syscall.NetlinkRouteAttr) error { native = nl.NativeEndian() - tbf := qdisc.(*TokenBucketFilter) + tbf := qdisc.(*Tbf) for _, datum := range data { switch datum.Attr.Type { case nl.TCA_TBF_PARMS: diff --git a/qdisc_test.go b/qdisc_test.go index 21aa7c4..6f85b64 100644 --- a/qdisc_test.go +++ b/qdisc_test.go @@ -17,7 +17,7 @@ func TestQdiscAddDel(t *testing.T) { if err := LinkSetUp(link); err != nil { t.Fatal(err) } - qdisc := &TokenBucketFilter{ + qdisc := &Tbf{ QdiscAttrs: QdiscAttrs{ LinkIndex: link.Attrs().Index, Handle: MakeHandle(1, 0), @@ -37,7 +37,7 @@ func TestQdiscAddDel(t *testing.T) { if len(qdiscs) != 1 { t.Fatal("Failed to add qdisc") } - tbf, ok := qdiscs[0].(*TokenBucketFilter) + tbf, ok := qdiscs[0].(*Tbf) if !ok { t.Fatal("Qdisc is the wrong type") } @@ -53,7 +53,7 @@ func TestQdiscAddDel(t *testing.T) { if err := QdiscDel(qdisc); err != nil { t.Fatal(err) } - qdiscs, err = QdiscList(nil) + qdiscs, err = QdiscList(link) if err != nil { t.Fatal(err) }