mirror of
https://github.com/vishvananda/netlink
synced 2025-03-20 01:56:56 +00:00
Add a specific error to report that a netlink response had NLM_F_DUMP_INTR set, indicating that the set of results may be incomplete or inconsistent. unix.EINTR was previously returned (with no results) when the NLM_F_DUMP_INTR flag was set. Now, errors.Is(err, unix.EINTR) will still work. But, this will be a breaking change for any code that's checking for equality with unix.EINTR. Return results with ErrDumpInterrupted. Results may be incomplete or inconsistent, but give the caller the option of using them. Look for NLM_F_DUMP_INTR in more places: - linkSubscribeAt, neighSubscribeAt, routeSubscribeAt - can do an initial dump, which may report inconsistent results -> if there's an error callback, call it with ErrDumpInterrupted - socketDiagXDPExecutor - makes an NLM_F_DUMP request, without using Execute() -> give it the same behaviour as functions that do use Execute() Signed-off-by: Rob Murray <rob.murray@docker.com>
379 lines
10 KiB
Go
379 lines
10 KiB
Go
package netlink
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
|
|
"github.com/vishvananda/netlink/nl"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
const FibRuleInvert = 0x2
|
|
|
|
// RuleAdd adds a rule to the system.
|
|
// Equivalent to: ip rule add
|
|
func RuleAdd(rule *Rule) error {
|
|
return pkgHandle.RuleAdd(rule)
|
|
}
|
|
|
|
// RuleAdd adds a rule to the system.
|
|
// Equivalent to: ip rule add
|
|
func (h *Handle) RuleAdd(rule *Rule) error {
|
|
req := h.newNetlinkRequest(unix.RTM_NEWRULE, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
|
|
return ruleHandle(rule, req)
|
|
}
|
|
|
|
// RuleDel deletes a rule from the system.
|
|
// Equivalent to: ip rule del
|
|
func RuleDel(rule *Rule) error {
|
|
return pkgHandle.RuleDel(rule)
|
|
}
|
|
|
|
// RuleDel deletes a rule from the system.
|
|
// Equivalent to: ip rule del
|
|
func (h *Handle) RuleDel(rule *Rule) error {
|
|
req := h.newNetlinkRequest(unix.RTM_DELRULE, unix.NLM_F_ACK)
|
|
return ruleHandle(rule, req)
|
|
}
|
|
|
|
func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
|
|
msg := nl.NewRtMsg()
|
|
msg.Family = unix.AF_INET
|
|
msg.Protocol = unix.RTPROT_BOOT
|
|
msg.Scope = unix.RT_SCOPE_UNIVERSE
|
|
msg.Table = unix.RT_TABLE_UNSPEC
|
|
msg.Type = rule.Type // usually 0, same as unix.RTN_UNSPEC
|
|
if msg.Type == 0 && req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 {
|
|
msg.Type = unix.RTN_UNICAST
|
|
}
|
|
if rule.Invert {
|
|
msg.Flags |= FibRuleInvert
|
|
}
|
|
if rule.Family != 0 {
|
|
msg.Family = uint8(rule.Family)
|
|
}
|
|
if rule.Table >= 0 && rule.Table < 256 {
|
|
msg.Table = uint8(rule.Table)
|
|
}
|
|
if rule.Tos != 0 {
|
|
msg.Tos = uint8(rule.Tos)
|
|
}
|
|
|
|
var dstFamily uint8
|
|
var rtAttrs []*nl.RtAttr
|
|
if rule.Dst != nil && rule.Dst.IP != nil {
|
|
dstLen, _ := rule.Dst.Mask.Size()
|
|
msg.Dst_len = uint8(dstLen)
|
|
msg.Family = uint8(nl.GetIPFamily(rule.Dst.IP))
|
|
dstFamily = msg.Family
|
|
var dstData []byte
|
|
if msg.Family == unix.AF_INET {
|
|
dstData = rule.Dst.IP.To4()
|
|
} else {
|
|
dstData = rule.Dst.IP.To16()
|
|
}
|
|
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData))
|
|
}
|
|
|
|
if rule.Src != nil && rule.Src.IP != nil {
|
|
msg.Family = uint8(nl.GetIPFamily(rule.Src.IP))
|
|
if dstFamily != 0 && dstFamily != msg.Family {
|
|
return fmt.Errorf("source and destination ip are not the same IP family")
|
|
}
|
|
srcLen, _ := rule.Src.Mask.Size()
|
|
msg.Src_len = uint8(srcLen)
|
|
var srcData []byte
|
|
if msg.Family == unix.AF_INET {
|
|
srcData = rule.Src.IP.To4()
|
|
} else {
|
|
srcData = rule.Src.IP.To16()
|
|
}
|
|
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_SRC, srcData))
|
|
}
|
|
|
|
req.AddData(msg)
|
|
for i := range rtAttrs {
|
|
req.AddData(rtAttrs[i])
|
|
}
|
|
|
|
if rule.Priority >= 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.Priority))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b))
|
|
}
|
|
if rule.Mark != 0 || rule.Mask != nil {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, rule.Mark)
|
|
req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b))
|
|
}
|
|
if rule.Mask != nil {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, *rule.Mask)
|
|
req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b))
|
|
}
|
|
if rule.Flow >= 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.Flow))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_FLOW, b))
|
|
}
|
|
if rule.TunID > 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.TunID))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_TUN_ID, b))
|
|
}
|
|
if rule.Table >= 256 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.Table))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_TABLE, b))
|
|
}
|
|
if msg.Table > 0 {
|
|
if rule.SuppressPrefixlen >= 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.SuppressPrefixlen))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_PREFIXLEN, b))
|
|
}
|
|
if rule.SuppressIfgroup >= 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.SuppressIfgroup))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_IFGROUP, b))
|
|
}
|
|
}
|
|
if rule.IifName != "" {
|
|
req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00")))
|
|
}
|
|
if rule.OifName != "" {
|
|
req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00")))
|
|
}
|
|
if rule.Goto >= 0 {
|
|
msg.Type = nl.FR_ACT_GOTO
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.Goto))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b))
|
|
}
|
|
|
|
if rule.IPProto > 0 {
|
|
b := make([]byte, 4)
|
|
native.PutUint32(b, uint32(rule.IPProto))
|
|
req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b))
|
|
}
|
|
|
|
if rule.Dport != nil {
|
|
b := rule.Dport.toRtAttrData()
|
|
req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b))
|
|
}
|
|
|
|
if rule.Sport != nil {
|
|
b := rule.Sport.toRtAttrData()
|
|
req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b))
|
|
}
|
|
|
|
if rule.UIDRange != nil {
|
|
b := rule.UIDRange.toRtAttrData()
|
|
req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b))
|
|
}
|
|
|
|
if rule.Protocol > 0 {
|
|
req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol)))
|
|
}
|
|
|
|
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
|
|
return err
|
|
}
|
|
|
|
// RuleList lists rules in the system.
|
|
// Equivalent to: ip rule list
|
|
//
|
|
// If the returned error is [ErrDumpInterrupted], results may be inconsistent
|
|
// or incomplete.
|
|
func RuleList(family int) ([]Rule, error) {
|
|
return pkgHandle.RuleList(family)
|
|
}
|
|
|
|
// RuleList lists rules in the system.
|
|
// Equivalent to: ip rule list
|
|
//
|
|
// If the returned error is [ErrDumpInterrupted], results may be inconsistent
|
|
// or incomplete.
|
|
func (h *Handle) RuleList(family int) ([]Rule, error) {
|
|
return h.RuleListFiltered(family, nil, 0)
|
|
}
|
|
|
|
// RuleListFiltered gets a list of rules in the system filtered by the
|
|
// specified rule template `filter`.
|
|
// Equivalent to: ip rule list
|
|
//
|
|
// If the returned error is [ErrDumpInterrupted], results may be inconsistent
|
|
// or incomplete.
|
|
func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
|
|
return pkgHandle.RuleListFiltered(family, filter, filterMask)
|
|
}
|
|
|
|
// RuleListFiltered lists rules in the system.
|
|
// Equivalent to: ip rule list
|
|
//
|
|
// If the returned error is [ErrDumpInterrupted], results may be inconsistent
|
|
// or incomplete.
|
|
func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
|
|
req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST)
|
|
msg := nl.NewIfInfomsg(family)
|
|
req.AddData(msg)
|
|
|
|
msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE)
|
|
if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
|
|
return nil, executeErr
|
|
}
|
|
|
|
var res = make([]Rule, 0)
|
|
for i := range msgs {
|
|
msg := nl.DeserializeRtMsg(msgs[i])
|
|
attrs, err := nl.ParseRouteAttr(msgs[i][msg.Len():])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
rule := NewRule()
|
|
rule.Priority = 0 // The default priority from kernel
|
|
|
|
rule.Invert = msg.Flags&FibRuleInvert > 0
|
|
rule.Family = int(msg.Family)
|
|
rule.Tos = uint(msg.Tos)
|
|
|
|
for j := range attrs {
|
|
switch attrs[j].Attr.Type {
|
|
case unix.RTA_TABLE:
|
|
rule.Table = int(native.Uint32(attrs[j].Value[0:4]))
|
|
case nl.FRA_SRC:
|
|
rule.Src = &net.IPNet{
|
|
IP: attrs[j].Value,
|
|
Mask: net.CIDRMask(int(msg.Src_len), 8*len(attrs[j].Value)),
|
|
}
|
|
case nl.FRA_DST:
|
|
rule.Dst = &net.IPNet{
|
|
IP: attrs[j].Value,
|
|
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)),
|
|
}
|
|
case nl.FRA_FWMARK:
|
|
rule.Mark = native.Uint32(attrs[j].Value[0:4])
|
|
case nl.FRA_FWMASK:
|
|
mask := native.Uint32(attrs[j].Value[0:4])
|
|
rule.Mask = &mask
|
|
case nl.FRA_TUN_ID:
|
|
rule.TunID = uint(native.Uint64(attrs[j].Value[0:8]))
|
|
case nl.FRA_IIFNAME:
|
|
rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
|
|
case nl.FRA_OIFNAME:
|
|
rule.OifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
|
|
case nl.FRA_SUPPRESS_PREFIXLEN:
|
|
i := native.Uint32(attrs[j].Value[0:4])
|
|
if i != 0xffffffff {
|
|
rule.SuppressPrefixlen = int(i)
|
|
}
|
|
case nl.FRA_SUPPRESS_IFGROUP:
|
|
i := native.Uint32(attrs[j].Value[0:4])
|
|
if i != 0xffffffff {
|
|
rule.SuppressIfgroup = int(i)
|
|
}
|
|
case nl.FRA_FLOW:
|
|
rule.Flow = int(native.Uint32(attrs[j].Value[0:4]))
|
|
case nl.FRA_GOTO:
|
|
rule.Goto = int(native.Uint32(attrs[j].Value[0:4]))
|
|
case nl.FRA_PRIORITY:
|
|
rule.Priority = int(native.Uint32(attrs[j].Value[0:4]))
|
|
case nl.FRA_IP_PROTO:
|
|
rule.IPProto = int(native.Uint32(attrs[j].Value[0:4]))
|
|
case nl.FRA_DPORT_RANGE:
|
|
rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
|
|
case nl.FRA_SPORT_RANGE:
|
|
rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
|
|
case nl.FRA_UID_RANGE:
|
|
rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8]))
|
|
case nl.FRA_PROTOCOL:
|
|
rule.Protocol = uint8(attrs[j].Value[0])
|
|
}
|
|
}
|
|
|
|
if filter != nil {
|
|
switch {
|
|
case filterMask&RT_FILTER_SRC != 0 &&
|
|
(rule.Src == nil || rule.Src.String() != filter.Src.String()):
|
|
continue
|
|
case filterMask&RT_FILTER_DST != 0 &&
|
|
(rule.Dst == nil || rule.Dst.String() != filter.Dst.String()):
|
|
continue
|
|
case filterMask&RT_FILTER_TABLE != 0 &&
|
|
filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table:
|
|
continue
|
|
case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos:
|
|
continue
|
|
case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority:
|
|
continue
|
|
case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark:
|
|
continue
|
|
case filterMask&RT_FILTER_MASK != 0 && !ptrEqual(rule.Mask, filter.Mask):
|
|
continue
|
|
}
|
|
}
|
|
|
|
res = append(res, *rule)
|
|
}
|
|
|
|
return res, executeErr
|
|
}
|
|
|
|
func (pr *RulePortRange) toRtAttrData() []byte {
|
|
b := [][]byte{make([]byte, 2), make([]byte, 2)}
|
|
native.PutUint16(b[0], pr.Start)
|
|
native.PutUint16(b[1], pr.End)
|
|
return bytes.Join(b, []byte{})
|
|
}
|
|
|
|
func (pr *RuleUIDRange) toRtAttrData() []byte {
|
|
b := [][]byte{make([]byte, 4), make([]byte, 4)}
|
|
native.PutUint32(b[0], pr.Start)
|
|
native.PutUint32(b[1], pr.End)
|
|
return bytes.Join(b, []byte{})
|
|
}
|
|
|
|
func ptrEqual(a, b *uint32) bool {
|
|
if a == b {
|
|
return true
|
|
}
|
|
if (a == nil) || (b == nil) {
|
|
return false
|
|
}
|
|
return *a == *b
|
|
}
|
|
|
|
func (r Rule) typeString() string {
|
|
switch r.Type {
|
|
case unix.RTN_UNSPEC: // zero
|
|
return ""
|
|
case unix.RTN_UNICAST:
|
|
return ""
|
|
case unix.RTN_LOCAL:
|
|
return "local"
|
|
case unix.RTN_BROADCAST:
|
|
return "broadcast"
|
|
case unix.RTN_ANYCAST:
|
|
return "anycast"
|
|
case unix.RTN_MULTICAST:
|
|
return "multicast"
|
|
case unix.RTN_BLACKHOLE:
|
|
return "blackhole"
|
|
case unix.RTN_UNREACHABLE:
|
|
return "unreachable"
|
|
case unix.RTN_PROHIBIT:
|
|
return "prohibit"
|
|
case unix.RTN_THROW:
|
|
return "throw"
|
|
case unix.RTN_NAT:
|
|
return "nat"
|
|
case unix.RTN_XRESOLVE:
|
|
return "xresolve"
|
|
default:
|
|
return fmt.Sprintf("type(0x%x)", r.Type)
|
|
}
|
|
}
|