Introduce Conntrack support

- Conntrack table FLUSH
- Conntrack table DELETE with filter
    The filter is only for IP field
- Conntrack table GET
  The flow information is not complete, but the method
  returns a simplified structure with basic flow info

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
Flavio Crisciani 2017-04-03 18:51:36 -07:00 committed by Vish Ishaya
parent 1b53666767
commit 24a3a2da61
3 changed files with 544 additions and 1 deletions

340
conntrack_linux.go Normal file
View File

@ -0,0 +1,340 @@
package netlink
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// ConntrackTableType Conntrack table for the netlink operation
type ConntrackTableType uint8
const (
// ConntrackTable Conntrack table
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK 1
ConntrackTable = 1
// ConntrackExpectTable Conntrack expect table
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
ConntrackExpectTable = 2
)
// InetFamily Family type
type InetFamily uint8
// -L [table] [options] List conntrack or expectation table
// -G [table] parameters Get conntrack or expectation
// -I [table] parameters Create a conntrack or expectation
// -U [table] parameters Update a conntrack
// -E [table] [options] Show events
// -C [table] Show counter
// -S Show statistics
// ConntrackTableList returns the flow list of a table of a specific family
// conntrack -L [table] [options] List conntrack or expectation table
func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
return pkgHandle.ConntrackTableList(table, family)
}
// ConntrackTableFlush flushes all the flows of a specified table
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func ConntrackTableFlush(table ConntrackTableType) error {
return pkgHandle.ConntrackTableFlush(table)
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
// conntrack -D [table] parameters Delete conntrack or expectation
func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
return pkgHandle.ConntrackDeleteFilter(table, family, filter)
}
// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
// conntrack -L [table] [options] List conntrack or expectation table
func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
res, err := h.dumpConntrackTable(table, family)
if err != nil {
return nil, err
}
// Deserialize all the flows
var result []*ConntrackFlow
for _, dataRaw := range res {
result = append(result, parseRawData(dataRaw))
}
return result, nil
}
// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
return err
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
// conntrack -D [table] parameters Delete conntrack or expectation
func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
res, err := h.dumpConntrackTable(table, family)
if err != nil {
return 0, err
}
var matched uint
for _, dataRaw := range res {
flow := parseRawData(dataRaw)
if match := filter.MatchConntrackFlow(flow); match {
req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
req2.AddRawData(dataRaw[4:])
req2.Execute(syscall.NETLINK_NETFILTER, 0)
matched++
}
}
return matched, nil
}
func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
// Create the Netlink request object
req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
// Add the netfilter header
msg := &nl.Nfgenmsg{
NfgenFamily: uint8(family),
Version: nl.NFNETLINK_V0,
ResId: 0,
}
req.AddData(msg)
return req
}
func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
return req.Execute(syscall.NETLINK_NETFILTER, 0)
}
// The full conntrack flow structure is very complicated and can be found in the file:
// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
// For the time being, the structure below allows to parse and extract the base information of a flow
type ipTuple struct {
SrcIP net.IP
DstIP net.IP
Protocol uint8
SrcPort uint16
DstPort uint16
}
type ConntrackFlow struct {
FamilyType uint8
Forward ipTuple
Reverse ipTuple
}
func (s *ConntrackFlow) String() string {
// conntrack cmd output:
// udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001
return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d",
nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort)
}
// This method parse the ip tuple structure
// The message structure is the following:
// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
// <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) {
for i := 0; i < 2; i++ {
_, t, _, v := parseNfAttrTLV(reader)
switch t {
case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
tpl.SrcIP = v
case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
tpl.DstIP = v
}
}
// Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
reader.Seek(4, io.SeekCurrent)
_, t, _, v := parseNfAttrTLV(reader)
if t == nl.CTA_PROTO_NUM {
tpl.Protocol = uint8(v[0])
}
// Skip some padding 3 bytes
reader.Seek(3, io.SeekCurrent)
for i := 0; i < 2; i++ {
_, t, _ := parseNfAttrTL(reader)
switch t {
case nl.CTA_PROTO_SRC_PORT:
parseBERaw16(reader, &tpl.SrcPort)
case nl.CTA_PROTO_DST_PORT:
parseBERaw16(reader, &tpl.DstPort)
}
// Skip some padding 2 byte
reader.Seek(2, io.SeekCurrent)
}
}
func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
isNested, attrType, len = parseNfAttrTL(r)
value = make([]byte, len)
binary.Read(r, binary.BigEndian, &value)
return isNested, attrType, len, value
}
func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
binary.Read(r, nl.NativeEndian(), &len)
len -= nl.SizeofNfattr
binary.Read(r, nl.NativeEndian(), &attrType)
isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
attrType = attrType & (nl.NLA_F_NESTED - 1)
return isNested, attrType, len
}
func parseBERaw16(r *bytes.Reader, v *uint16) {
binary.Read(r, binary.BigEndian, v)
}
func parseRawData(data []byte) *ConntrackFlow {
s := &ConntrackFlow{}
// First there is the Nfgenmsg header
// consume only the family field
reader := bytes.NewReader(data)
binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
// skip rest of the Netfilter header
reader.Seek(3, io.SeekCurrent)
// The message structure is the following:
// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
// flow information of the forward flow
// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
// flow information of the reverse flow
for reader.Len() > 0 {
nested, t, l := parseNfAttrTL(reader)
if nested && t == nl.CTA_TUPLE_ORIG {
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
parseIpTuple(reader, &s.Forward)
}
} else if nested && t == nl.CTA_TUPLE_REPLY {
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
parseIpTuple(reader, &s.Reverse)
// Got all the useful information stop parsing
break
} else {
// Header not recognized skip it
reader.Seek(int64(l), io.SeekCurrent)
}
}
}
return s
}
// Conntrack parameters and options:
// -n, --src-nat ip source NAT ip
// -g, --dst-nat ip destination NAT ip
// -j, --any-nat ip source or destination NAT ip
// -m, --mark mark Set mark
// -c, --secmark secmark Set selinux secmark
// -e, --event-mask eventmask Event mask, eg. NEW,DESTROY
// -z, --zero Zero counters while listing
// -o, --output type[,...] Output format, eg. xml
// -l, --label label[,...] conntrack labels
// Common parameters and options:
// -s, --src, --orig-src ip Source address from original direction
// -d, --dst, --orig-dst ip Destination address from original direction
// -r, --reply-src ip Source addres from reply direction
// -q, --reply-dst ip Destination address from reply direction
// -p, --protonum proto Layer 4 Protocol, eg. 'tcp'
// -f, --family proto Layer 3 Protocol, eg. 'ipv6'
// -t, --timeout timeout Set timeout
// -u, --status status Set status, eg. ASSURED
// -w, --zone value Set conntrack zone
// --orig-zone value Set zone for original direction
// --reply-zone value Set zone for reply direction
// -b, --buffer-size Netlink socket buffer size
// --mask-src ip Source mask address
// --mask-dst ip Destination mask address
// Filter types
type ConntrackFilterType uint8
const (
ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction
ConntrackOrigDstIP // -orig-dst ip Destination address from original direction
ConntrackNatSrcIP // -src-nat ip Source NAT ip
ConntrackNatDstIP // -dst-nat ip Destination NAT ip
ConntrackNatAnyIP // -any-nat ip Source or destination NAT ip
)
type ConntrackFilter struct {
ipFilter map[ConntrackFilterType]net.IP
}
// AddIP adds an IP to the conntrack filter
func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip *net.IP) error {
if f.ipFilter == nil {
f.ipFilter = make(map[ConntrackFilterType]net.IP)
}
if _, ok := f.ipFilter[tp]; ok {
return errors.New("Filter attribute already present")
}
f.ipFilter[tp] = *ip
return nil
}
// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
// false otherwise
func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
if len(f.ipFilter) == 0 {
// empty filter always not match
return false
}
match := true
// -orig-src ip Source address from original direction
if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
match = match && elem.Equal(flow.Forward.SrcIP)
}
// -orig-dst ip Destination address from original direction
if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
match = match && elem.Equal(flow.Forward.DstIP)
}
// -src-nat ip Source NAT ip
if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
match = match && elem.Equal(flow.Reverse.SrcIP)
}
// -dst-nat ip Destination NAT ip
if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
match = match && elem.Equal(flow.Reverse.DstIP)
}
// -any-nat ip Source or destination NAT ip
if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
}
return match
}

189
nl/conntrack_linux.go Normal file
View File

@ -0,0 +1,189 @@
package nl
import "unsafe"
// Track the message sizes for the correct serialization/deserialization
const (
SizeofNfgenmsg = 4
SizeofNfattr = 4
SizeofNfConntrack = 376
SizeofNfctTupleHead = 52
)
var L4ProtoMap = map[uint8]string{
6: "tcp",
17: "udp",
}
// All the following constants are coming from:
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
// enum cntl_msg_types {
// IPCTNL_MSG_CT_NEW,
// IPCTNL_MSG_CT_GET,
// IPCTNL_MSG_CT_DELETE,
// IPCTNL_MSG_CT_GET_CTRZERO,
// IPCTNL_MSG_CT_GET_STATS_CPU,
// IPCTNL_MSG_CT_GET_STATS,
// IPCTNL_MSG_CT_GET_DYING,
// IPCTNL_MSG_CT_GET_UNCONFIRMED,
//
// IPCTNL_MSG_MAX
// };
const (
IPCTNL_MSG_CT_GET = 1
IPCTNL_MSG_CT_DELETE = 2
)
// #define NFNETLINK_V0 0
const (
NFNETLINK_V0 = 0
)
// #define NLA_F_NESTED (1 << 15)
const (
NLA_F_NESTED = (1 << 15)
)
// enum ctattr_type {
// CTA_UNSPEC,
// CTA_TUPLE_ORIG,
// CTA_TUPLE_REPLY,
// CTA_STATUS,
// CTA_PROTOINFO,
// CTA_HELP,
// CTA_NAT_SRC,
// #define CTA_NAT CTA_NAT_SRC /* backwards compatibility */
// CTA_TIMEOUT,
// CTA_MARK,
// CTA_COUNTERS_ORIG,
// CTA_COUNTERS_REPLY,
// CTA_USE,
// CTA_ID,
// CTA_NAT_DST,
// CTA_TUPLE_MASTER,
// CTA_SEQ_ADJ_ORIG,
// CTA_NAT_SEQ_ADJ_ORIG = CTA_SEQ_ADJ_ORIG,
// CTA_SEQ_ADJ_REPLY,
// CTA_NAT_SEQ_ADJ_REPLY = CTA_SEQ_ADJ_REPLY,
// CTA_SECMARK, /* obsolete */
// CTA_ZONE,
// CTA_SECCTX,
// CTA_TIMESTAMP,
// CTA_MARK_MASK,
// CTA_LABELS,
// CTA_LABELS_MASK,
// __CTA_MAX
// };
const (
CTA_TUPLE_ORIG = 1
CTA_TUPLE_REPLY = 2
CTA_STATUS = 3
CTA_TIMEOUT = 8
CTA_MARK = 9
CTA_PROTOINFO = 4
)
// enum ctattr_tuple {
// CTA_TUPLE_UNSPEC,
// CTA_TUPLE_IP,
// CTA_TUPLE_PROTO,
// CTA_TUPLE_ZONE,
// __CTA_TUPLE_MAX
// };
// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
const (
CTA_TUPLE_IP = 1
CTA_TUPLE_PROTO = 2
)
// enum ctattr_ip {
// CTA_IP_UNSPEC,
// CTA_IP_V4_SRC,
// CTA_IP_V4_DST,
// CTA_IP_V6_SRC,
// CTA_IP_V6_DST,
// __CTA_IP_MAX
// };
// #define CTA_IP_MAX (__CTA_IP_MAX - 1)
const (
CTA_IP_V4_SRC = 1
CTA_IP_V4_DST = 2
CTA_IP_V6_SRC = 3
CTA_IP_V6_DST = 4
)
// enum ctattr_l4proto {
// CTA_PROTO_UNSPEC,
// CTA_PROTO_NUM,
// CTA_PROTO_SRC_PORT,
// CTA_PROTO_DST_PORT,
// CTA_PROTO_ICMP_ID,
// CTA_PROTO_ICMP_TYPE,
// CTA_PROTO_ICMP_CODE,
// CTA_PROTO_ICMPV6_ID,
// CTA_PROTO_ICMPV6_TYPE,
// CTA_PROTO_ICMPV6_CODE,
// __CTA_PROTO_MAX
// };
// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
const (
CTA_PROTO_NUM = 1
CTA_PROTO_SRC_PORT = 2
CTA_PROTO_DST_PORT = 3
)
// enum ctattr_protoinfo {
// CTA_PROTOINFO_UNSPEC,
// CTA_PROTOINFO_TCP,
// CTA_PROTOINFO_DCCP,
// CTA_PROTOINFO_SCTP,
// __CTA_PROTOINFO_MAX
// };
// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
const (
CTA_PROTOINFO_TCP = 1
)
// enum ctattr_protoinfo_tcp {
// CTA_PROTOINFO_TCP_UNSPEC,
// CTA_PROTOINFO_TCP_STATE,
// CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
// CTA_PROTOINFO_TCP_WSCALE_REPLY,
// CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
// CTA_PROTOINFO_TCP_FLAGS_REPLY,
// __CTA_PROTOINFO_TCP_MAX
// };
// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
const (
CTA_PROTOINFO_TCP_STATE = 1
CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2
CTA_PROTOINFO_TCP_WSCALE_REPLY = 3
CTA_PROTOINFO_TCP_FLAGS_ORIGINAL = 4
CTA_PROTOINFO_TCP_FLAGS_REPLY = 5
)
// /* General form of address family dependent message.
// */
// struct nfgenmsg {
// __u8 nfgen_family; /* AF_xxx */
// __u8 version; /* nfnetlink version */
// __be16 res_id; /* resource id */
// };
type Nfgenmsg struct {
NfgenFamily uint8
Version uint8
ResId uint16 // big endian
}
func (msg *Nfgenmsg) Len() int {
return SizeofNfgenmsg
}
func DeserializeNfgenmsg(b []byte) *Nfgenmsg {
return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0]))
}
func (msg *Nfgenmsg) Serialize() []byte {
return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:]
}

View File

@ -24,7 +24,7 @@ const (
)
// SupportedNlFamilies contains the list of netlink families this netlink package supports
var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER}
var nextSeqNr uint32
@ -321,6 +321,7 @@ func (a *RtAttr) Serialize() []byte {
type NetlinkRequest struct {
syscall.NlMsghdr
Data []NetlinkRequestData
RawData []byte
Sockets map[int]*SocketHandle
}
@ -332,6 +333,8 @@ func (req *NetlinkRequest) Serialize() []byte {
dataBytes[i] = data.Serialize()
length = length + len(dataBytes[i])
}
length += len(req.RawData)
req.Len = uint32(length)
b := make([]byte, length)
hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
@ -343,6 +346,10 @@ func (req *NetlinkRequest) Serialize() []byte {
next = next + 1
}
}
// Add the raw data if any
if len(req.RawData) > 0 {
copy(b[next:length], req.RawData)
}
return b
}
@ -352,6 +359,13 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
}
}
// AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
func (req *NetlinkRequest) AddRawData(data []byte) {
if data != nil {
req.RawData = append(req.RawData, data...)
}
}
// Execute the request against a the given sockType.
// Returns a list of netlink messages in serialized format, optionally filtered
// by resType.