mirror of https://github.com/OISF/suricata
af-packet: kernel bypass implementation
This patch implements bypass capability for af-packet. The filter only bypasses TCP and UDP over IPv4 and IPv6. It doesn't bypass IPv6 packets with extension headers. This patch also introduces a bypassed flow manager that takes care of timing out the bypassed flows. It uses a 60-second timeout per flow. As bypassed flows are supposed to be active, we can try that: if they are not active, we don't care about getting them back in Suricata. pull/3221/head
parent
91e1256b01
commit
06173267c6
@ -0,0 +1,193 @@
|
|||||||
|
//#include <bcc/proto.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
|
#include <linux/if_ether.h>
|
||||||
|
#include <linux/in.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
#include <linux/in6.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
#include <linux/filter.h>
|
||||||
|
|
||||||
|
#include "bpf_helpers.h"
|
||||||
|
|
||||||
|
/* 263682 == (4 << 16) + (6 << 8) + 2; presumably kernel 4.6.2 in the usual
 * KERNEL_VERSION(major, minor, patch) encoding -- TODO confirm. The value is
 * stored in the "version" section at the end of this file. */
#define LINUX_VERSION_CODE 263682
|
||||||
|
|
||||||
|
struct flowv4_keys {
|
||||||
|
__be32 src;
|
||||||
|
__be32 dst;
|
||||||
|
union {
|
||||||
|
__be32 ports;
|
||||||
|
__be16 port16[2];
|
||||||
|
};
|
||||||
|
__u32 ip_proto;
|
||||||
|
} __attribute__((__aligned__(8)));
|
||||||
|
|
||||||
|
struct flowv6_keys {
|
||||||
|
__be32 src[4];
|
||||||
|
__be32 dst[4];
|
||||||
|
union {
|
||||||
|
__be32 ports;
|
||||||
|
__be16 port16[2];
|
||||||
|
};
|
||||||
|
__u32 ip_proto;
|
||||||
|
} __attribute__((__aligned__(8)));
|
||||||
|
|
||||||
|
/* Value stored for each bypassed flow in the flow tables. */
struct pair {
    uint64_t time;      /* bpf_ktime_get_ns() value at the last matching packet */
    uint64_t packets;   /* number of packets that matched the flow */
    uint64_t bytes;     /* sum of skb->len over the matching packets */
} __attribute__((__aligned__(8)));
|
||||||
|
|
||||||
|
/* Hash map of the bypassed IPv4 flows: struct flowv4_keys -> struct pair,
 * holding at most 32768 entries. */
struct bpf_map_def SEC("maps") flow_table_v4 = {
    .type        = BPF_MAP_TYPE_HASH,
    .key_size    = sizeof(struct flowv4_keys),
    .value_size  = sizeof(struct pair),
    .max_entries = 32768,
};
|
||||||
|
|
||||||
|
/* Hash map of the bypassed IPv6 flows: struct flowv6_keys -> struct pair,
 * holding at most 32768 entries. */
struct bpf_map_def SEC("maps") flow_table_v6 = {
    .type        = BPF_MAP_TYPE_HASH,
    .key_size    = sizeof(struct flowv6_keys),
    .value_size  = sizeof(struct pair),
    .max_entries = 32768,
};
|
||||||
|
|
||||||
|
static __always_inline int ipv4_filter(struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
uint32_t nhoff, verlen;
|
||||||
|
struct flowv4_keys tuple;
|
||||||
|
struct pair *value;
|
||||||
|
uint16_t port;
|
||||||
|
|
||||||
|
nhoff = skb->cb[0];
|
||||||
|
|
||||||
|
tuple.ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
|
||||||
|
/* only support TCP and UDP for now */
|
||||||
|
switch (tuple.ip_proto) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
tuple.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
|
||||||
|
tuple.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
|
||||||
|
|
||||||
|
verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
|
||||||
|
nhoff += (verlen & 0xF) << 2;
|
||||||
|
tuple.ports = load_word(skb, nhoff);
|
||||||
|
port = tuple.port16[1];
|
||||||
|
tuple.port16[1] = tuple.port16[0];
|
||||||
|
tuple.port16[0] = port;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if ((tuple.port16[0] == 22) || (tuple.port16[1] == 22))
|
||||||
|
{
|
||||||
|
uint16_t sp = tuple.port16[0];
|
||||||
|
//uint16_t dp = tuple.port16[1];
|
||||||
|
char fmt[] = "Parsed SSH flow: %u %d -> %u\n";
|
||||||
|
bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sp, tuple.dst);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/* Test if src is in hash */
|
||||||
|
value = bpf_map_lookup_elem(&flow_table_v4, &tuple);
|
||||||
|
if (value) {
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
uint16_t sp = tuple.port16[0];
|
||||||
|
//uint16_t dp = tuple.port16[1];
|
||||||
|
char bfmt[] = "Found flow: %u %d -> %u\n";
|
||||||
|
bpf_trace_printk(bfmt, sizeof(bfmt), tuple.src, sp, tuple.dst);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
__sync_fetch_and_add(&value->packets, 1);
|
||||||
|
__sync_fetch_and_add(&value->bytes, skb->len);
|
||||||
|
value->time = bpf_ktime_get_ns();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline int ipv6_filter(struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
uint32_t nhoff;
|
||||||
|
uint8_t nhdr;
|
||||||
|
struct flowv6_keys tuple;
|
||||||
|
struct pair *value;
|
||||||
|
uint16_t port;
|
||||||
|
|
||||||
|
nhoff = skb->cb[0];
|
||||||
|
|
||||||
|
tuple.src[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr));
|
||||||
|
tuple.src[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4);
|
||||||
|
tuple.src[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 8);
|
||||||
|
tuple.src[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 12);
|
||||||
|
tuple.dst[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr));
|
||||||
|
tuple.dst[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4);
|
||||||
|
tuple.dst[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 8);
|
||||||
|
tuple.dst[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 12);
|
||||||
|
|
||||||
|
/* get next header */
|
||||||
|
nhdr = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr));
|
||||||
|
|
||||||
|
/* only support direct TCP and UDP for now */
|
||||||
|
switch (nhdr) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse TCP */
|
||||||
|
tuple.ports = load_word(skb, nhoff + 40 /* IPV6_HEADER_LEN */);
|
||||||
|
port = tuple.port16[1];
|
||||||
|
tuple.port16[1] = tuple.port16[0];
|
||||||
|
tuple.port16[0] = port;
|
||||||
|
tuple.ip_proto = nhdr;
|
||||||
|
|
||||||
|
//char fmt[] = "Now Got IPv6 port %u and %u\n";
|
||||||
|
//bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
|
||||||
|
/* Test if src is in hash */
|
||||||
|
value = bpf_map_lookup_elem(&flow_table_v6, &tuple);
|
||||||
|
if (value) {
|
||||||
|
//char fmt[] = "Got a match IPv6: %u and %u\n";
|
||||||
|
//bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
|
||||||
|
__sync_fetch_and_add(&value->packets, 1);
|
||||||
|
__sync_fetch_and_add(&value->bytes, skb->len);
|
||||||
|
value->time = bpf_ktime_get_ns();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Entry point of the filter.
 *
 * Stores the offset of the network header (link-layer base plus Ethernet
 * header length) in skb->cb[0], then hands the packet to the IPv4 or IPv6
 * lookup depending on skb->protocol.
 *
 * Returns the value of the selected lookup function, or -1 for any other
 * protocol.
 */
int SEC("filter") hashfilter(struct __sk_buff *skb) {
    const __u32 proto = skb->protocol;

    /* ipv4_filter() and ipv6_filter() read the header offset from cb[0] */
    skb->cb[0] = BPF_LL_OFF + ETH_HLEN;

    if (proto == __constant_htons(ETH_P_IP))
        return ipv4_filter(skb);
    if (proto == __constant_htons(ETH_P_IPV6))
        return ipv6_filter(skb);

    return -1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* License string emitted in the "license" section of the object. */
char __license[] SEC("license") = "GPL";

/* Version code emitted in the "version" section of the object. */
uint32_t __version SEC("version") = LINUX_VERSION_CODE;
|
@ -0,0 +1,178 @@
|
|||||||
|
/* Copyright (C) 2016 Open Information Security Foundation
|
||||||
|
*
|
||||||
|
* You can copy, redistribute or modify this Program under the terms of
|
||||||
|
* the GNU General Public License version 2 as published by the Free
|
||||||
|
* Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* version 2 along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* \author Eric Leblond <eleblond@stamus-networks.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suricata-common.h"
|
||||||
|
#include "tm-threads.h"
|
||||||
|
#include "flow.h"
|
||||||
|
#include "flow-bypass.h"
|
||||||
|
#include "flow-private.h"
|
||||||
|
#include "util-ebpf.h"
|
||||||
|
|
||||||
|
#define BYPASSED_FLOW_TIMEOUT 60
|
||||||
|
#define FLOW_BYPASS_DELAY 10
|
||||||
|
|
||||||
|
/** \brief per-thread data of the bypassed flow manager: the ids returned
 *         by StatsRegisterCounter() for its three counters */
typedef struct BypassedFlowManagerThreadData_ {
    uint16_t flow_bypassed_cnt_clo; /**< id of "flow_bypassed.closed" */
    uint16_t flow_bypassed_pkts;    /**< id of "flow_bypassed.pkts" */
    uint16_t flow_bypassed_bytes;   /**< id of "flow_bypassed.bytes" */
} BypassedFlowManagerThreadData;
|
||||||
|
|
||||||
|
#ifdef HAVE_PACKET_EBPF
|
||||||
|
|
||||||
|
static int BypassedFlowV4Timeout(int fd, struct flowv4_keys *key, struct pair *value, void *data)
|
||||||
|
{
|
||||||
|
struct timespec *curtime = (struct timespec *)data;
|
||||||
|
SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
|
||||||
|
curtime->tv_sec, value->time / 1000000000,
|
||||||
|
key->port16[0], key->port16[1]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
|
||||||
|
SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
|
||||||
|
key->port16[0], key->port16[1], value->time);
|
||||||
|
EBPFDeleteKey(fd, key);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int BypassedFlowV6Timeout(int fd, struct flowv6_keys *key, struct pair *value, void *data)
|
||||||
|
{
|
||||||
|
struct timespec *curtime = (struct timespec *)data;
|
||||||
|
SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
|
||||||
|
curtime->tv_sec, value->time / 1000000000,
|
||||||
|
key->port16[0], key->port16[1]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
|
||||||
|
SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
|
||||||
|
key->port16[0], key->port16[1], value->time);
|
||||||
|
EBPFDeleteKey(fd, key);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * \brief main loop of the bypassed flow manager thread
 *
 * Every FLOW_BYPASS_DELAY seconds the IPv4 and IPv6 flow tables are walked
 * with the timeout callbacks and the statistics collected during each walk
 * are added to the thread counters. The loop ends when THV_KILL is set on
 * the thread. Without HAVE_PACKET_EBPF the function returns immediately.
 *
 * \param th_v thread vars of the management thread
 * \param thread_data the BypassedFlowManagerThreadData of the thread
 *
 * \retval TM_ECODE_OK always
 */
static TmEcode BypassedFlowManager(ThreadVars *th_v, void *thread_data)
{
#ifdef HAVE_PACKET_EBPF
    int tcount = 0;
    BypassedFlowManagerThreadData *ftd = thread_data;

    while (1) {
        SCLogDebug("Dumping the table");
        struct timespec curtime;
        struct flows_stats bypassstats = { 0, 0, 0};
        if (clock_gettime(CLOCK_MONOTONIC, &curtime) != 0) {
            SCLogWarning(SC_ERR_INVALID_VALUE, "Can't get time: %s (%d)",
                         strerror(errno), errno);
            /* keep honouring a shutdown request while the clock is
             * failing, otherwise the thread could never be stopped */
            if (TmThreadsCheckFlag(th_v, THV_KILL)) {
                StatsSyncCounters(th_v);
                return TM_ECODE_OK;
            }
            sleep(1);
            continue;
        }
        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
        tcount = EBPFForEachFlowV4Table("flow_table_v4", BypassedFlowV4Timeout, &bypassstats, &curtime);
        if (tcount) {
            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
        }
        /* start the IPv6 walk from clean statistics */
        memset(&bypassstats, 0, sizeof(bypassstats));
        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
        tcount = EBPFForEachFlowV6Table("flow_table_v6", BypassedFlowV6Timeout, &bypassstats, &curtime);
        if (tcount) {
            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
        }

        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
            StatsSyncCounters(th_v);
            return TM_ECODE_OK;
        }
        sleep(FLOW_BYPASS_DELAY);
        StatsSyncCountersIfSignalled(th_v);
    }
#endif
    return TM_ECODE_OK;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * \brief allocate the thread data and register the bypass counters
 *
 * \param t thread vars the counters are registered on
 * \param initdata not used
 * \param data set to the newly allocated BypassedFlowManagerThreadData
 *
 * \retval TM_ECODE_OK on success
 * \retval TM_ECODE_FAILED if the allocation failed
 */
static TmEcode BypassedFlowManagerThreadInit(ThreadVars *t, const void *initdata, void **data)
{
    BypassedFlowManagerThreadData *td = SCCalloc(1, sizeof(*td));

    if (td == NULL) {
        return TM_ECODE_FAILED;
    }
    *data = td;

    td->flow_bypassed_cnt_clo = StatsRegisterCounter("flow_bypassed.closed", t);
    td->flow_bypassed_pkts = StatsRegisterCounter("flow_bypassed.pkts", t);
    td->flow_bypassed_bytes = StatsRegisterCounter("flow_bypassed.bytes", t);

    return TM_ECODE_OK;
}
|
||||||
|
|
||||||
|
/**
 * \brief release the thread data of the bypassed flow manager
 *
 * \param t not used
 * \param data thread data to free, may be NULL
 *
 * \retval TM_ECODE_OK always
 */
static TmEcode BypassedFlowManagerThreadDeinit(ThreadVars *t, void *data)
{
    if (data != NULL) {
        SCFree(data);
    }

    return TM_ECODE_OK;
}
|
||||||
|
|
||||||
|
/**
 * \brief spawn the bypassed flow manager thread
 *
 * Nothing is done when AFLFUZZ_DISABLE_MGTTHREADS is defined or when
 * HAVE_PACKET_EBPF is not defined. Otherwise the "BypassedFlowManager"
 * management thread is created and started; the process exits if either
 * step fails.
 */
void BypassedFlowManagerThreadSpawn(void)
{
#if !defined(AFLFUZZ_DISABLE_MGTTHREADS) && defined(HAVE_PACKET_EBPF)
    ThreadVars *tv_flowmgr = TmThreadCreateMgmtThreadByName("BypassedFlowManager",
                                                           "BypassedFlowManager", 0);
    BUG_ON(tv_flowmgr == NULL);

    /* NOTE(review): the explicit check is kept in case BUG_ON() is compiled
     * out in some builds -- confirm against its definition */
    if (tv_flowmgr == NULL) {
        printf("ERROR: TmThreadsCreate failed\n");
        exit(1);
    }
    if (TmThreadSpawn(tv_flowmgr) != TM_ECODE_OK) {
        printf("ERROR: TmThreadSpawn failed\n");
        exit(1);
    }
#endif
}
|
||||||
|
|
||||||
|
void TmModuleBypassedFlowManagerRegister (void)
|
||||||
|
{
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].name = "BypassedFlowManager";
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadInit = BypassedFlowManagerThreadInit;
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadDeinit = BypassedFlowManagerThreadDeinit;
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].Management = BypassedFlowManager;
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].cap_flags = 0;
|
||||||
|
tmm_modules[TMM_BYPASSEDFLOWMANAGER].flags = TM_FLAG_MANAGEMENT_TM;
|
||||||
|
SCLogDebug("%s registered", tmm_modules[TMM_BYPASSEDFLOWMANAGER].name);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,34 @@
|
|||||||
|
/* Copyright (C) 2016 Open Information Security Foundation
|
||||||
|
*
|
||||||
|
* You can copy, redistribute or modify this Program under the terms of
|
||||||
|
* the GNU General Public License version 2 as published by the Free
|
||||||
|
* Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* version 2 along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* \author Eric Leblond <eleblond@stamus-networks.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FLOW_BYPASS_H__
|
||||||
|
#define __FLOW_BYPASS_H__
|
||||||
|
|
||||||
|
void FlowAddToBypassed(Flow *f);
|
||||||
|
|
||||||
|
void BypassedFlowManagerThreadSpawn(void);
|
||||||
|
void TmModuleBypassedFlowManagerRegister(void);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue