mirror of https://github.com/OISF/suricata
af-packet: kernel bypass implementation
This patch implements bypass capability for af-packet. The filter only bypasses TCP and UDP over IPv4 and IPv6. It doesn't bypass IPv6 packets with extension headers. This patch also introduces a bypassed flow manager that takes care of timing out the bypassed flows. It uses a 60-second timeout per flow. As bypassed flows are supposed to be active, we can try that. If they are not active, then we don't care about getting them back in Suricata. pull/3221/head
parent
91e1256b01
commit
06173267c6
@ -0,0 +1,193 @@
|
||||
//#include <bcc/proto.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/filter.h>
|
||||
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
/* Value stored in the "version" section at the end of this file.
 * Spelled as (major << 16) + (minor << 8) + patch, i.e. 4.6.2 == 263682. */
#define LINUX_VERSION_CODE ((4 << 16) + (6 << 8) + 2)
|
||||
|
||||
struct flowv4_keys {
|
||||
__be32 src;
|
||||
__be32 dst;
|
||||
union {
|
||||
__be32 ports;
|
||||
__be16 port16[2];
|
||||
};
|
||||
__u32 ip_proto;
|
||||
} __attribute__((__aligned__(8)));
|
||||
|
||||
struct flowv6_keys {
|
||||
__be32 src[4];
|
||||
__be32 dst[4];
|
||||
union {
|
||||
__be32 ports;
|
||||
__be16 port16[2];
|
||||
};
|
||||
__u32 ip_proto;
|
||||
} __attribute__((__aligned__(8)));
|
||||
|
||||
/* Value stored for every bypassed flow in flow_table_v4 / flow_table_v6. */
struct __attribute__((aligned(8))) pair {
    uint64_t time;      /* bpf_ktime_get_ns() at the last matching packet */
    uint64_t packets;   /* number of matching packets seen by the filter */
    uint64_t bytes;     /* sum of skb->len over the matching packets */
};
|
||||
|
||||
/* Hash map of bypassed IPv4 flows: struct flowv4_keys -> struct pair. */
struct bpf_map_def SEC("maps") flow_table_v4 = {
    .type        = BPF_MAP_TYPE_HASH,
    .max_entries = 32 * 1024,
    .key_size    = sizeof(struct flowv4_keys),
    .value_size  = sizeof(struct pair),
};
|
||||
|
||||
/* Hash map of bypassed IPv6 flows: struct flowv6_keys -> struct pair. */
struct bpf_map_def SEC("maps") flow_table_v6 = {
    .type        = BPF_MAP_TYPE_HASH,
    .max_entries = 32 * 1024,
    .key_size    = sizeof(struct flowv6_keys),
    .value_size  = sizeof(struct pair),
};
|
||||
|
||||
static __always_inline int ipv4_filter(struct __sk_buff *skb)
|
||||
{
|
||||
uint32_t nhoff, verlen;
|
||||
struct flowv4_keys tuple;
|
||||
struct pair *value;
|
||||
uint16_t port;
|
||||
|
||||
nhoff = skb->cb[0];
|
||||
|
||||
tuple.ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
|
||||
/* only support TCP and UDP for now */
|
||||
switch (tuple.ip_proto) {
|
||||
case IPPROTO_TCP:
|
||||
case IPPROTO_UDP:
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
tuple.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
|
||||
tuple.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
|
||||
|
||||
verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
|
||||
nhoff += (verlen & 0xF) << 2;
|
||||
tuple.ports = load_word(skb, nhoff);
|
||||
port = tuple.port16[1];
|
||||
tuple.port16[1] = tuple.port16[0];
|
||||
tuple.port16[0] = port;
|
||||
|
||||
#if 0
|
||||
if ((tuple.port16[0] == 22) || (tuple.port16[1] == 22))
|
||||
{
|
||||
uint16_t sp = tuple.port16[0];
|
||||
//uint16_t dp = tuple.port16[1];
|
||||
char fmt[] = "Parsed SSH flow: %u %d -> %u\n";
|
||||
bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sp, tuple.dst);
|
||||
}
|
||||
#endif
|
||||
/* Test if src is in hash */
|
||||
value = bpf_map_lookup_elem(&flow_table_v4, &tuple);
|
||||
if (value) {
|
||||
#if 0
|
||||
{
|
||||
uint16_t sp = tuple.port16[0];
|
||||
//uint16_t dp = tuple.port16[1];
|
||||
char bfmt[] = "Found flow: %u %d -> %u\n";
|
||||
bpf_trace_printk(bfmt, sizeof(bfmt), tuple.src, sp, tuple.dst);
|
||||
}
|
||||
#endif
|
||||
__sync_fetch_and_add(&value->packets, 1);
|
||||
__sync_fetch_and_add(&value->bytes, skb->len);
|
||||
value->time = bpf_ktime_get_ns();
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static __always_inline int ipv6_filter(struct __sk_buff *skb)
|
||||
{
|
||||
uint32_t nhoff;
|
||||
uint8_t nhdr;
|
||||
struct flowv6_keys tuple;
|
||||
struct pair *value;
|
||||
uint16_t port;
|
||||
|
||||
nhoff = skb->cb[0];
|
||||
|
||||
tuple.src[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr));
|
||||
tuple.src[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4);
|
||||
tuple.src[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 8);
|
||||
tuple.src[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 12);
|
||||
tuple.dst[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr));
|
||||
tuple.dst[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4);
|
||||
tuple.dst[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 8);
|
||||
tuple.dst[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 12);
|
||||
|
||||
/* get next header */
|
||||
nhdr = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr));
|
||||
|
||||
/* only support direct TCP and UDP for now */
|
||||
switch (nhdr) {
|
||||
case IPPROTO_TCP:
|
||||
case IPPROTO_UDP:
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Parse TCP */
|
||||
tuple.ports = load_word(skb, nhoff + 40 /* IPV6_HEADER_LEN */);
|
||||
port = tuple.port16[1];
|
||||
tuple.port16[1] = tuple.port16[0];
|
||||
tuple.port16[0] = port;
|
||||
tuple.ip_proto = nhdr;
|
||||
|
||||
//char fmt[] = "Now Got IPv6 port %u and %u\n";
|
||||
//bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
|
||||
/* Test if src is in hash */
|
||||
value = bpf_map_lookup_elem(&flow_table_v6, &tuple);
|
||||
if (value) {
|
||||
//char fmt[] = "Got a match IPv6: %u and %u\n";
|
||||
//bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
|
||||
__sync_fetch_and_add(&value->packets, 1);
|
||||
__sync_fetch_and_add(&value->bytes, skb->len);
|
||||
value->time = bpf_ktime_get_ns();
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Entry point of the filter (placed in the "filter" section).
 *
 * Stores the offset of the network header (BPF_LL_OFF + ETH_HLEN) in
 * skb->cb[0], where ipv4_filter() and ipv6_filter() read it back, then
 * dispatches on skb->protocol.
 *
 * Returns the result of the per-protocol helper for IPv4 and IPv6 and -1
 * for any other protocol.
 */
int SEC("filter") hashfilter(struct __sk_buff *skb) {
    __u32 nhoff = BPF_LL_OFF + ETH_HLEN;

    skb->cb[0] = nhoff;
    switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
            return ipv4_filter(skb);
        case __constant_htons(ETH_P_IPV6):
            return ipv6_filter(skb);
        default:
#if 0
            {
                /* debug aid: prints the raw skb->protocol value */
                char fmt[] = "Got proto %u\n";
                bpf_trace_printk(fmt, sizeof(fmt), skb->protocol);
                break;
            }
#else
            break;
#endif
    }
    return -1;
}
|
||||
|
||||
|
||||
/* License string of this eBPF object, emitted through SEC("license"). */
char __license[] SEC("license") = "GPL";
|
||||
|
||||
/* Version value of this eBPF object, emitted through SEC("version"); uses
 * the LINUX_VERSION_CODE constant defined at the top of this file. */
uint32_t __version SEC("version") = LINUX_VERSION_CODE;
|
@ -0,0 +1,178 @@
|
||||
/* Copyright (C) 2016 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Eric Leblond <eleblond@stamus-networks.com>
|
||||
*/
|
||||
|
||||
#include "suricata-common.h"
|
||||
#include "tm-threads.h"
|
||||
#include "flow.h"
|
||||
#include "flow-bypass.h"
|
||||
#include "flow-private.h"
|
||||
#include "util-ebpf.h"
|
||||
|
||||
/** Idle time, in seconds, after which a bypassed flow is removed from the
 *  eBPF flow table (compared against second-granularity timestamps). */
#define BYPASSED_FLOW_TIMEOUT 60
|
||||
/** Pause, in seconds (passed to sleep()), between two scans of the bypassed
 *  flow tables. */
#define FLOW_BYPASS_DELAY 10
|
||||
|
||||
/** \brief Per-thread data of the bypassed flow manager: the ids returned by
 *         StatsRegisterCounter() for its three counters. */
struct BypassedFlowManagerThreadData_ {
    uint16_t flow_bypassed_cnt_clo; /**< id of "flow_bypassed.closed" */
    uint16_t flow_bypassed_pkts;    /**< id of "flow_bypassed.pkts" */
    uint16_t flow_bypassed_bytes;   /**< id of "flow_bypassed.bytes" */
};

typedef struct BypassedFlowManagerThreadData_ BypassedFlowManagerThreadData;
|
||||
|
||||
#ifdef HAVE_PACKET_EBPF
|
||||
|
||||
static int BypassedFlowV4Timeout(int fd, struct flowv4_keys *key, struct pair *value, void *data)
|
||||
{
|
||||
struct timespec *curtime = (struct timespec *)data;
|
||||
SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
|
||||
curtime->tv_sec, value->time / 1000000000,
|
||||
key->port16[0], key->port16[1]
|
||||
);
|
||||
|
||||
if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
|
||||
SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
|
||||
key->port16[0], key->port16[1], value->time);
|
||||
EBPFDeleteKey(fd, key);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int BypassedFlowV6Timeout(int fd, struct flowv6_keys *key, struct pair *value, void *data)
|
||||
{
|
||||
struct timespec *curtime = (struct timespec *)data;
|
||||
SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
|
||||
curtime->tv_sec, value->time / 1000000000,
|
||||
key->port16[0], key->port16[1]
|
||||
);
|
||||
|
||||
if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
|
||||
SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
|
||||
key->port16[0], key->port16[1], value->time);
|
||||
EBPFDeleteKey(fd, key);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
 * \brief Management loop of the bypassed flow manager thread.
 *
 * When built with HAVE_PACKET_EBPF, every FLOW_BYPASS_DELAY seconds the
 * loop reads CLOCK_MONOTONIC, walks the "flow_table_v4" and "flow_table_v6"
 * eBPF tables with the timeout callbacks and adds the reported statistics
 * to the thread counters. The loop ends when THV_KILL is set on the thread.
 * Without HAVE_PACKET_EBPF the function returns immediately.
 *
 * \param th_v        thread vars of the management thread
 * \param thread_data the BypassedFlowManagerThreadData of this thread
 *
 * \retval TM_ECODE_OK always
 */
static TmEcode BypassedFlowManager(ThreadVars *th_v, void *thread_data)
{
#ifdef HAVE_PACKET_EBPF
    int tcount = 0;
    BypassedFlowManagerThreadData *ftd = thread_data;

    while (1) {
        SCLogDebug("Dumping the table");
        struct timespec curtime;
        struct flows_stats bypassstats = { 0, 0, 0};
        if (clock_gettime(CLOCK_MONOTONIC, &curtime) != 0) {
            SCLogWarning(SC_ERR_INVALID_VALUE, "Can't get time: %s (%d)",
                         strerror(errno), errno);
            /* keep honouring a shutdown request even if the clock keeps
             * failing, otherwise this retry path would never exit */
            if (TmThreadsCheckFlag(th_v, THV_KILL)) {
                StatsSyncCounters(th_v);
                return TM_ECODE_OK;
            }
            sleep(1);
            continue;
        }

        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
        tcount = EBPFForEachFlowV4Table("flow_table_v4", BypassedFlowV4Timeout, &bypassstats, &curtime);
        if (tcount) {
            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
        }

        /* start from clean statistics for the IPv6 table */
        memset(&bypassstats, 0, sizeof(bypassstats));
        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
        tcount = EBPFForEachFlowV6Table("flow_table_v6", BypassedFlowV6Timeout, &bypassstats, &curtime);
        if (tcount) {
            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
        }

        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
            StatsSyncCounters(th_v);
            return TM_ECODE_OK;
        }
        sleep(FLOW_BYPASS_DELAY);
        StatsSyncCountersIfSignalled(th_v);
    }
#endif
    return TM_ECODE_OK;
}
|
||||
|
||||
|
||||
/**
 * \brief Thread init: allocate the per-thread data and register the
 *        "flow_bypassed.*" counters.
 *
 * \param t        thread vars the counters are registered on
 * \param initdata not used
 * \param data     receives the allocated BypassedFlowManagerThreadData
 *
 * \retval TM_ECODE_OK     on success
 * \retval TM_ECODE_FAILED if the allocation failed
 */
static TmEcode BypassedFlowManagerThreadInit(ThreadVars *t, const void *initdata, void **data)
{
    BypassedFlowManagerThreadData *ctx = SCCalloc(1, sizeof(*ctx));

    if (ctx == NULL) {
        return TM_ECODE_FAILED;
    }
    *data = ctx;

    ctx->flow_bypassed_cnt_clo = StatsRegisterCounter("flow_bypassed.closed", t);
    ctx->flow_bypassed_pkts = StatsRegisterCounter("flow_bypassed.pkts", t);
    ctx->flow_bypassed_bytes = StatsRegisterCounter("flow_bypassed.bytes", t);

    return TM_ECODE_OK;
}
|
||||
|
||||
/**
 * \brief Thread deinit: release the per-thread data, if there is any.
 *
 * \param t    not used
 * \param data per-thread data pointer, may be NULL
 *
 * \retval TM_ECODE_OK always
 */
static TmEcode BypassedFlowManagerThreadDeinit(ThreadVars *t, void *data)
{
    if (data != NULL) {
        SCFree(data);
    }

    return TM_ECODE_OK;
}
|
||||
|
||||
/**
 * \brief Spawn the bypassed flow manager thread.
 *
 * Nothing is done unless the build defines HAVE_PACKET_EBPF, and nothing is
 * done when AFLFUZZ_DISABLE_MGTTHREADS is defined. If the thread cannot be
 * created or spawned, an error is printed and the process exits.
 */
void BypassedFlowManagerThreadSpawn(void)
{
#if defined(HAVE_PACKET_EBPF) && !defined(AFLFUZZ_DISABLE_MGTTHREADS)
    ThreadVars *tv_flowmgr = TmThreadCreateMgmtThreadByName("BypassedFlowManager",
                                                            "BypassedFlowManager", 0);
    BUG_ON(tv_flowmgr == NULL);

    /* explicit check retained in addition to BUG_ON() above */
    if (tv_flowmgr == NULL) {
        printf("ERROR: TmThreadsCreate failed\n");
        exit(1);
    }
    if (TmThreadSpawn(tv_flowmgr) != TM_ECODE_OK) {
        printf("ERROR: TmThreadSpawn failed\n");
        exit(1);
    }
#endif
}
|
||||
|
||||
void TmModuleBypassedFlowManagerRegister (void)
|
||||
{
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].name = "BypassedFlowManager";
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadInit = BypassedFlowManagerThreadInit;
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadDeinit = BypassedFlowManagerThreadDeinit;
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].Management = BypassedFlowManager;
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].cap_flags = 0;
|
||||
tmm_modules[TMM_BYPASSEDFLOWMANAGER].flags = TM_FLAG_MANAGEMENT_TM;
|
||||
SCLogDebug("%s registered", tmm_modules[TMM_BYPASSEDFLOWMANAGER].name);
|
||||
}
|
||||
|
@ -0,0 +1,34 @@
|
||||
/* Copyright (C) 2016 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Eric Leblond <eleblond@stamus-networks.com>
|
||||
*/
|
||||
|
||||
#ifndef __FLOW_BYPASS_H__
|
||||
#define __FLOW_BYPASS_H__
|
||||
|
||||
/** \brief Add a flow to the bypassed flows.
 *  NOTE(review): no definition of this function is visible next to this
 *  declaration; confirm where it is implemented and what it does. */
void FlowAddToBypassed(Flow *f);
|
||||
|
||||
void BypassedFlowManagerThreadSpawn(void);
|
||||
void TmModuleBypassedFlowManagerRegister(void);
|
||||
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue