/* Copyright (C) 2018-2019 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free * Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * version 2 along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ /** * \ingroup afppacket * * @{ */ /** * \file * * \author Eric Leblond * * eBPF utility * */ #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1 #include "suricata-common.h" #include "flow-bypass.h" #ifdef HAVE_PACKET_EBPF #include #include #include "util-ebpf.h" #include "util-cpu.h" #include "util-device.h" #include "device-storage.h" #include "flow-storage.h" #include "flow.h" #include "flow-hash.h" #include "tm-threads.h" #include #include #include #include "config.h" #define BPF_MAP_MAX_COUNT 16 #define BYPASSED_FLOW_TIMEOUT 60 static int g_livedev_storage_id = -1; static int g_flow_storage_id = -1; struct bpf_map_item { char iface[IFNAMSIZ]; char * name; int fd; uint8_t to_unlink; }; struct bpf_maps_info { struct bpf_map_item array[BPF_MAP_MAX_COUNT]; int last; }; typedef struct BypassedIfaceList_ { LiveDevice *dev; struct BypassedIfaceList_ *next; } BypassedIfaceList; static void BpfMapsInfoFree(void *bpf) { struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf; int i; for (i = 0; i < bpfinfo->last; i ++) { if (bpfinfo->array[i].name) { if (bpfinfo->array[i].to_unlink) { char pinnedpath[PATH_MAX]; int ret = snprintf(pinnedpath, sizeof(pinnedpath), "/sys/fs/bpf/suricata-%s-%s", bpfinfo->array[i].iface, bpfinfo->array[i].name); if (ret > 0) { /* Unlink the pinned entry */ ret = unlink(pinnedpath); if (ret == -1) { int error = errno; SCLogWarning(SC_ERR_SYSCALL, "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error); } } else { SCLogWarning(SC_ERR_SPRINTF, "Unable to remove map %s", bpfinfo->array[i].name); } } SCFree(bpfinfo->array[i].name); } } SCFree(bpfinfo); } static void BypassedListFree(void *ifl) { BypassedIfaceList *mifl = (BypassedIfaceList *)ifl; BypassedIfaceList *nifl; while (mifl) { nifl = mifl->next; SCFree(mifl); mifl = nifl; } } static void EBPFDeleteKey(int fd, void *key) { int ret = bpf_map_delete_elem(fd, key); if (ret < 0) { SCLogWarning(SC_ERR_SYSCALL, "Unable to delete entry: %s (%d)", strerror(errno), errno); } } static struct bpf_maps_info *EBPFGetBpfMap(const char *iface) { LiveDevice *livedev = LiveGetDevice(iface); if (livedev == NULL) return NULL; void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id); return (struct bpf_maps_info *)data; } /** * Get file descriptor of a map in the scope of a interface * * \param iface the interface where the map need to be looked for * \param name the name of the map * \return the file descriptor or -1 in case of error */ int EBPFGetMapFDByName(const char *iface, const char *name) { int i; if (iface == NULL || name == NULL) return -1; struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface); if (bpf_maps == NULL) return -1; for (i = 0; i < BPF_MAP_MAX_COUNT; i++) { if (!bpf_maps->array[i].name) continue; if (!strcmp(bpf_maps->array[i].name, name)) { SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name); return bpf_maps->array[i].fd; } } return -1; } static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file) { char pinnedpath[1024]; snprintf(pinnedpath, sizeof(pinnedpath), "/sys/fs/bpf/suricata-%s-%s", livedev->dev, file); return bpf_obj_get(pinnedpath); } static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config) { int fd_v4 = -1, fd_v6 = -1; /* First try to load the eBPF check map and return if found */ if (config->pinned_maps_name) { int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name); if (ret == 0) { /* pinned maps found, let's just exit as XDP filter is in place */ return ret; } } if (config->mode == AFP_MODE_XDP_BYPASS) { /* Get flow v4 table */ fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4"); if (fd_v4 < 0) { return fd_v4; } /* Get flow v6 table */ fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6"); if (fd_v6 < 0) { SCLogWarning(SC_ERR_INVALID_ARGUMENT, "Found a flow_table_v4 map but no flow_table_v6 map"); return fd_v6; } } struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data)); if (bpf_map_data == NULL) { SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array"); return -1; } if (config->mode == AFP_MODE_XDP_BYPASS) { bpf_map_data->array[0].fd = fd_v4; bpf_map_data->array[0].name = SCStrdup("flow_table_v4"); if (bpf_map_data->array[0].name == NULL) { goto alloc_error; } bpf_map_data->array[1].fd = fd_v6; bpf_map_data->array[1].name = SCStrdup("flow_table_v6"); if (bpf_map_data->array[1].name == NULL) { goto alloc_error; } bpf_map_data->last = 2; } else { bpf_map_data->last = 0; } /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */ int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map"); if (fd >= 0) { bpf_map_data->array[bpf_map_data->last].fd = fd; bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map"); if (bpf_map_data->array[bpf_map_data->last].name == NULL) { goto alloc_error; } bpf_map_data->last++; } fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available"); if (fd >= 0) { bpf_map_data->array[bpf_map_data->last].fd = fd; bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available"); if (bpf_map_data->array[bpf_map_data->last].name == NULL) { goto alloc_error; } bpf_map_data->last++; } fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer"); if (fd >= 0) { bpf_map_data->array[bpf_map_data->last].fd = fd; bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer"); if (bpf_map_data->array[bpf_map_data->last].name == NULL) { goto alloc_error; } bpf_map_data->last++; } fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int"); if (fd >= 0) { bpf_map_data->array[bpf_map_data->last].fd = fd; bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int"); if (bpf_map_data->array[bpf_map_data->last].name == NULL) { goto alloc_error; } bpf_map_data->last++; } /* Attach the bpf_maps_info to the LiveDevice via the device storage */ LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data); /* Declare that device will use bypass stats */ LiveDevUseBypass(livedev); return 0; alloc_error: for (int i = 0; i < bpf_map_data->last; i++) { SCFree(bpf_map_data->array[i].name); } bpf_map_data->last = 0; SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map name"); return -1; } /** * Load a section of an eBPF file * * This function loads a section inside an eBPF and return * via the parameter val the file descriptor that will be used to * inject the eBPF code into the kernel via a syscall. * * \param path the path of the eBPF file to load * \param section the section in the eBPF file to load * \param val a pointer to an integer that will be the file desc * \return -1 in case of error, 0 in case of success, 1 if pinned maps is loaded */ int EBPFLoadFile(const char *iface, const char *path, const char * section, int *val, struct ebpf_timeout_config *config) { int err, pfd; bool found = false; struct bpf_object *bpfobj = NULL; struct bpf_program *bpfprog = NULL; struct bpf_map *map = NULL; if (iface == NULL) return -1; LiveDevice *livedev = LiveGetDevice(iface); if (livedev == NULL) return -1; if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) { /* We try to get our flow table maps and if we have them we can simply return */ if (EBPFLoadPinnedMaps(livedev, config) == 0) { return 1; } } if (! path) { SCLogError(SC_ERR_INVALID_VALUE, "No file defined to load eBPF from"); return -1; } /* Sending the eBPF code to the kernel requires a large amount of * locked memory so we set it to unlimited to avoid a ENOPERM error */ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) { SCLogError(SC_ERR_MEM_ALLOC, "Unable to lock memory: %s (%d)", strerror(errno), errno); return -1; } /* Open the eBPF file and parse it */ bpfobj = bpf_object__open(path); long error = libbpf_get_error(bpfobj); if (error) { char err_buf[128]; libbpf_strerror(error, err_buf, sizeof(err_buf)); SCLogError(SC_ERR_INVALID_VALUE, "Unable to load eBPF objects in '%s': %s", path, err_buf); return -1; } if (config->flags & EBPF_XDP_HW_MODE) { unsigned int ifindex = if_nametoindex(iface); bpf_object__for_each_program(bpfprog, bpfobj) { bpf_program__set_ifindex(bpfprog, ifindex); } bpf_map__for_each(map, bpfobj) { bpf_map__set_ifindex(map, ifindex); } } /* Let's check that our section is here */ bpf_object__for_each_program(bpfprog, bpfobj) { const char *title = bpf_program__title(bpfprog, 0); if (!strcmp(title, section)) { if (config->flags & EBPF_SOCKET_FILTER) { bpf_program__set_socket_filter(bpfprog); } else { bpf_program__set_xdp(bpfprog); } found = true; break; } } if (found == false) { SCLogError(SC_ERR_INVALID_VALUE, "No section '%s' in '%s' file. Will not be able to use the file", section, path); return -1; } err = bpf_object__load(bpfobj); if (err < 0) { if (err == -EPERM) { SCLogError(SC_ERR_MEM_ALLOC, "Permission issue when loading eBPF object: " "%s (%d)", strerror(err), err); } else { char buf[129]; libbpf_strerror(err, buf, sizeof(buf)); SCLogError(SC_ERR_INVALID_VALUE, "Unable to load eBPF object: %s (%d)", buf, err); } return -1; } /* Kernel and userspace are sharing data via map. Userspace access to the * map via a file descriptor. So we need to store the map to fd info. For * that we use bpf_maps_info:: */ struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data)); if (bpf_map_data == NULL) { SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array"); return -1; } /* Store the maps in bpf_maps_info:: */ bpf_map__for_each(map, bpfobj) { if (bpf_map_data->last == BPF_MAP_MAX_COUNT) { SCLogError(SC_ERR_NOT_SUPPORTED, "Too many BPF maps in eBPF files"); break; } SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map)); bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map); bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map)); snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ, "%s", iface); if (!bpf_map_data->array[bpf_map_data->last].name) { SCLogError(SC_ERR_MEM_ALLOC, "Unable to duplicate map name"); BpfMapsInfoFree(bpf_map_data); return -1; } bpf_map_data->array[bpf_map_data->last].to_unlink = 0; if (config->flags & EBPF_PINNED_MAPS) { SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd, bpf_map_data->array[bpf_map_data->last].name); char buf[1024]; snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface, bpf_map_data->array[bpf_map_data->last].name); int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf); if (ret != 0) { SCLogWarning(SC_ERR_AFP_CREATE, "Can not pin: %s", strerror(errno)); } /* Don't unlink pinned maps in XDP mode to avoid a state reset */ if (config->flags & EBPF_XDP_CODE) { bpf_map_data->array[bpf_map_data->last].to_unlink = 0; } else { bpf_map_data->array[bpf_map_data->last].to_unlink = 1; } } bpf_map_data->last++; } /* Attach the bpf_maps_info to the LiveDevice via the device storage */ LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data); LiveDevUseBypass(livedev); /* Finally we get the file descriptor for our eBPF program. We will use * the fd to attach the program to the socket (eBPF case) or to the device * (XDP case). */ pfd = bpf_program__fd(bpfprog); if (pfd == -1) { SCLogError(SC_ERR_INVALID_VALUE, "Unable to find %s section", section); return -1; } *val = pfd; return 0; } /** * Attach a XDP program identified by its file descriptor to a device * * \param iface the name of interface * \param fd the eBPF/XDP program file descriptor * \param a flag to pass to attach function mostly used to set XDP mode * \return -1 in case of error, 0 if success */ int EBPFSetupXDP(const char *iface, int fd, uint8_t flags) { #ifdef HAVE_PACKET_XDP unsigned int ifindex = if_nametoindex(iface); if (ifindex == 0) { SCLogError(SC_ERR_INVALID_VALUE, "Unknown interface '%s'", iface); return -1; } int err = bpf_set_link_xdp_fd(ifindex, fd, flags); if (err != 0) { char buf[129]; libbpf_strerror(err, buf, sizeof(buf)); SCLogError(SC_ERR_INVALID_VALUE, "Unable to set XDP on '%s': %s (%d)", iface, buf, err); return -1; } #endif return 0; } /** * Create a Flow in the table for a Flowkey * * \return false (this create function never returns true) */ static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, FlowKey *flow_key, uint32_t hash, struct timespec *ctime, uint64_t pkts_cnt, uint64_t bytes_cnt) { Flow *f = FlowGetFromFlowKey(flow_key, ctime, hash); if (f == NULL) return false; FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED); /* set accounting, we can't know the direction, so let's just start to * server then if we already have something in to server to client. We need * these numbers as we will use it to see if we have new traffic coming * on the flow */ FlowCounters *fc = FlowGetStorageById(f, GetFlowBypassCounterID()); if (fc == NULL) { fc = SCCalloc(sizeof(FlowCounters), 1); if (fc) { FlowSetStorageById(f, GetFlowBypassCounterID(), fc); fc->todstpktcnt = pkts_cnt; fc->todstbytecnt = bytes_cnt; } else { FLOWLOCK_UNLOCK(f); return false; } } else { fc->tosrcpktcnt = pkts_cnt; fc->tosrcbytecnt = bytes_cnt; } FLOWLOCK_UNLOCK(f); return false; } /** * Update a Flow in the Flow table for a Flowkey. * * \return false if Flow is in Flow table and true if not */ static bool EBPFUpdateFlowForKey(struct flows_stats *flowstats, FlowKey *flow_key, uint32_t hash, struct timespec *ctime, uint64_t pkts_cnt, uint64_t bytes_cnt) { Flow *f = FlowGetExistingFlowFromHash(flow_key, hash); if (f != NULL) { SCLogDebug("bypassed flow found %d -> %d, doing accounting", f->sp, f->dp); FlowCounters *fc = FlowGetStorageById(f, GetFlowBypassCounterID()); if (fc == NULL) { FLOWLOCK_UNLOCK(f); flowstats->count++; return true; } if (flow_key->sp == f->sp) { if (pkts_cnt != fc->todstpktcnt) { flowstats->packets += pkts_cnt - fc->todstpktcnt; flowstats->bytes += bytes_cnt - fc->todstbytecnt; fc->todstpktcnt = pkts_cnt; fc->todstbytecnt = bytes_cnt; /* interval based so no meaning to update the millisecond. * Let's keep it fast and simple */ f->lastts.tv_sec = ctime->tv_sec; } } else { if (pkts_cnt != fc->tosrcpktcnt) { flowstats->packets += pkts_cnt - fc->tosrcpktcnt; flowstats->bytes += bytes_cnt - fc->tosrcbytecnt; fc->tosrcpktcnt = pkts_cnt; fc->tosrcbytecnt = bytes_cnt; f->lastts.tv_sec = ctime->tv_sec; } } FLOWLOCK_UNLOCK(f); return false; } else { /* Flow has disappeared so it has been removed due to timeout. * This means we did not see any packet since bypassed_timeout/flow_dump_interval * checks so we are highly probably have no packet to account. * So we just discard the flow */ SCLogDebug("No flow for %d -> %d", flow_key->sp, flow_key->dp); SCLogDebug("Dead with pkts %lu bytes %lu", pkts_cnt, bytes_cnt); flowstats->count++; return true; } } typedef bool (*OpFlowForKey)(struct flows_stats *flowstats, FlowKey *flow_key, uint32_t hash, struct timespec *ctime, uint64_t pkts_cnt, uint64_t bytes_cnt); /** * Bypassed flows cleaning for IPv4 * * This function iterates on all the flows of the IPv4 table * looking for timeouted flow to delete from the flow table. */ static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name, struct flows_stats *flowstats, struct timespec *ctime, struct ebpf_timeout_config *tcfg, OpFlowForKey EBPFOpFlowForKey ) { int mapfd = EBPFGetMapFDByName(dev->dev, name); struct flowv4_keys key = {}, next_key; int found = 0; unsigned int i; uint64_t hash_cnt = 0; if (tcfg->cpus_count == 0) { SCLogWarning(SC_ERR_INVALID_VALUE, "CPU count should not be 0"); return 0; } bool dead_flow = false; while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) { uint64_t bytes_cnt = 0; uint64_t pkts_cnt = 0; hash_cnt++; if (dead_flow) { EBPFDeleteKey(mapfd, &key); dead_flow = false; } /* We use a per CPU structure so we will get a array of values. But if nr_cpus * is 1 then we have a global hash. */ BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count); memset(values_array, 0, sizeof(values_array)); int res = bpf_map_lookup_elem(mapfd, &next_key, values_array); if (res < 0) { SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]); SCLogDebug("errno: (%d) %s", errno, strerror(errno)); key = next_key; continue; } for (i = 0; i < tcfg->cpus_count; i++) { /* let's start accumulating value so we can compute the counters */ SCLogDebug("%d: Adding pkts %lu bytes %lu", i, BPF_PERCPU(values_array, i).packets, BPF_PERCPU(values_array, i).bytes); pkts_cnt += BPF_PERCPU(values_array, i).packets; bytes_cnt += BPF_PERCPU(values_array, i).bytes; } /* Get the corresponding Flow in the Flow table to compare and update * its counters and lastseen if needed */ FlowKey flow_key; if (tcfg->mode == AFP_MODE_XDP_BYPASS) { flow_key.sp = ntohs(next_key.port16[0]); flow_key.dp = ntohs(next_key.port16[1]); flow_key.src.addr_data32[0] = next_key.src; flow_key.dst.addr_data32[0] = next_key.dst; } else { flow_key.sp = next_key.port16[0]; flow_key.dp = next_key.port16[1]; flow_key.src.addr_data32[0] = ntohl(next_key.src); flow_key.dst.addr_data32[0] = ntohl(next_key.dst); } flow_key.src.family = AF_INET; flow_key.src.addr_data32[1] = 0; flow_key.src.addr_data32[2] = 0; flow_key.src.addr_data32[3] = 0; flow_key.dst.family = AF_INET; flow_key.dst.addr_data32[1] = 0; flow_key.dst.addr_data32[2] = 0; flow_key.dst.addr_data32[3] = 0; flow_key.vlan_id[0] = next_key.vlan_id[0]; flow_key.vlan_id[1] = next_key.vlan_id[1]; flow_key.proto = next_key.ip_proto; flow_key.recursion_level = 0; dead_flow = EBPFOpFlowForKey(flowstats, &flow_key, BPF_PERCPU(values_array, 0).hash, ctime, pkts_cnt, bytes_cnt); if (dead_flow) { found = 1; } if (TmThreadsCheckFlag(th_v, THV_KILL)) { return 0; } key = next_key; } if (dead_flow) { EBPFDeleteKey(mapfd, &key); found = 1; } SC_ATOMIC_ADD(dev->bypassed, flowstats->packets); LiveDevSetBypassStats(dev, hash_cnt, AF_INET); return found; } /** * Bypassed flows cleaning for IPv6 * * This function iterates on all the flows of the IPv4 table * looking for timeouted flow to delete from the flow table. */ static int EBPFForEachFlowV6Table(ThreadVars *th_v, LiveDevice *dev, const char *name, struct flows_stats *flowstats, struct timespec *ctime, struct ebpf_timeout_config *tcfg, OpFlowForKey EBPFOpFlowForKey ) { int mapfd = EBPFGetMapFDByName(dev->dev, name); struct flowv6_keys key = {}, next_key; int found = 0; unsigned int i; uint64_t hash_cnt = 0; if (tcfg->cpus_count == 0) { SCLogWarning(SC_ERR_INVALID_VALUE, "CPU count should not be 0"); return 0; } uint64_t pkts_cnt = 0; while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) { uint64_t bytes_cnt = 0; hash_cnt++; if (pkts_cnt > 0) { EBPFDeleteKey(mapfd, &key); } pkts_cnt = 0; /* We use a per CPU structure so we will get a array of values. But if nr_cpus * is 1 then we have a global hash. */ BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count); memset(values_array, 0, sizeof(values_array)); int res = bpf_map_lookup_elem(mapfd, &next_key, values_array); if (res < 0) { SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]); key = next_key; continue; } for (i = 0; i < tcfg->cpus_count; i++) { /* let's start accumulating value so we can compute the counters */ SCLogDebug("%d: Adding pkts %lu bytes %lu", i, BPF_PERCPU(values_array, i).packets, BPF_PERCPU(values_array, i).bytes); pkts_cnt += BPF_PERCPU(values_array, i).packets; bytes_cnt += BPF_PERCPU(values_array, i).bytes; } /* Get the corresponding Flow in the Flow table to compare and update * its counters and lastseen if needed */ FlowKey flow_key; if (tcfg->mode == AFP_MODE_XDP_BYPASS) { flow_key.sp = ntohs(next_key.port16[0]); flow_key.dp = ntohs(next_key.port16[1]); flow_key.src.family = AF_INET6; flow_key.src.addr_data32[0] = next_key.src[0]; flow_key.src.addr_data32[1] = next_key.src[1]; flow_key.src.addr_data32[2] = next_key.src[2]; flow_key.src.addr_data32[3] = next_key.src[3]; flow_key.dst.family = AF_INET6; flow_key.dst.addr_data32[0] = next_key.dst[0]; flow_key.dst.addr_data32[1] = next_key.dst[1]; flow_key.dst.addr_data32[2] = next_key.dst[2]; flow_key.dst.addr_data32[3] = next_key.dst[3]; } else { flow_key.sp = next_key.port16[0]; flow_key.dp = next_key.port16[1]; flow_key.src.family = AF_INET6; flow_key.src.addr_data32[0] = ntohl(next_key.src[0]); flow_key.src.addr_data32[1] = ntohl(next_key.src[1]); flow_key.src.addr_data32[2] = ntohl(next_key.src[2]); flow_key.src.addr_data32[3] = ntohl(next_key.src[3]); flow_key.dst.family = AF_INET6; flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]); flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]); flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]); flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]); } flow_key.vlan_id[0] = next_key.vlan_id[0]; flow_key.vlan_id[1] = next_key.vlan_id[1]; flow_key.proto = next_key.ip_proto; flow_key.recursion_level = 0; pkts_cnt = EBPFOpFlowForKey(flowstats, &flow_key, BPF_PERCPU(values_array, 0).hash, ctime, pkts_cnt, bytes_cnt); if (pkts_cnt > 0) { found = 1; } if (TmThreadsCheckFlag(th_v, THV_KILL)) { return 0; } key = next_key; } if (pkts_cnt > 0) { EBPFDeleteKey(mapfd, &key); found = 1; } SC_ATOMIC_ADD(dev->bypassed, flowstats->packets); LiveDevSetBypassStats(dev, hash_cnt, AF_INET6); return found; } int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data) { struct flows_stats local_bypassstats = { 0, 0, 0}; LiveDevice *ldev = NULL, *ndev; struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data; while(LiveDeviceForEach(&ldev, &ndev)) { EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4", &local_bypassstats, curtime, cfg, EBPFCreateFlowForKey); EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6", &local_bypassstats, curtime, cfg, EBPFCreateFlowForKey); } return 0; } /** * Flow timeout checking function * * This function is called by the Flow bypass manager to trigger removal * of entries in the kernel/userspace flow table if needed. * */ int EBPFCheckBypassedFlowTimeout(ThreadVars *th_v, struct flows_stats *bypassstats, struct timespec *curtime, void *data) { struct flows_stats local_bypassstats = { 0, 0, 0}; int ret = 0; int tcount = 0; LiveDevice *ldev = NULL, *ndev; struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data; BUG_ON(cfg == NULL); while(LiveDeviceForEach(&ldev, &ndev)) { memset(&local_bypassstats, 0, sizeof(local_bypassstats)); tcount = EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4", &local_bypassstats, curtime, cfg, EBPFUpdateFlowForKey); bypassstats->count = local_bypassstats.count; bypassstats->packets = local_bypassstats.packets ; bypassstats->bytes = local_bypassstats.bytes; if (tcount) { ret = 1; } memset(&local_bypassstats, 0, sizeof(local_bypassstats)); tcount = EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6", &local_bypassstats, curtime, cfg, EBPFUpdateFlowForKey); bypassstats->count += local_bypassstats.count; bypassstats->packets += local_bypassstats.packets ; bypassstats->bytes += local_bypassstats.bytes; if (tcount) { ret = 1; } } return ret; } void EBPFRegisterExtension(void) { g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree); g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree); } #ifdef HAVE_PACKET_XDP static uint32_t g_redirect_iface_cpu_counter = 0; static int EBPFAddCPUToMap(const char *iface, uint32_t i) { int cpumap = EBPFGetMapFDByName(iface, "cpu_map"); uint32_t queue_size = 4096; int ret; if (cpumap < 0) { SCLogError(SC_ERR_AFP_CREATE, "Can't find cpu_map"); return -1; } ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0); if (ret) { SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret); return -1; } int cpus_available = EBPFGetMapFDByName(iface, "cpus_available"); if (cpus_available < 0) { SCLogError(SC_ERR_AFP_CREATE, "Can't find cpus_available map"); return -1; } ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0); if (ret) { SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret); return -1; } return 0; } static void EBPFRedirectMapAddCPU(int i, void *data) { if (EBPFAddCPUToMap(data, i) < 0) { SCLogError(SC_ERR_INVALID_VALUE, "Unable to add CPU %d to set", i); } else { g_redirect_iface_cpu_counter++; } } void EBPFBuildCPUSet(ConfNode *node, char *iface) { uint32_t key0 = 0; int mapfd = EBPFGetMapFDByName(iface, "cpus_count"); if (mapfd < 0) { SCLogError(SC_ERR_INVALID_VALUE, "Unable to find 'cpus_count' map"); return; } g_redirect_iface_cpu_counter = 0; if (node == NULL) { bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter, BPF_ANY); return; } BuildCpusetWithCallback("xdp-cpu-redirect", node, EBPFRedirectMapAddCPU, iface); bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter, BPF_ANY); } /** * Setup peer interface in XDP system * * Ths function set up the peer interface in the XDP maps used by the * bypass filter. The first map tx_peer has type device map and is * used to store the peer. The second map tx_peer_int is used by the * code to check if we have a peer defined for this interface. * * As the map are per device we just need maps with one single element. * In both case, we use the key 0 to enter element so XDP kernel code * is using the same key. */ int EBPFSetPeerIface(const char *iface, const char *out_iface) { int mapfd = EBPFGetMapFDByName(iface, "tx_peer"); if (mapfd < 0) { SCLogError(SC_ERR_INVALID_VALUE, "Unable to find 'tx_peer' map"); return -1; } int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int"); if (intmapfd < 0) { SCLogError(SC_ERR_INVALID_VALUE, "Unable to find 'tx_peer_int' map"); return -1; } int key0 = 0; unsigned int peer_index = if_nametoindex(out_iface); if (peer_index == 0) { SCLogError(SC_ERR_INVALID_VALUE, "No iface '%s'", out_iface); return -1; } int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY); if (ret) { SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret); return -1; } ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY); if (ret) { SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret); return -1; } return 0; } /** * Bypass the flow on all ifaces it is seen on. This is used * in IPS mode. */ int EBPFUpdateFlow(Flow *f, Packet *p, void *data) { BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id); if (ifl == NULL) { ifl = SCCalloc(1, sizeof(*ifl)); if (ifl == NULL) { return 0; } ifl->dev = p->livedev; FlowSetStorageById(f, g_flow_storage_id, ifl); return 1; } /* Look for packet iface in the list */ BypassedIfaceList *ldev = ifl; while (ldev) { if (p->livedev == ldev->dev) { return 1; } ldev = ldev->next; } /* Call bypass function if ever not in the list */ p->BypassPacketsFlow(p); /* Add iface to the list */ BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl)); if (nifl == NULL) { return 0; } nifl->dev = p->livedev; nifl->next = ifl; FlowSetStorageById(f, g_flow_storage_id, nifl); return 1; } #endif /* HAVE_PACKET_XDP */ #endif