dpdk/rss: move and change rss rte_flow functions
Move and adjust the base of RSS configuration from util-dpdk-i40e.c to
a new file that can be later utilized by other cards.
RSS configuration can be configured via rte_flow rules. This is useful
for possible future features such as specific header offload
(vxlan, nvgre) also implemented via rte_flow rules, as rte_flow
rules can be chained via groups and priorities.
i40e uses multiple different rte_flow rules to setup RSS. At first,
function DeviceSetRSSFlowQueues() is used to setup rx queues.
This rule matches all types of traffic, so the equivalent
to dpdk-testpmd pattern would be "pattern end"
This rule can not contain hash types (ipv4, ipv6 etc.) nor hash key.
The hash function used here is RTE_ETH_HASH_FUNCTION_DEFAULT.
The syntax in dpdk-testpmd for this rule with attributes:
port index == 0
used rx queue indices == 0 1 2 3
is as follows:
"flow create 0 ingress pattern end actions rss queues 0 1 2 3 end
func default / end"
The other rules configured by i40eDeviceSetRSSFlowIPv4() and
i40eDeviceSetRSSFlowIPv6() match specific type of traffic by l4 protocol
(none, TCP, UDP, SCTP). For example, pattern to match l3 ipv4 with l4
tcp traffic in dpdk-testpmd syntax would be equivalent of
"pattern eth / ipv4 / tcp / end".
These rules can not have rx queues configured, but have hash types
(l3 src and dst address). This means that the traffic distribution
is affected only by l3 addresses, independent of the l4 specifics.
Also these pattern matching rules have symmetric 6d5a
hash key configured. The length of the key is dependent on DPDK version.
The hash function (either RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ or
RTE_ETH_HASH_FUNCTION_TOEPLITZ, depending on DPDK version) used
in these rules hashes symmetrically due to the symmetric hash key.
The syntax in dpdk-testpmd for rule to match ipv4-tcp traffic with
attributes:
port index == 0
<hash_key> == 52 bytes long 6d5a symmetric hash key
is as follows:
"flow create 0 ingress pattern eth / ipv4 / tcp / end actions rss types
ipv4-tcp l3-src-only l3-dst-only end queues end key <hash_key>
key_len 52 func toeplitz / end"
(queues need to be set to NULL)
Ticket: 7337
8 months ago
|
|
|
/* Copyright (C) 2021-2025 Open Information Security Foundation
|
|
|
|
*
|
|
|
|
* You can copy, redistribute or modify this Program under the terms of
|
|
|
|
* the GNU General Public License version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* version 2 along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
* 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \defgroup dpdk DPDK running mode
|
|
|
|
*
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \file
|
|
|
|
*
|
|
|
|
* \author Lukas Sismis <lukas.sismis@gmail.com>
|
|
|
|
*
|
|
|
|
* DPDK capture interface
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "suricata-common.h"
|
|
|
|
#include "runmodes.h"
|
|
|
|
#include "decode.h"
|
|
|
|
#include "packet.h"
|
|
|
|
#include "source-dpdk.h"
|
|
|
|
#include "suricata.h"
|
|
|
|
#include "threads.h"
|
|
|
|
#include "threadvars.h"
|
|
|
|
#include "tm-threads.h"
|
|
|
|
#include "tmqh-packetpool.h"
|
|
|
|
#include "util-privs.h"
|
|
|
|
#include "util-device-private.h"
|
|
|
|
#include "action-globals.h"
|
|
|
|
|
|
|
|
#ifndef HAVE_DPDK
|
|
|
|
|
|
|
|
/* ThreadInit stub installed for both DPDK modules when DPDK is compiled out. */
TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data);
|
|
|
|
|
|
|
|
void TmModuleReceiveDPDKRegister(void)
|
|
|
|
{
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].name = "ReceiveDPDK";
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadInit = NoDPDKSupportExit;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].Func = NULL;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadExitPrintStats = NULL;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadDeinit = NULL;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].cap_flags = 0;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].flags = TM_FLAG_RECEIVE_TM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Registration Function for DecodeDPDK.
|
|
|
|
*/
|
|
|
|
void TmModuleDecodeDPDKRegister(void)
|
|
|
|
{
|
|
|
|
tmm_modules[TMM_DECODEDPDK].name = "DecodeDPDK";
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadInit = NoDPDKSupportExit;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].Func = NULL;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadExitPrintStats = NULL;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadDeinit = NULL;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].cap_flags = 0;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].flags = TM_FLAG_DECODE_TM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief this function prints an error message and exits.
|
|
|
|
*/
|
|
|
|
TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)
|
|
|
|
{
|
|
|
|
FatalError("Error creating thread %s: you do not have "
|
|
|
|
"support for DPDK enabled, on Linux host please recompile "
|
|
|
|
"with --enable-dpdk",
|
|
|
|
tv->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* We have DPDK support */
|
|
|
|
|
|
|
|
#include "util-affinity.h"
|
|
|
|
#include "util-dpdk.h"
|
|
|
|
#include "util-dpdk-i40e.h"
|
dpdk/rss: move and change rss rte_flow functions
Move and adjust the base of RSS configuration from util-dpdk-i40e.c to
a new file that can be later utilized by other cards.
RSS configuration can be configured via rte_flow rules. This is useful
for possible future features such as specific header offload
(vxlan, nvgre) also implemented via rte_flow rules, as rte_flow
rules can be chained via groups and priorities.
i40e uses multiple different rte_flow rules to setup RSS. At first,
function DeviceSetRSSFlowQueues() is used to setup rx queues.
This rule matches all types of traffic, so the equivalent
to dpdk-testpmd pattern would be "pattern end"
This rule can not contain hash types (ipv4, ipv6 etc.) nor hash key.
The hash function used here is RTE_ETH_HASH_FUNCTION_DEFAULT.
The syntax in dpdk-testpmd for this rule with attributes:
port index == 0
used rx queue indices == 0 1 2 3
is as follows:
"flow create 0 ingress pattern end actions rss queues 0 1 2 3 end
func default / end"
The other rules configured by i40eDeviceSetRSSFlowIPv4() and
i40eDeviceSetRSSFlowIPv6() match specific type of traffic by l4 protocol
(none, TCP, UDP, SCTP). For example, pattern to match l3 ipv4 with l4
tcp traffic in dpdk-testpmd syntax would be equivalent of
"pattern eth / ipv4 / tcp / end".
These rules can not have rx queues configured, but have hash types
(l3 src and dst address). This means that the traffic distribution
is affected only by l3 addresses, independent of the l4 specifics.
Also these pattern matching rules have symmetric 6d5a
hash key configured. The length of the key is dependent on DPDK version.
The hash function (either RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ or
RTE_ETH_HASH_FUNCTION_TOEPLITZ, depending on DPDK version) used
in these rules hashes symmetrically due to the symmetric hash key.
The syntax in dpdk-testpmd for rule to match ipv4-tcp traffic with
attributes:
port index == 0
<hash_key> == 52 bytes long 6d5a symmetric hash key
is as follows:
"flow create 0 ingress pattern eth / ipv4 / tcp / end actions rss types
ipv4-tcp l3-src-only l3-dst-only end queues end key <hash_key>
key_len 52 func toeplitz / end"
(queues need to be set to NULL)
Ticket: 7337
8 months ago
|
|
|
#include "util-dpdk-ice.h"
|
|
|
|
#include "util-dpdk-ixgbe.h"
|
|
|
|
#include "util-dpdk-mlx5.h"
|
|
|
|
#include "util-dpdk-bonding.h"
|
|
|
|
#include <numa.h>
|
|
|
|
|
|
|
|
#define BURST_SIZE 32
|
|
|
|
// interrupt mode constants
|
|
|
|
#define MIN_ZERO_POLL_COUNT 10U
|
|
|
|
#define MIN_ZERO_POLL_COUNT_TO_SLEEP 10U
|
|
|
|
#define MINIMUM_SLEEP_TIME_US 1U
|
|
|
|
#define STANDARD_SLEEP_TIME_US 100U
|
|
|
|
#define MAX_EPOLL_TIMEOUT_MS 500U
|
|
|
|
static rte_spinlock_t intr_lock[RTE_MAX_ETHPORTS];
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Structure to hold thread specific variables.
|
|
|
|
*/
|
|
|
|
typedef struct DPDKThreadVars_ {
|
|
|
|
/* counters */
|
|
|
|
uint64_t pkts;
|
|
|
|
ThreadVars *tv;
|
|
|
|
TmSlot *slot;
|
|
|
|
LiveDevice *livedev;
|
|
|
|
ChecksumValidationMode checksum_mode;
|
|
|
|
bool intr_enabled;
|
|
|
|
/* references to packet and drop counters */
|
|
|
|
uint16_t capture_dpdk_packets;
|
|
|
|
uint16_t capture_dpdk_rx_errs;
|
|
|
|
uint16_t capture_dpdk_imissed;
|
|
|
|
uint16_t capture_dpdk_rx_no_mbufs;
|
|
|
|
uint16_t capture_dpdk_ierrors;
|
|
|
|
uint16_t capture_dpdk_tx_errs;
|
|
|
|
unsigned int flags;
|
|
|
|
uint16_t threads;
|
|
|
|
/* for IPS */
|
|
|
|
DpdkCopyModeEnum copy_mode;
|
|
|
|
uint16_t out_port_id;
|
|
|
|
/* Entry in the peers_list */
|
|
|
|
|
|
|
|
uint64_t bytes;
|
|
|
|
uint64_t accepted;
|
|
|
|
uint64_t dropped;
|
|
|
|
uint16_t port_id;
|
|
|
|
uint16_t queue_id;
|
|
|
|
int32_t port_socket_id;
|
|
|
|
struct rte_mbuf *received_mbufs[BURST_SIZE];
|
|
|
|
DPDKWorkerSync *workers_sync;
|
|
|
|
} DPDKThreadVars;
|
|
|
|
|
|
|
|
static TmEcode ReceiveDPDKThreadInit(ThreadVars *, const void *, void **);
|
|
|
|
static void ReceiveDPDKThreadExitStats(ThreadVars *, void *);
|
|
|
|
static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *, void *);
|
|
|
|
static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot);
|
|
|
|
|
|
|
|
static TmEcode DecodeDPDKThreadInit(ThreadVars *, const void *, void **);
|
|
|
|
static TmEcode DecodeDPDKThreadDeinit(ThreadVars *tv, void *data);
|
|
|
|
static TmEcode DecodeDPDK(ThreadVars *, Packet *, void *);
|
|
|
|
|
|
|
|
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset);
|
|
|
|
static bool InterruptsRXEnable(uint16_t port_id, uint16_t queue_id)
|
|
|
|
{
|
|
|
|
uint32_t event_data = (uint32_t)port_id << UINT16_WIDTH | queue_id;
|
|
|
|
int32_t ret = rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
|
|
|
|
RTE_INTR_EVENT_ADD, (void *)((uintptr_t)event_data));
|
|
|
|
|
|
|
|
if (ret != 0) {
|
|
|
|
SCLogError("%s-Q%d: failed to enable interrupt mode: %s", DPDKGetPortNameByPortID(port_id),
|
|
|
|
queue_id, rte_strerror(-ret));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline uint32_t InterruptsSleepHeuristic(uint32_t no_pkt_polls_count)
|
|
|
|
{
|
|
|
|
if (no_pkt_polls_count < MIN_ZERO_POLL_COUNT_TO_SLEEP)
|
|
|
|
return MINIMUM_SLEEP_TIME_US;
|
|
|
|
|
|
|
|
return STANDARD_SLEEP_TIME_US;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void InterruptsTurnOnOff(uint16_t port_id, uint16_t queue_id, bool on)
|
|
|
|
{
|
|
|
|
rte_spinlock_lock(&(intr_lock[port_id]));
|
|
|
|
|
|
|
|
if (on)
|
|
|
|
rte_eth_dev_rx_intr_enable(port_id, queue_id);
|
|
|
|
else
|
|
|
|
rte_eth_dev_rx_intr_disable(port_id, queue_id);
|
|
|
|
|
|
|
|
rte_spinlock_unlock(&(intr_lock[port_id]));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Frees the mbufs stored at indices [offset, mbuf_cnt) of an array.
 *
 * \param mbuf_array array of mbuf pointers
 * \param mbuf_cnt exclusive end index of the range to free; note that it is
 *                 compared against the running index, i.e. it is NOT the
 *                 number of elements to free
 * \param offset index of the first mbuf to free
 */
static inline void DPDKFreeMbufArray(
        struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
{
    uint16_t idx = offset;
    while (idx < mbuf_cnt) {
        rte_pktmbuf_free(mbuf_array[idx]);
        idx++;
    }
}
|
|
|
|
|
|
|
|
/**
 * \brief Dispatches to the RSS setup routine that matches the port's driver.
 *
 * For a bonding device the driver name is first replaced by the one returned
 * by BondingDeviceDriverGet(). Drivers without a matching branch are left
 * untouched.
 *
 * \param ptv thread variables providing port id, thread count and device name
 * \param driver_name name of the PMD driving the port
 */
static void DevicePostStartPMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name)
{
    if (strcmp(driver_name, "net_bonding") == 0) {
        driver_name = BondingDeviceDriverGet(ptv->port_id);
    }

    if (strcmp(driver_name, "net_i40e") == 0) {
        i40eDeviceSetRSS(ptv->port_id, ptv->threads, ptv->livedev->dev);
    } else if (strcmp(driver_name, "net_ixgbe") == 0) {
        ixgbeDeviceSetRSS(ptv->port_id, ptv->threads, ptv->livedev->dev);
    } else if (strcmp(driver_name, "net_ice") == 0) {
        iceDeviceSetRSS(ptv->port_id, ptv->threads, ptv->livedev->dev);
    } else if (strcmp(driver_name, "mlx5_pci") == 0) {
        mlx5DeviceSetRSS(ptv->port_id, ptv->threads, ptv->livedev->dev);
    }
}
|
|
|
|
|
|
|
|
/**
 * \brief Flushes the rte_flow rules of the port for drivers that had RSS
 *        rules inserted in the post start section.
 *
 * For a bonding device the driver name is first replaced by the one returned
 * by BondingDeviceDriverGet(). A failed flush is only logged.
 *
 * \param ptv thread variables providing port id and device name
 * \param driver_name name of the PMD driving the port
 */
static void DevicePreClosePMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name)
{
    if (strcmp(driver_name, "net_bonding") == 0) {
        driver_name = BondingDeviceDriverGet(ptv->port_id);
    }

    bool flush_needed = strcmp(driver_name, "net_ixgbe") == 0 ||
                        strcmp(driver_name, "net_ice") == 0 ||
                        strcmp(driver_name, "mlx5_pci") == 0;
#if RTE_VERSION > RTE_VERSION_NUM(20, 0, 0, 0)
    /* i40e takes part only when built against DPDK newer than 20.0 */
    flush_needed = flush_needed || strcmp(driver_name, "net_i40e") == 0;
#endif /* RTE_VERSION > RTE_VERSION_NUM(20, 0, 0, 0) */
    if (!flush_needed) {
        return;
    }

    // Flush the RSS rules that have been inserted in the post start section
    struct rte_flow_error flush_error = { 0 };
    int32_t retval = rte_flow_flush(ptv->port_id, &flush_error);
    if (retval != 0) {
        /* the message of rte_flow_error may stay NULL; never hand NULL to %s */
        const char *flush_msg =
                flush_error.message != NULL ? flush_error.message : "(no stated reason)";
        SCLogError("%s: unable to flush rte_flow rules: %s Flush error msg: %s",
                ptv->livedev->dev, rte_strerror(-retval), flush_msg);
    }
}
|
|
|
|
|
|
|
|
/**
 * Attempts to retrieve NUMA node id on which the caller runs
 *
 * On Linux the node of the CPU returned by sched_getcpu() is looked up with
 * numa_node_of_cpu(); on other systems a warning is logged.
 *
 * @return NUMA id on success, -1 otherwise
 */
static int GetNumaNode(void)
{
#if defined(__linux__)
    const int current_cpu = sched_getcpu();
    return numa_node_of_cpu(current_cpu);
#else
    SCLogWarning("NUMA node retrieval is not supported on this OS.");
    return -1;
#endif
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Registration Function for ReceiveDPDK.
|
|
|
|
* \todo Unit tests are needed for this module.
|
|
|
|
*/
|
|
|
|
void TmModuleReceiveDPDKRegister(void)
|
|
|
|
{
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].name = "ReceiveDPDK";
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadInit = ReceiveDPDKThreadInit;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].Func = NULL;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].PktAcqLoop = ReceiveDPDKLoop;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].PktAcqBreakLoop = NULL;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadExitPrintStats = ReceiveDPDKThreadExitStats;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].ThreadDeinit = ReceiveDPDKThreadDeinit;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].cap_flags = SC_CAP_NET_RAW;
|
|
|
|
tmm_modules[TMM_RECEIVEDPDK].flags = TM_FLAG_RECEIVE_TM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Registration Function for DecodeDPDK.
|
|
|
|
* \todo Unit tests are needed for this module.
|
|
|
|
*/
|
|
|
|
void TmModuleDecodeDPDKRegister(void)
|
|
|
|
{
|
|
|
|
tmm_modules[TMM_DECODEDPDK].name = "DecodeDPDK";
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadInit = DecodeDPDKThreadInit;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].Func = DecodeDPDK;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadExitPrintStats = NULL;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].ThreadDeinit = DecodeDPDKThreadDeinit;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].cap_flags = 0;
|
|
|
|
tmm_modules[TMM_DECODEDPDK].flags = TM_FLAG_DECODE_TM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Publishes the capture counters of a receive thread.
 *
 * Some NICs (e.g. Intel) do not support queue statistics and the drops can be
 * fetched only on the port level. Therefore the port level statistics are
 * reported by the worker of queue 0 only, to have at least a continuous update
 * on the dropped packets. All other workers report just their own packet
 * count.
 *
 * \param ptv thread variables of the calling receive thread
 */
static inline void DPDKDumpCounters(DPDKThreadVars *ptv)
{
    if (ptv->queue_id != 0) {
        StatsSetUI64(ptv->tv, ptv->capture_dpdk_packets, ptv->pkts);
        return;
    }

    struct rte_eth_stats eth_stats;
    int retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
    if (unlikely(retval != 0)) {
        SCLogError("%s: failed to get stats: %s", ptv->livedev->dev, rte_strerror(-retval));
        return;
    }

    /* everything the port saw but this application never received */
    const uint64_t rx_lost = eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf;

    StatsSetUI64(ptv->tv, ptv->capture_dpdk_packets, ptv->pkts + rx_lost);
    SC_ATOMIC_SET(ptv->livedev->pkts, eth_stats.ipackets + rx_lost);
    StatsSetUI64(ptv->tv, ptv->capture_dpdk_rx_errs, rx_lost);
    StatsSetUI64(ptv->tv, ptv->capture_dpdk_imissed, eth_stats.imissed);
    StatsSetUI64(ptv->tv, ptv->capture_dpdk_rx_no_mbufs, eth_stats.rx_nombuf);
    StatsSetUI64(ptv->tv, ptv->capture_dpdk_ierrors, eth_stats.ierrors);
    StatsSetUI64(ptv->tv, ptv->capture_dpdk_tx_errs, eth_stats.oerrors);
    SC_ATOMIC_SET(ptv->livedev->drop, rx_lost);
}
|
|
|
|
|
|
|
|
/**
 * \brief Release callback of packets backed by a DPDK mbuf.
 *
 * In TAP mode, or in IPS mode when the packet is not marked for dropping, the
 * mbuf is transmitted on the output port/queue stored in the packet; in every
 * other case the mbuf is freed. The Packet itself is released afterwards.
 *
 * \param p packet to release
 */
static void DPDKReleasePacket(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    bool forward = p->dpdk_v.copy_mode == DPDK_COPY_MODE_TAP ||
                   (p->dpdk_v.copy_mode == DPDK_COPY_MODE_IPS &&
                           !PacketCheckAction(p, ACTION_DROP));
#if defined(RTE_LIBRTE_I40E_PMD) || defined(RTE_LIBRTE_IXGBE_PMD) || defined(RTE_LIBRTE_ICE_PMD)
    /* When enabling promiscuous mode on Intel cards, 2 ICMPv6 packets are generated.
       These get into the infinite cycle between the NIC and the switch in some cases,
       so ICMPv6 packets of type 143 are never forwarded. */
    if (forward && PacketIsICMPv6(p) && PacketGetICMPv6(p)->type == 143) {
        forward = false;
    }
#endif

    if (!forward) {
        rte_pktmbuf_free(p->dpdk_v.mbuf);
        p->dpdk_v.mbuf = NULL;
        PacketFreeOrRelease(p);
        return;
    }

    BUG_ON(PKT_IS_PSEUDOPKT(p));

    // rte_eth_tx_burst returns the number of successfully sent packets; with a burst
    // of size 1 that is either 0 (failure) or 1 (success).
    int sent = rte_eth_tx_burst(
            p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
    if (unlikely(sent < 1)) {
        // sometimes a repeated transmit can help to send out the packet
        rte_delay_us(DPDK_BURST_TX_WAIT_US);
        sent = rte_eth_tx_burst(
                p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
        if (unlikely(sent < 1)) {
            SCLogDebug("Unable to transmit the packet on port %u queue %u",
                    p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id);
            /* the mbuf was not handed to the NIC, so it is freed here */
            rte_pktmbuf_free(p->dpdk_v.mbuf);
            p->dpdk_v.mbuf = NULL;
        }
    }

    PacketFreeOrRelease(p);
}
|
|
|
|
|
|
|
|
/**
 * \brief Prepares a receive thread right before it enters the polling loop.
 *
 * Flags the thread as running, waits for the packet pool, resets the basic and
 * extended statistics of the port and, in interrupt mode, registers the rx
 * interrupt of the thread's queue.
 *
 * \param tv thread variables of the receive thread
 * \param ptv DPDK specific thread variables
 * \return TM_ECODE_OK on success, TM_ECODE_FAILED when the rx interrupt could
 *         not be enabled
 */
static TmEcode ReceiveDPDKLoopInit(ThreadVars *tv, DPDKThreadVars *ptv)
{
    SCEnter();

    // Indicate that the thread is actually running its application level
    // code (i.e., it can poll packets)
    TmThreadsSetFlag(tv, THV_RUNNING);
    PacketPoolWait();

    /* start counting from zero */
    rte_eth_stats_reset(ptv->port_id);
    rte_eth_xstats_reset(ptv->port_id);

    if (ptv->intr_enabled) {
        if (!InterruptsRXEnable(ptv->port_id, ptv->queue_id)) {
            SCReturnInt(TM_ECODE_FAILED);
        }
    }

    SCReturnInt(TM_ECODE_OK);
}
|
|
|
|
|
|
|
|
/**
 * \brief Calls the capture timeout handler while no packets arrive.
 *
 * The handler runs only when more than 100 ms passed since the previous run
 * on the calling thread.
 *
 * \param tv thread variables of the receive thread
 */
static inline void LoopHandleTimeoutOnIdle(ThreadVars *tv)
{
    static thread_local uint64_t last_timeout_msec = 0;

    SCTime_t now = TimeGet();
    uint64_t now_msec = SCTIME_MSECS(now);
    if (now_msec <= last_timeout_msec + 100) {
        return;
    }

    TmThreadsCaptureHandleTimeout(tv, NULL);
    last_timeout_msec = now_msec;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Decides if it should retry the packet poll or continue with the packet processing
|
|
|
|
* \return true if the poll should be retried, false otherwise
|
|
|
|
*/
|
|
|
|
static inline bool RXPacketCountHeuristic(ThreadVars *tv, DPDKThreadVars *ptv, uint16_t nb_rx)
|
|
|
|
{
|
|
|
|
static thread_local uint32_t zero_pkt_polls_cnt = 0;
|
|
|
|
|
|
|
|
if (nb_rx > 0) {
|
|
|
|
zero_pkt_polls_cnt = 0;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
LoopHandleTimeoutOnIdle(tv);
|
|
|
|
if (!ptv->intr_enabled)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
zero_pkt_polls_cnt++;
|
|
|
|
if (zero_pkt_polls_cnt <= MIN_ZERO_POLL_COUNT)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
uint32_t pwd_idle_hint = InterruptsSleepHeuristic(zero_pkt_polls_cnt);
|
|
|
|
if (pwd_idle_hint < STANDARD_SLEEP_TIME_US) {
|
|
|
|
rte_delay_us(pwd_idle_hint);
|
|
|
|
} else {
|
|
|
|
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, true);
|
|
|
|
struct rte_epoll_event event;
|
|
|
|
rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, MAX_EPOLL_TIMEOUT_MS);
|
|
|
|
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, false);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Initializes a packet from an mbuf
|
|
|
|
* \return true if the packet was initialized successfully, false otherwise
|
|
|
|
*/
|
|
|
|
static inline Packet *PacketInitFromMbuf(DPDKThreadVars *ptv, struct rte_mbuf *mbuf)
|
|
|
|
{
|
|
|
|
Packet *p = PacketGetFromQueueOrAlloc();
|
|
|
|
if (unlikely(p == NULL)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
PKT_SET_SRC(p, PKT_SRC_WIRE);
|
|
|
|
p->datalink = LINKTYPE_ETHERNET;
|
|
|
|
if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
|
|
|
|
p->flags |= PKT_IGNORE_CHECKSUM;
|
|
|
|
}
|
|
|
|
|
|
|
|
p->ts = TimeGet();
|
|
|
|
p->dpdk_v.mbuf = mbuf;
|
|
|
|
p->ReleasePacket = DPDKReleasePacket;
|
|
|
|
p->dpdk_v.copy_mode = ptv->copy_mode;
|
|
|
|
p->dpdk_v.out_port_id = ptv->out_port_id;
|
|
|
|
p->dpdk_v.out_queue_id = ptv->queue_id;
|
|
|
|
p->livedev = ptv->livedev;
|
|
|
|
|
|
|
|
if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
|
|
|
|
p->flags |= PKT_IGNORE_CHECKSUM;
|
|
|
|
} else if (ptv->checksum_mode == CHECKSUM_VALIDATION_OFFLOAD) {
|
|
|
|
uint64_t ol_flags = p->dpdk_v.mbuf->ol_flags;
|
|
|
|
if ((ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_GOOD &&
|
|
|
|
(ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) == RTE_MBUF_F_RX_L4_CKSUM_GOOD) {
|
|
|
|
SCLogDebug("HW detected GOOD IP and L4 chsum, ignoring validation");
|
|
|
|
p->flags |= PKT_IGNORE_CHECKSUM;
|
|
|
|
} else {
|
|
|
|
if ((ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_BAD) {
|
|
|
|
SCLogDebug("HW detected BAD IP checksum");
|
|
|
|
// chsum recalc will not be triggered but rule keyword check will be
|
|
|
|
p->l3.csum_set = true;
|
|
|
|
p->l3.csum = 0;
|
|
|
|
}
|
|
|
|
if ((ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) == RTE_MBUF_F_RX_L4_CKSUM_BAD) {
|
|
|
|
SCLogDebug("HW detected BAD L4 chsum");
|
|
|
|
p->l4.csum_set = true;
|
|
|
|
p->l4.csum = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void DPDKSegmentedMbufWarning(struct rte_mbuf *mbuf)
|
|
|
|
{
|
|
|
|
static thread_local bool segmented_mbufs_warned = false;
|
|
|
|
if (!segmented_mbufs_warned && !rte_pktmbuf_is_contiguous(mbuf)) {
|
|
|
|
char warn_s[] = "Segmented mbufs detected! Redmine Ticket #6012 "
|
|
|
|
"Check your configuration or report the issue";
|
|
|
|
enum rte_proc_type_t eal_t = rte_eal_process_type();
|
|
|
|
if (eal_t == RTE_PROC_SECONDARY) {
|
|
|
|
SCLogWarning("%s. To avoid segmented mbufs, "
|
|
|
|
"try to increase mbuf size in your primary application",
|
|
|
|
warn_s);
|
|
|
|
} else if (eal_t == RTE_PROC_PRIMARY) {
|
|
|
|
SCLogWarning("%s. To avoid segmented mbufs, "
|
|
|
|
"try to increase MTU in your suricata.yaml",
|
|
|
|
warn_s);
|
|
|
|
}
|
|
|
|
|
|
|
|
segmented_mbufs_warned = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Shutdown sequence of a receive thread.
 *
 * The thread checks in, spins until all workers of the port have checked in,
 * and dumps its counters. The worker of queue 0 additionally resets the
 * check-in counter and stops a port before dumping.
 *
 * \param ptv DPDK specific thread variables of the calling thread
 */
static void HandleShutdown(DPDKThreadVars *ptv)
{
    SCLogDebug("Stopping Suricata!");

    /* announce this worker, then wait for the remaining ones */
    SC_ATOMIC_ADD(ptv->workers_sync->worker_checked_in, 1);
    while (SC_ATOMIC_GET(ptv->workers_sync->worker_checked_in) < ptv->workers_sync->worker_cnt) {
        rte_delay_us(10);
    }

    if (ptv->queue_id == 0) {
        rte_delay_us(20); // wait for all threads to get out of the sync loop
        SC_ATOMIC_SET(ptv->workers_sync->worker_checked_in, 0);

        // If Suricata runs in peered mode, the peer threads might still want to send
        // packets to our port. Instead, we know, that we are done with the peered port, so
        // we stop it. The peered threads will stop our port.
        // In IDS we stop our port - no peer threads are running.
        const bool peered = ptv->copy_mode == DPDK_COPY_MODE_TAP ||
                            ptv->copy_mode == DPDK_COPY_MODE_IPS;
        const uint16_t port_to_stop = peered ? ptv->out_port_id : ptv->port_id;
        rte_eth_dev_stop(port_to_stop);
    }

    DPDKDumpCounters(ptv);
}
|
|
|
|
|
|
|
|
/**
 * \brief Dumps the counters when the seconds part of the current time differs
 *        from the one of the previous dump on the calling thread.
 *
 * \param ptv DPDK specific thread variables of the calling thread
 */
static void PeriodicDPDKDumpCounters(DPDKThreadVars *ptv)
{
    static thread_local SCTime_t last_dump = { 0 };

    SCTime_t now = TimeGet();
    /* Trigger one dump of stats every second */
    if (now.secs == last_dump.secs) {
        return;
    }

    DPDKDumpCounters(ptv);
    last_dump = now;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Main DPDK reading Loop function
|
|
|
|
*/
|
|
|
|
static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
|
|
|
|
{
|
|
|
|
SCEnter();
|
|
|
|
DPDKThreadVars *ptv = (DPDKThreadVars *)data;
|
|
|
|
ptv->slot = ((TmSlot *)slot)->slot_next;
|
|
|
|
TmEcode ret = ReceiveDPDKLoopInit(tv, ptv);
|
|
|
|
if (ret != TM_ECODE_OK) {
|
|
|
|
SCReturnInt(ret);
|
|
|
|
}
|
|
|
|
while (true) {
|
|
|
|
if (unlikely(suricata_ctl_flags != 0)) {
|
|
|
|
HandleShutdown(ptv);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint16_t nb_rx =
|
|
|
|
rte_eth_rx_burst(ptv->port_id, ptv->queue_id, ptv->received_mbufs, BURST_SIZE);
|
|
|
|
if (RXPacketCountHeuristic(tv, ptv, nb_rx)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptv->pkts += (uint64_t)nb_rx;
|
|
|
|
for (uint16_t i = 0; i < nb_rx; i++) {
|
|
|
|
Packet *p = PacketInitFromMbuf(ptv, ptv->received_mbufs[i]);
|
|
|
|
if (p == NULL) {
|
|
|
|
rte_pktmbuf_free(ptv->received_mbufs[i]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
DPDKSegmentedMbufWarning(ptv->received_mbufs[i]);
|
|
|
|
PacketSetData(p, rte_pktmbuf_mtod(p->dpdk_v.mbuf, uint8_t *),
|
|
|
|
rte_pktmbuf_pkt_len(p->dpdk_v.mbuf));
|
|
|
|
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
|
|
|
|
TmqhOutputPacketpool(ptv->tv, p);
|
|
|
|
DPDKFreeMbufArray(ptv->received_mbufs, nb_rx - i - 1, i + 1);
|
|
|
|
SCReturnInt(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PeriodicDPDKDumpCounters(ptv);
|
|
|
|
StatsSyncCountersIfSignalled(tv);
|
|
|
|
}
|
|
|
|
|
|
|
|
SCReturnInt(TM_ECODE_OK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Init function for ReceiveDPDK.
|
|
|
|
*
|
|
|
|
* \param tv pointer to ThreadVars
|
|
|
|
* \param initdata pointer to the interface passed from the user
|
|
|
|
* \param data pointer gets populated with DPDKThreadVars
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data)
|
|
|
|
{
|
|
|
|
SCEnter();
|
|
|
|
int retval, thread_numa;
|
|
|
|
DPDKThreadVars *ptv = NULL;
|
|
|
|
DPDKIfaceConfig *dpdk_config = (DPDKIfaceConfig *)initdata;
|
|
|
|
|
|
|
|
if (initdata == NULL) {
|
|
|
|
SCLogError("DPDK configuration is NULL in thread initialization");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptv = SCCalloc(1, sizeof(DPDKThreadVars));
|
|
|
|
if (unlikely(ptv == NULL)) {
|
|
|
|
SCLogError("Unable to allocate memory");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptv->tv = tv;
|
|
|
|
ptv->pkts = 0;
|
|
|
|
ptv->bytes = 0;
|
|
|
|
ptv->livedev = LiveGetDevice(dpdk_config->iface);
|
|
|
|
|
|
|
|
ptv->capture_dpdk_packets = StatsRegisterCounter("capture.packets", ptv->tv);
|
|
|
|
ptv->capture_dpdk_rx_errs = StatsRegisterCounter("capture.rx_errors", ptv->tv);
|
|
|
|
ptv->capture_dpdk_tx_errs = StatsRegisterCounter("capture.tx_errors", ptv->tv);
|
|
|
|
ptv->capture_dpdk_imissed = StatsRegisterCounter("capture.dpdk.imissed", ptv->tv);
|
|
|
|
ptv->capture_dpdk_rx_no_mbufs = StatsRegisterCounter("capture.dpdk.no_mbufs", ptv->tv);
|
|
|
|
ptv->capture_dpdk_ierrors = StatsRegisterCounter("capture.dpdk.ierrors", ptv->tv);
|
|
|
|
|
|
|
|
ptv->copy_mode = dpdk_config->copy_mode;
|
|
|
|
ptv->checksum_mode = dpdk_config->checksum_mode;
|
|
|
|
|
|
|
|
ptv->threads = dpdk_config->threads;
|
|
|
|
ptv->intr_enabled = (dpdk_config->flags & DPDK_IRQ_MODE) ? true : false;
|
|
|
|
ptv->port_id = dpdk_config->port_id;
|
|
|
|
ptv->out_port_id = dpdk_config->out_port_id;
|
|
|
|
ptv->port_socket_id = dpdk_config->socket_id;
|
|
|
|
|
|
|
|
thread_numa = GetNumaNode();
|
|
|
|
if (thread_numa >= 0 && ptv->port_socket_id != SOCKET_ID_ANY &&
|
|
|
|
thread_numa != ptv->port_socket_id) {
|
|
|
|
SC_ATOMIC_ADD(dpdk_config->inconsistent_numa_cnt, 1);
|
|
|
|
SCLogPerf("%s: NIC is on NUMA %d, thread on NUMA %d", dpdk_config->iface,
|
|
|
|
ptv->port_socket_id, thread_numa);
|
|
|
|
}
|
|
|
|
|
|
|
|
ptv->workers_sync = dpdk_config->workers_sync;
|
|
|
|
uint16_t queue_id = SC_ATOMIC_ADD(dpdk_config->queue_id, 1);
|
|
|
|
ptv->queue_id = queue_id;
|
|
|
|
|
|
|
|
// the last thread starts the device
|
|
|
|
if (queue_id == dpdk_config->threads - 1) {
|
|
|
|
retval = rte_eth_dev_start(ptv->port_id);
|
|
|
|
if (retval < 0) {
|
|
|
|
SCLogError("%s: error (%s) during device startup", dpdk_config->iface,
|
|
|
|
rte_strerror(-retval));
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct rte_eth_dev_info dev_info;
|
|
|
|
retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
|
|
|
|
if (retval != 0) {
|
|
|
|
SCLogError("%s: error (%s) when getting device info", dpdk_config->iface,
|
|
|
|
rte_strerror(-retval));
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t timeout = dpdk_config->linkup_timeout * 10;
|
|
|
|
while (timeout > 0) {
|
|
|
|
struct rte_eth_link link = { 0 };
|
|
|
|
retval = rte_eth_link_get_nowait(ptv->port_id, &link);
|
|
|
|
if (retval != 0) {
|
|
|
|
if (retval == -ENOTSUP) {
|
|
|
|
SCLogInfo("%s: link status not supported, skipping", dpdk_config->iface);
|
|
|
|
} else {
|
|
|
|
SCLogInfo("%s: error (%s) when getting link status, skipping",
|
|
|
|
dpdk_config->iface, rte_strerror(-retval));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (link.link_status) {
|
|
|
|
char link_status_str[RTE_ETH_LINK_MAX_STR_LEN];
|
|
|
|
#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0)
|
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
|
|
rte_eth_link_to_str(link_status_str, sizeof(link_status_str), &link);
|
|
|
|
#pragma GCC diagnostic pop
|
|
|
|
#else
|
|
|
|
snprintf(link_status_str, sizeof(link_status_str),
|
|
|
|
"Link Up, speed %u Mbps, %s", // 22 chars + 10 for digits + 11 for duplex
|
|
|
|
link.link_speed,
|
|
|
|
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ? "full-duplex" : "half-duplex");
|
|
|
|
#endif
|
|
|
|
|
|
|
|
SCLogInfo("%s: %s", dpdk_config->iface, link_status_str);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
rte_delay_ms(100);
|
|
|
|
timeout--;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dpdk_config->linkup_timeout && timeout == 0) {
|
|
|
|
SCLogWarning("%s: link is down, trying to continue anyway", dpdk_config->iface);
|
|
|
|
}
|
|
|
|
|
|
|
|
// some PMDs requires additional actions only after the device has started
|
|
|
|
DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name);
|
|
|
|
|
|
|
|
uint16_t inconsistent_numa_cnt = SC_ATOMIC_GET(dpdk_config->inconsistent_numa_cnt);
|
|
|
|
if (inconsistent_numa_cnt > 0 && ptv->port_socket_id != SOCKET_ID_ANY) {
|
|
|
|
SCLogWarning("%s: NIC is on NUMA %d, %u threads on different NUMA node(s)",
|
|
|
|
dpdk_config->iface, ptv->port_socket_id, inconsistent_numa_cnt);
|
|
|
|
} else if (ptv->port_socket_id == SOCKET_ID_ANY && rte_socket_count() > 1) {
|
|
|
|
SCLogNotice(
|
|
|
|
"%s: unable to determine NIC's NUMA node, degraded performance can be expected",
|
|
|
|
dpdk_config->iface);
|
|
|
|
}
|
|
|
|
if (ptv->intr_enabled) {
|
|
|
|
rte_spinlock_init(&intr_lock[ptv->port_id]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*data = (void *)ptv;
|
|
|
|
dpdk_config->DerefFunc(dpdk_config);
|
|
|
|
SCReturnInt(TM_ECODE_OK);
|
|
|
|
|
|
|
|
fail:
|
|
|
|
if (dpdk_config != NULL)
|
|
|
|
dpdk_config->DerefFunc(dpdk_config);
|
|
|
|
if (ptv != NULL)
|
|
|
|
SCFree(ptv);
|
|
|
|
SCReturnInt(TM_ECODE_FAILED);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void PrintDPDKPortXstats(uint16_t port_id, const char *port_name)
|
|
|
|
{
|
|
|
|
struct rte_eth_xstat *xstats;
|
|
|
|
struct rte_eth_xstat_name *xstats_names;
|
|
|
|
|
|
|
|
int32_t ret = rte_eth_xstats_get(port_id, NULL, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
FatalError("Error (%s) getting count of rte_eth_xstats failed on port %s",
|
|
|
|
rte_strerror(-ret), port_name);
|
|
|
|
}
|
|
|
|
uint16_t len = (uint16_t)ret;
|
|
|
|
|
|
|
|
xstats = SCCalloc(len, sizeof(*xstats));
|
|
|
|
if (xstats == NULL)
|
|
|
|
FatalError("Failed to allocate memory for the rte_eth_xstat structure");
|
|
|
|
|
|
|
|
ret = rte_eth_xstats_get(port_id, xstats, len);
|
|
|
|
if (ret < 0 || ret > len) {
|
|
|
|
SCFree(xstats);
|
|
|
|
FatalError("Error (%s) getting rte_eth_xstats failed on port %s", rte_strerror(-ret),
|
|
|
|
port_name);
|
|
|
|
}
|
|
|
|
xstats_names = SCCalloc(len, sizeof(*xstats_names));
|
|
|
|
if (xstats_names == NULL) {
|
|
|
|
SCFree(xstats);
|
|
|
|
FatalError("Failed to allocate memory for the rte_eth_xstat_name array");
|
|
|
|
}
|
|
|
|
ret = rte_eth_xstats_get_names(port_id, xstats_names, len);
|
|
|
|
if (ret < 0 || ret > len) {
|
|
|
|
SCFree(xstats);
|
|
|
|
SCFree(xstats_names);
|
|
|
|
FatalError("Error (%s) getting names of rte_eth_xstats failed on port %s",
|
|
|
|
rte_strerror(-ret), port_name);
|
|
|
|
}
|
|
|
|
for (int32_t i = 0; i < len; i++) {
|
|
|
|
if (xstats[i].value > 0)
|
|
|
|
SCLogPerf("Port %u (%s) - %s: %" PRIu64, port_id, port_name, xstats_names[i].name,
|
|
|
|
xstats[i].value);
|
|
|
|
}
|
|
|
|
|
|
|
|
SCFree(xstats);
|
|
|
|
SCFree(xstats_names);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief This function prints stats to the screen at exit.
|
|
|
|
* \param tv pointer to ThreadVars
|
|
|
|
* \param data pointer that gets cast into DPDKThreadVars for ptv
|
|
|
|
*/
|
|
|
|
static void ReceiveDPDKThreadExitStats(ThreadVars *tv, void *data)
|
|
|
|
{
|
|
|
|
SCEnter();
|
|
|
|
int retval;
|
|
|
|
DPDKThreadVars *ptv = (DPDKThreadVars *)data;
|
|
|
|
|
|
|
|
if (ptv->queue_id == 0) {
|
|
|
|
struct rte_eth_stats eth_stats;
|
|
|
|
PrintDPDKPortXstats(ptv->port_id, ptv->livedev->dev);
|
|
|
|
retval = rte_eth_stats_get(ptv->port_id, ð_stats);
|
|
|
|
if (unlikely(retval != 0)) {
|
|
|
|
SCLogError("%s: failed to get stats (%s)", ptv->livedev->dev, strerror(-retval));
|
|
|
|
SCReturn;
|
|
|
|
}
|
|
|
|
SCLogPerf("%s: total RX stats: packets %" PRIu64 " bytes: %" PRIu64 " missed: %" PRIu64
|
|
|
|
" errors: %" PRIu64 " nombufs: %" PRIu64,
|
|
|
|
ptv->livedev->dev, eth_stats.ipackets, eth_stats.ibytes, eth_stats.imissed,
|
|
|
|
eth_stats.ierrors, eth_stats.rx_nombuf);
|
|
|
|
if (ptv->copy_mode == DPDK_COPY_MODE_TAP || ptv->copy_mode == DPDK_COPY_MODE_IPS)
|
|
|
|
SCLogPerf("%s: total TX stats: packets %" PRIu64 " bytes: %" PRIu64 " errors: %" PRIu64,
|
|
|
|
ptv->livedev->dev, eth_stats.opackets, eth_stats.obytes, eth_stats.oerrors);
|
|
|
|
}
|
|
|
|
|
|
|
|
DPDKDumpCounters(ptv);
|
|
|
|
SCLogPerf("(%s) received packets %" PRIu64, tv->name, ptv->pkts);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief DeInit function closes dpdk at exit.
|
|
|
|
* \param tv pointer to ThreadVars
|
|
|
|
* \param data pointer that gets cast into DPDKThreadVars for ptv
|
|
|
|
*/
|
|
|
|
static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *tv, void *data)
|
|
|
|
{
|
|
|
|
SCEnter();
|
|
|
|
DPDKThreadVars *ptv = (DPDKThreadVars *)data;
|
|
|
|
|
|
|
|
if (ptv->queue_id == 0) {
|
|
|
|
struct rte_eth_dev_info dev_info;
|
|
|
|
int retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
|
|
|
|
if (retval != 0) {
|
|
|
|
SCLogError("%s: error (%s) when getting device info", ptv->livedev->dev,
|
|
|
|
rte_strerror(-retval));
|
|
|
|
SCReturnInt(TM_ECODE_FAILED);
|
|
|
|
}
|
|
|
|
|
|
|
|
DevicePreClosePMDSpecificActions(ptv, dev_info.driver_name);
|
|
|
|
|
|
|
|
if (ptv->workers_sync) {
|
|
|
|
SCFree(ptv->workers_sync);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SCFree(ptv);
|
|
|
|
SCReturnInt(TM_ECODE_OK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief This function passes off to link type decoders.
|
|
|
|
*
|
|
|
|
* DecodeDPDK decodes packets from DPDK and passes
|
|
|
|
* them off to the proper link type decoder.
|
|
|
|
*
|
|
|
|
* \param t pointer to ThreadVars
|
|
|
|
* \param p pointer to the current packet
|
|
|
|
* \param data pointer that gets cast into DPDKThreadVars for ptv
|
|
|
|
*/
|
|
|
|
static TmEcode DecodeDPDK(ThreadVars *tv, Packet *p, void *data)
|
|
|
|
{
|
|
|
|
SCEnter();
|
|
|
|
DecodeThreadVars *dtv = (DecodeThreadVars *)data;
|
|
|
|
|
|
|
|
BUG_ON(PKT_IS_PSEUDOPKT(p));
|
|
|
|
|
|
|
|
/* update counters */
|
|
|
|
DecodeUpdatePacketCounters(tv, dtv, p);
|
|
|
|
|
|
|
|
/* If suri has set vlan during reading, we increase vlan counter */
|
|
|
|
if (p->vlan_idx) {
|
|
|
|
StatsIncr(tv, dtv->counter_vlan);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* call the decoder */
|
|
|
|
DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
|
|
|
|
|
|
|
|
PacketDecodeFinalize(tv, dtv, p);
|
|
|
|
|
|
|
|
SCReturnInt(TM_ECODE_OK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Thread init function of the DPDK decoder.
 *
 * Allocates the DecodeThreadVars for this thread and registers the decoder
 * performance counters on them.
 *
 * \param tv pointer to ThreadVars
 * \param initdata not used by this function
 * \param data pointer gets populated with the allocated DecodeThreadVars
 *
 * \retval TM_ECODE_OK on success
 * \retval TM_ECODE_FAILED when the DecodeThreadVars cannot be allocated
 */
static TmEcode DecodeDPDKThreadInit(ThreadVars *tv, const void *initdata, void **data)
{
    SCEnter();

    DecodeThreadVars *decode_vars = DecodeThreadVarsAlloc(tv);
    if (decode_vars == NULL) {
        SCReturnInt(TM_ECODE_FAILED);
    }

    DecodeRegisterPerfCounters(decode_vars, tv);
    *data = (void *)decode_vars;

    SCReturnInt(TM_ECODE_OK);
}
|
|
|
|
|
|
|
|
/**
 * \brief Thread deinit function of the DPDK decoder.
 *
 * \param tv pointer to ThreadVars
 * \param data the decoder thread data to release; may be NULL
 *
 * \retval TM_ECODE_OK always
 */
static TmEcode DecodeDPDKThreadDeinit(ThreadVars *tv, void *data)
{
    SCEnter();
    if (data != NULL) {
        DecodeThreadVarsFree(tv, data);
    }
    SCReturnInt(TM_ECODE_OK);
}
|
|
|
|
|
|
|
|
#endif /* HAVE_DPDK */
|
|
|
|
/* eof */
|
|
|
|
/**
|
|
|
|
* @}
|
|
|
|
*/
|