mirror of https://github.com/OISF/suricata
dpdk: rework hugepage hints to use per-numa information
The previous integration of hugepage analysis only fetched data from /proc/meminfo. However, this often proved misleading, mainly because it provides only global information and does not take into account different hugepage sizes (e.g. 1GB hugepages) or different NUMA nodes. Ticket: #6419 pull/10204/head
parent
c28cc93e23
commit
ca6f7c2d00
@ -0,0 +1,411 @@
|
||||
/* Copyright (C) 2023 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Lukas Sismis <lsismis@oisf.net>
|
||||
*/
|
||||
|
||||
#include "suricata.h"
|
||||
#include "util-debug.h"
|
||||
#include "util-hugepages.h"
|
||||
|
||||
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index);
|
||||
static uint16_t SystemNodeCountGet(void);
|
||||
static void SystemHugepagePerNodeGetHugepageSizes(
|
||||
uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes);
|
||||
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt);
|
||||
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node);
|
||||
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h);
|
||||
static void SystemHugepageNodeInfoDestroy(NodeInfo *n);
|
||||
static void SystemHugepageNodeInfoDump(NodeInfo *n);
|
||||
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s);
|
||||
|
||||
/**
 * \brief Tells whether the hugepage analysis is compiled in for this platform
 * \returns false when any of __CYGWIN__, OS_WIN32, __OpenBSD__ or sun is
 *          defined at build time, true otherwise
 */
static bool SystemHugepageSupported(void)
{
#if defined __CYGWIN__ || defined OS_WIN32 || defined __OpenBSD__ || defined sun
    const bool supported = false;
#else
    const bool supported = true;
#endif /* defined __CYGWIN__ || defined OS_WIN32 || defined __OpenBSD__ || defined sun */
    return supported;
}
|
||||
|
||||
// block of all hugepage-specific internal functions
|
||||
#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun
|
||||
|
||||
/**
|
||||
* \brief Linux-specific function to detect number of NUMA nodes on the system
|
||||
* \returns number of NUMA nodes, 0 on error
|
||||
*/
|
||||
static uint16_t SystemNodeCountGetLinux(void)
|
||||
{
|
||||
char dir_path[] = "/sys/devices/system/node/";
|
||||
DIR *dir = opendir(dir_path);
|
||||
if (dir == NULL) {
|
||||
SCLogError("unable to open %s", dir_path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint16_t count = 0;
|
||||
struct dirent *entry;
|
||||
while ((entry = readdir(dir)) != NULL) {
|
||||
char d_name[] = "node";
|
||||
if (entry->d_type == DT_DIR && strncmp(entry->d_name, d_name, strlen(d_name)) == 0)
|
||||
count++;
|
||||
}
|
||||
closedir(dir);
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Linux-specific function to detect number of unique hugepage sizes
|
||||
* \param[in] node_index index of the NUMA node
|
||||
* \returns number of hugepage sizes, 0 on error
|
||||
*/
|
||||
static uint16_t SystemHugepageSizesCntPerNodeGetLinux(uint16_t node_index)
|
||||
{
|
||||
char dir_path[256];
|
||||
snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index);
|
||||
DIR *dir = opendir(dir_path);
|
||||
if (dir == NULL) {
|
||||
SCLogError("unable to open %s", dir_path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint16_t count = 0;
|
||||
struct dirent *entry;
|
||||
while ((entry = readdir(dir)) != NULL) {
|
||||
char d_name[] = "hugepages-";
|
||||
if (entry->d_type == DT_DIR && strncmp(entry->d_name, d_name, strlen(d_name)) == 0)
|
||||
count++;
|
||||
}
|
||||
closedir(dir);
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Linux-specific function to detect unique hugepage sizes
|
||||
* \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
|
||||
* \param[in] node_index index of the NUMA node
|
||||
* \param[in] hp_sizes_cnt number of the unique hugepage sizes
|
||||
* \param[out] hp_sizes a pointer to the array of hugepage sizes
|
||||
*/
|
||||
static void SystemHugepagePerNodeGetHugepageSizesLinux(
|
||||
uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
|
||||
{
|
||||
char dir_path[256];
|
||||
snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index);
|
||||
DIR *dir = opendir(dir_path);
|
||||
if (dir == NULL) {
|
||||
SCLogError("unable to open %s", dir_path);
|
||||
return;
|
||||
}
|
||||
uint16_t index = 0;
|
||||
struct dirent *entry;
|
||||
while ((entry = readdir(dir)) != NULL) {
|
||||
if (entry->d_type == DT_DIR && strncmp(entry->d_name, "hugepages-", 10) == 0) {
|
||||
sscanf(entry->d_name, "hugepages-%ukB", &(hp_sizes[index]));
|
||||
index++;
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Linux-specific function to detect number of unique hugepage sizes
|
||||
* \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
|
||||
* \param[out] hugepages a pointer to the array of hugepage info structures
|
||||
* \param[in] hp_sizes a pointer to the array of hugepage sizes
|
||||
* \param[in] hp_sizes_cnt number of hugepage sizes
|
||||
* \param[in] node_index index of the NUMA node
|
||||
* \returns 0 on success, negative number on error
|
||||
*/
|
||||
static int16_t SystemHugepagePerNodeGetHugepageInfoLinux(
|
||||
HugepageInfo *hugepages, uint32_t *hp_sizes, uint16_t hp_sizes_cnt, uint16_t node_index)
|
||||
{
|
||||
for (int16_t i = 0; i < hp_sizes_cnt; i++) {
|
||||
hugepages[i].size_kb = hp_sizes[i];
|
||||
char path[256];
|
||||
snprintf(path, sizeof(path),
|
||||
"/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/nr_hugepages",
|
||||
node_index, hp_sizes[i]);
|
||||
FILE *f = fopen(path, "r");
|
||||
if (!f) {
|
||||
SCLogError("unable to open %s", path);
|
||||
return -SC_EEXIST;
|
||||
}
|
||||
if (fscanf(f, "%hu", &hugepages[i].allocated) != 1) {
|
||||
SCLogError("failed to read the total number of allocated hugepages (%ukB) on node %hu",
|
||||
hp_sizes[i], node_index);
|
||||
fclose(f);
|
||||
return -SC_EINVAL;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
snprintf(path, sizeof(path),
|
||||
"/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/free_hugepages",
|
||||
node_index, hp_sizes[i]);
|
||||
f = fopen(path, "r");
|
||||
if (!f) {
|
||||
SCLogError("unable to open %s", path);
|
||||
return -SC_EEXIST;
|
||||
}
|
||||
if (fscanf(f, "%hu", &hugepages[i].free) != 1) {
|
||||
SCLogError("failed to read the total number of free hugepages (%ukB) on node %hu",
|
||||
hp_sizes[i], node_index);
|
||||
fclose(f);
|
||||
return -SC_EINVAL;
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */
|
||||
|
||||
/**
|
||||
* \brief The function gathers information about hugepages on a given node
|
||||
* \param[in] node_index index of the NUMA node
|
||||
* \param[out] node a pointer to the structure to hold hugepage info
|
||||
* \returns 0 on success, negative number on error
|
||||
*/
|
||||
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node)
|
||||
{
|
||||
uint16_t hp_sizes_cnt = SystemHugepageSizesCntPerNodeGet(node_index);
|
||||
if (hp_sizes_cnt == 0) {
|
||||
SCLogError("hugepages not found for node %d", node_index);
|
||||
return -SC_EEXIST;
|
||||
}
|
||||
uint32_t *hp_sizes = SCCalloc(hp_sizes_cnt, sizeof(*hp_sizes));
|
||||
if (hp_sizes == NULL) {
|
||||
FatalError("failed to allocate memory for hugepage info");
|
||||
}
|
||||
SystemHugepagePerNodeGetHugepageSizes(node_index, hp_sizes_cnt, hp_sizes);
|
||||
|
||||
node->hugepages = SystemHugepageHugepageInfoCreate(hp_sizes_cnt);
|
||||
node->num_hugepage_sizes = hp_sizes_cnt;
|
||||
|
||||
int16_t ret = 0;
|
||||
#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun
|
||||
ret = SystemHugepagePerNodeGetHugepageInfoLinux(
|
||||
node->hugepages, hp_sizes, node->num_hugepage_sizes, node_index);
|
||||
#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */
|
||||
|
||||
SCFree(hp_sizes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * \brief Platform-independent entry point for detecting the number of NUMA nodes
 * \returns 0 if detection is unsuccessful (or not compiled in for this
 *          platform), otherwise number of detected nodes
 */
static uint16_t SystemNodeCountGet(void)
{
    uint16_t node_cnt = 0;
#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun
    node_cnt = SystemNodeCountGetLinux();
#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */
    return node_cnt;
}
|
||||
|
||||
/**
 * \brief Platform-independent entry point for detecting the number of unique
 *        hugepage sizes of a NUMA node
 * \param[in] node_index index of the NUMA node
 * \returns 0 if detection is unsuccessful (or not compiled in for this
 *          platform), otherwise number of hugepage sizes
 */
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index)
{
    uint16_t size_cnt = 0;
#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun
    size_cnt = SystemHugepageSizesCntPerNodeGetLinux(node_index);
#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */
    return size_cnt;
}
|
||||
|
||||
/**
 * \brief The function fills an array with unique hugepage sizes
 *
 * On platforms without hugepage support compiled in, the array is left
 * untouched.
 *
 * \param[in] node_index index of the NUMA node
 * \param[in] hp_sizes_cnt number of hugepage sizes (capacity of `hp_sizes`)
 * \param[out] hp_sizes a pointer to the array of hugepage sizes
 */
static void SystemHugepagePerNodeGetHugepageSizes(
        uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
{
#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun
    // plain call: a void function must not `return` an expression in ISO C
    SystemHugepagePerNodeGetHugepageSizesLinux(node_index, hp_sizes_cnt, hp_sizes);
#else
    (void)node_index;
    (void)hp_sizes_cnt;
    (void)hp_sizes;
#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */
}
|
||||
|
||||
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt)
|
||||
{
|
||||
HugepageInfo *h = SCCalloc(hp_size_cnt, sizeof(*h));
|
||||
if (h == NULL) {
|
||||
FatalError("failed to allocate hugepage info array");
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
/**
 * \brief Releases an array created by SystemHugepageHugepageInfoCreate
 * \param[in] h a pointer to the array, NULL is ignored
 */
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h)
{
    if (h == NULL)
        return;

    SCFree(h);
}
|
||||
|
||||
/**
 * \brief Releases the hugepage info array owned by a NUMA node record
 * \note The NodeInfo structure itself is not freed here
 * \param[in] n a pointer to the node record, NULL is ignored
 */
static void SystemHugepageNodeInfoDestroy(NodeInfo *n)
{
    if (n != NULL) {
        SystemHugepageHugepageInfoDestroy(n->hugepages);
    }
}
|
||||
|
||||
/**
 * \brief The function prints out (debug level) the hugepage records of one NUMA node
 * \param[in] n a pointer to the node record, NULL is ignored
 */
static void SystemHugepageNodeInfoDump(NodeInfo *n)
{
    if (n == NULL)
        return;

    for (uint16_t i = 0; i < n->num_hugepage_sizes; i++) {
        // size_kb is a uint32_t, hence %u (the 16-bit counters promote to int)
        SCLogDebug("Hugepage size - %ukB - allocated: %d free: %d", n->hugepages[i].size_kb,
                n->hugepages[i].allocated, n->hugepages[i].free);
    }
}
|
||||
|
||||
/**
|
||||
* \brief The function prints out the hugepage snapshot
|
||||
* \param[in] s a pointer to the snapshot
|
||||
*/
|
||||
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s)
|
||||
{
|
||||
if (s == NULL)
|
||||
return;
|
||||
|
||||
for (uint16_t i = 0; i < s->num_nodes; i++) {
|
||||
SCLogDebug("NUMA Node %d", i);
|
||||
SystemHugepageNodeInfoDump(&(s->nodes[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Releases a hugepage snapshot including all of its per-node records
 * \param[in] s a pointer to the snapshot, NULL is ignored
 */
void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s)
{
    if (s == NULL)
        return;

    // free the per-node hugepage arrays before the node array that owns them
    uint16_t node_id = 0;
    while (node_id < s->num_nodes) {
        SystemHugepageNodeInfoDestroy(s->nodes + node_id);
        node_id++;
    }
    SCFree(s->nodes);
    SCFree(s);
}
|
||||
|
||||
/**
|
||||
* \brief The function creates a snapshot of the system's hugepage usage
|
||||
* per NUMA node and per hugepage size.
|
||||
* The snapshot is used to evaluate the system's hugepage usage after
|
||||
* initialization of Suricata.
|
||||
* \returns a pointer to the snapshot, NULL on error
|
||||
*/
|
||||
SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void)
|
||||
{
|
||||
if (!SystemHugepageSupported())
|
||||
return NULL;
|
||||
|
||||
uint16_t node_cnt = SystemNodeCountGet();
|
||||
if (node_cnt == 0) {
|
||||
SCLogError("failed to obtain number of NUMA nodes in the system");
|
||||
return NULL;
|
||||
}
|
||||
NodeInfo *nodes = SCCalloc(node_cnt, sizeof(*nodes));
|
||||
if (nodes == NULL) {
|
||||
FatalError("failed to allocate memory for NUMA node info");
|
||||
}
|
||||
|
||||
SystemHugepageSnapshot *s = SCCalloc(1, sizeof(*s));
|
||||
if (s == NULL) {
|
||||
SCFree(nodes);
|
||||
FatalError("failed to allocate memory for NUMA node snapshot");
|
||||
}
|
||||
s->num_nodes = node_cnt;
|
||||
s->nodes = nodes;
|
||||
|
||||
for (uint16_t i = 0; i < s->num_nodes; i++) {
|
||||
int16_t ret = SystemHugepagePerNodeGetHugepageInfo(i, &s->nodes[i]);
|
||||
if (ret != 0) {
|
||||
SystemHugepageSnapshotDestroy(s);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief The function compares two hugepage snapshots and prints out
|
||||
* recommendations for hugepage configuration
|
||||
* \param[in] pre_s a pointer to the snapshot taken before Suricata initialization
|
||||
* \param[in] post_s a pointer to the snapshot taken after Suricata initialization
|
||||
*/
|
||||
void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s)
|
||||
{
|
||||
if (!SystemHugepageSupported() || pre_s == NULL || post_s == NULL)
|
||||
return;
|
||||
|
||||
SCLogDebug("Hugepages before initialization");
|
||||
SystemHugepageSnapshotDump(pre_s);
|
||||
|
||||
SCLogDebug("Hugepages after initialization");
|
||||
SystemHugepageSnapshotDump(post_s);
|
||||
|
||||
if (pre_s->num_nodes != post_s->num_nodes)
|
||||
FatalError("Number of NUMA nodes changed during hugepage evaluation");
|
||||
|
||||
for (int32_t i = 0; i < post_s->num_nodes; i++) {
|
||||
if (pre_s->nodes[i].num_hugepage_sizes != post_s->nodes[i].num_hugepage_sizes)
|
||||
FatalError("Number of NUMA node hugepage sizes changed during hugepage evaluation");
|
||||
|
||||
for (int32_t j = 0; j < post_s->nodes->num_hugepage_sizes; j++) {
|
||||
HugepageInfo *prerun_hp = &pre_s->nodes[i].hugepages[j];
|
||||
HugepageInfo *postrun_hp = &post_s->nodes[i].hugepages[j];
|
||||
|
||||
if (prerun_hp->free == 0) {
|
||||
continue; // this HP size on this node has no HPs allocated
|
||||
} else if (prerun_hp->free < postrun_hp->free) {
|
||||
SCLogWarning(
|
||||
"Hugepage usage decreased while it should only increase/stay the same");
|
||||
} else if (prerun_hp->free > 0 && prerun_hp->free == postrun_hp->free) {
|
||||
SCLogPerf("Hugepages on NUMA node %u are unused and can be deallocated", i);
|
||||
} else { // assumes this is an active NUMA node because at least some hugepages were
|
||||
// used
|
||||
// speculative hint only for 2048kB pages as e.g. 1 GB pages can leave a lot of room
|
||||
// for additional allocations
|
||||
if (postrun_hp->size_kb == 2048 && postrun_hp->free == 0) {
|
||||
SCLogPerf("all %ukB hugepages used on NUMA node %d - consider increasing to "
|
||||
"prevent memory allocation from other NUMA nodes",
|
||||
postrun_hp->size_kb, i);
|
||||
}
|
||||
|
||||
float free_hugepages_ratio = (float)postrun_hp->free / (float)prerun_hp->free;
|
||||
if (free_hugepages_ratio > 0.5) {
|
||||
int32_t used_hps = prerun_hp->free - postrun_hp->free;
|
||||
SCLogPerf("Hugepages on NUMA node %u can be set to %.0lf (only using %u/%u "
|
||||
"%ukB hugepages)",
|
||||
i, ceil((prerun_hp->free - postrun_hp->free) * 1.15), used_hps,
|
||||
prerun_hp->free, postrun_hp->size_kb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
/* Copyright (C) 2023 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Lukas Sismis <lsismis@oisf.net>
|
||||
*/
|
||||
|
||||
#ifndef UTIL_HUGEPAGES_H
|
||||
#define UTIL_HUGEPAGES_H
|
||||
|
||||
// Record of a single hugepage size on a single NUMA node
typedef struct {
|
||||
// size of the hugepage in kB (e.g. 2048 for 2 MB hugepages)
uint32_t size_kb;
|
||||
// number of hugepages of this size allocated on the node
uint16_t allocated;
|
||||
// number of hugepages of this size that are currently free on the node
uint16_t free;
|
||||
} HugepageInfo;
|
||||
|
||||
// Structure to hold information about individual NUMA nodes in the system and
|
||||
// their respective allocated hugepages
|
||||
// So for e.g. NUMA node 0 there can be 2 hugepage_size - 2 MB and 1 GB
|
||||
// Each hugepage size will then have a record of number of allocated/free hpages
|
||||
typedef struct {
|
||||
// number of elements in the `hugepages` array
uint16_t num_hugepage_sizes;
|
||||
// array with one record per hugepage size
HugepageInfo *hugepages;
|
||||
} NodeInfo;
|
||||
|
||||
// Structure to hold information about all hugepage sizes residing on all NUMA
|
||||
// nodes in the system
|
||||
typedef struct {
|
||||
// number of elements in the `nodes` array
uint16_t num_nodes;
|
||||
// array with one record per NUMA node
NodeInfo *nodes;
|
||||
} SystemHugepageSnapshot;
|
||||
|
||||
SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void);
|
||||
void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s);
|
||||
void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s);
|
||||
|
||||
#endif /* UTIL_HUGEPAGES_H */
|
Loading…
Reference in New Issue