datasets: allow memcap and hashsize to be set via yaml or rule

It is now possible to set the memcap and hashsize via suricata.yaml and
rules.

Rule example:

alert http any any -> any any (http.user_agent; dataset:isset,ua-seen,type string,load datasets.csv,memcap 100mb,hashsize 2048; sid:1;)

suricata.yaml example:

datasets:
  ua-seen:
    type: string
    load: datasets.csv
    memcap: 20mb
    hashsize: 2048
pull/5407/head
Shivani Bhardwaj 5 years ago committed by Victor Julien
parent b2482d6c60
commit 5ac94fc407

@ -33,6 +33,7 @@
#include "util-crypt.h" // encode base64
#include "util-base64.h" // decode base64
#include "util-byte.h"
#include "util-misc.h"
SCMutex sets_lock = SCMUTEX_INITIALIZER;
static Dataset *sets = NULL;
@ -216,6 +217,7 @@ static int DatasetLoadMd5(Dataset *set)
(uint32_t)strlen(line), line);
}
}
THashConsolidateMemcap(set->hash);
fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
@ -281,6 +283,7 @@ static int DatasetLoadSha256(Dataset *set)
cnt++;
}
}
THashConsolidateMemcap(set->hash);
fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
@ -356,6 +359,7 @@ static int DatasetLoadString(Dataset *set)
SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);
fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
@ -416,8 +420,8 @@ Dataset *DatasetFind(const char *name, enum DatasetTypes type)
return set;
}
Dataset *DatasetGet(const char *name, enum DatasetTypes type,
const char *save, const char *load)
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
uint64_t memcap, uint32_t hashsize)
{
if (strlen(name) > DATASET_NAME_MAX_LEN) {
return NULL;
@ -489,24 +493,24 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type,
switch (type) {
case DATASET_TYPE_MD5:
set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet,
Md5StrFree, Md5StrHash, Md5StrCompare, load != NULL ? 1 : 0);
set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
Md5StrCompare, load != NULL ? 1 : 0, memcap, hashsize);
if (set->hash == NULL)
goto out_err;
if (DatasetLoadMd5(set) < 0)
goto out_err;
break;
case DATASET_TYPE_STRING:
set->hash = THashInit(cnf_name, sizeof(StringType), StringSet,
StringFree, StringHash, StringCompare, load != NULL ? 1 : 0);
set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
StringCompare, load != NULL ? 1 : 0, memcap, hashsize);
if (set->hash == NULL)
goto out_err;
if (DatasetLoadString(set) < 0)
goto out_err;
break;
case DATASET_TYPE_SHA256:
set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet,
Sha256StrFree, Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0);
set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0, memcap, hashsize);
if (set->hash == NULL)
goto out_err;
if (DatasetLoadSha256(set) < 0)
@ -609,6 +613,8 @@ int DatasetsInit(void)
char save[PATH_MAX] = "";
char load[PATH_MAX] = "";
uint64_t memcap = 0;
uint32_t hashsize = 0;
const char *set_name = iter->name;
if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
@ -636,13 +642,34 @@ int DatasetsInit(void)
}
}
ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
if (set_memcap) {
if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"memcap value cannot be"
" deduced: %s, resetting to default",
set_memcap->val);
memcap = 0;
}
}
ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
if (set_hashsize) {
if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"hashsize value cannot be"
" deduced: %s, resetting to default",
set_hashsize->val);
hashsize = 0;
}
}
char conf_str[1024];
snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
SCLogDebug("(%d) set %s type %s. Conf %s", n, set_name, set_type->val, conf_str);
if (strcmp(set_type->val, "md5") == 0) {
Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load);
Dataset *dset =
DatasetGet(set_name, DATASET_TYPE_MD5, save, load, memcap, hashsize);
if (dset == NULL)
FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
@ -650,7 +677,8 @@ int DatasetsInit(void)
n++;
} else if (strcmp(set_type->val, "sha256") == 0) {
Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load);
Dataset *dset =
DatasetGet(set_name, DATASET_TYPE_SHA256, save, load, memcap, hashsize);
if (dset == NULL)
FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
@ -658,7 +686,8 @@ int DatasetsInit(void)
n++;
} else if (strcmp(set_type->val, "string") == 0) {
Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load);
Dataset *dset =
DatasetGet(set_name, DATASET_TYPE_STRING, save, load, memcap, hashsize);
if (dset == NULL)
FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);

@ -51,8 +51,8 @@ typedef struct Dataset {
enum DatasetTypes DatasetGetTypeFromString(const char *s);
Dataset *DatasetFind(const char *name, enum DatasetTypes type);
Dataset *DatasetGet(const char *name, enum DatasetTypes type,
const char *save, const char *load);
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
uint64_t memcap, uint32_t hashsize);
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len);
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len);
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,

@ -38,6 +38,7 @@
#include "util-byte.h"
#include "util-debug.h"
#include "util-print.h"
#include "util-misc.h"
/* Keyword option pattern: a lowercase command word followed by one to four
 * comma-separated values.
 * NOTE(review): the class range `A-z` also covers the ASCII punctuation that
 * sits between 'Z' and 'a' (brackets, backslash, caret, underscore,
 * backtick) -- confirm whether `A-Za-z` was intended.
 * NOTE(review): the repetition bound is {1,4}; confirm it still fits option
 * lists that additionally carry memcap and hashsize. */
#define PARSE_REGEX "([a-z]+)(?:,\\s*([\\-_A-z0-9\\s\\.]+)){1,4}"
static DetectParseRegex parse_regex;
@ -91,12 +92,9 @@ int DetectDatarepBufferMatch(DetectEngineThreadCtx *det_ctx,
return 0;
}
static int DetectDatarepParse(const char *str,
char *cmd, int cmd_len,
char *name, int name_len,
enum DatasetTypes *type,
char *load, size_t load_size,
uint16_t *rep_value)
static int DetectDatarepParse(const char *str, char *cmd, int cmd_len, char *name, int name_len,
enum DatasetTypes *type, char *load, size_t load_size, uint16_t *rep_value,
uint64_t *memcap, uint32_t *hashsize)
{
bool cmd_set = false;
bool name_set = false;
@ -169,6 +167,24 @@ static int DetectDatarepParse(const char *str,
SCLogDebug("load %s", val);
strlcpy(load, val, load_size);
}
if (strcmp(key, "memcap") == 0) {
if (ParseSizeStringU64(val, memcap) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"invalid value for memcap: %s,"
" resetting to default",
val);
*memcap = 0;
}
}
if (strcmp(key, "hashsize") == 0) {
if (ParseSizeStringU32(val, hashsize) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"invalid value for hashsize: %s,"
" resetting to default",
val);
*hashsize = 0;
}
}
}
SCLogDebug("key: %s, value: %s", key, val);
@ -279,6 +295,8 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char
enum DatasetTypes type = DATASET_TYPE_NOTSET;
char load[PATH_MAX];
uint16_t value = 0;
uint64_t memcap = 0;
uint32_t hashsize = 0;
if (DetectBufferGetActiveList(de_ctx, s) == -1) {
SCLogError(SC_ERR_INVALID_SIGNATURE,
@ -293,8 +311,8 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char
SCReturnInt(-1);
}
if (!DetectDatarepParse(rawstr, cmd_str, sizeof(cmd_str), name,
sizeof(name), &type, load, sizeof(load), &value)) {
if (!DetectDatarepParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load,
sizeof(load), &value, &memcap, &hashsize)) {
return -1;
}
@ -316,7 +334,7 @@ static int DetectDatarepSetup (DetectEngineCtx *de_ctx, Signature *s, const char
return -1;
}
Dataset *set = DatasetGet(name, type, /* no save */ NULL, load);
Dataset *set = DatasetGet(name, type, /* no save */ NULL, load, memcap, hashsize);
if (set == NULL) {
SCLogError(SC_ERR_UNKNOWN_VALUE,
"failed to set up datarep set '%s'.", name);

@ -37,6 +37,7 @@
#include "util-debug.h"
#include "util-print.h"
#include "util-misc.h"
/* Keyword option pattern: a lowercase command word followed by one to four
 * comma-separated values.
 * NOTE(review): the class range `A-z` also covers the ASCII punctuation that
 * sits between 'Z' and 'a' (brackets, backslash, caret, underscore,
 * backtick) -- confirm whether `A-Za-z` was intended.
 * NOTE(review): the repetition bound is {1,4}; a rule such as
 * "isset,ua-seen,type string,load datasets.csv,memcap 100mb,hashsize 2048"
 * has five values after the command -- confirm this bound is still adequate. */
#define PARSE_REGEX "([a-z]+)(?:,\\s*([\\-_A-z0-9\\s\\.]+)){1,4}"
static DetectParseRegex parse_regex;
@ -99,12 +100,9 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx,
return 0;
}
static int DetectDatasetParse(const char *str,
char *cmd, int cmd_len,
char *name, int name_len,
enum DatasetTypes *type,
char *load, size_t load_size,
char *save, size_t save_size)
static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *name, int name_len,
enum DatasetTypes *type, char *load, size_t load_size, char *save, size_t save_size,
uint64_t *memcap, uint32_t *hashsize)
{
bool cmd_set = false;
bool name_set = false;
@ -195,6 +193,24 @@ static int DetectDatasetParse(const char *str,
strlcpy(save, val, save_size);
state_set = true;
}
if (strcmp(key, "memcap") == 0) {
if (ParseSizeStringU64(val, memcap) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"invalid value for memcap: %s,"
" resetting to default",
val);
*memcap = 0;
}
}
if (strcmp(key, "hashsize") == 0) {
if (ParseSizeStringU32(val, hashsize) < 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"invalid value for hashsize: %s,"
" resetting to default",
val);
*hashsize = 0;
}
}
}
SCLogDebug("key: %s, value: %s", key, val);
@ -314,6 +330,8 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
DetectDatasetData *cd = NULL;
SigMatch *sm = NULL;
uint8_t cmd = 0;
uint64_t memcap = 0;
uint32_t hashsize = 0;
char cmd_str[16] = "", name[DATASET_NAME_MAX_LEN + 1] = "";
enum DatasetTypes type = DATASET_TYPE_NOTSET;
char load[PATH_MAX] = "";
@ -332,8 +350,8 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
SCReturnInt(-1);
}
if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name,
sizeof(name), &type, load, sizeof(load), save, sizeof(save))) {
if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load,
sizeof(load), save, sizeof(save), &memcap, &hashsize)) {
return -1;
}
@ -371,7 +389,7 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
}
SCLogDebug("name '%s' load '%s' save '%s'", name, load, save);
Dataset *set = DatasetGet(name, type, save, load);
Dataset *set = DatasetGet(name, type, save, load, memcap, hashsize);
if (set == NULL) {
SCLogError(SC_ERR_INVALID_SIGNATURE,
"failed to set up dataset '%s'.", name);

@ -290,12 +290,9 @@ static void THashInitConfig(THashTableContext *ctx, const char *cnf_prefix)
return;
}
THashTableContext* THashInit(const char *cnf_prefix, size_t data_size,
int (*DataSet)(void *, void *),
void (*DataFree)(void *),
uint32_t (*DataHash)(void *),
bool (*DataCompare)(void *, void *),
bool reset_memcap)
THashTableContext *THashInit(const char *cnf_prefix, size_t data_size,
int (*DataSet)(void *, void *), void (*DataFree)(void *), uint32_t (*DataHash)(void *),
bool (*DataCompare)(void *, void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
{
THashTableContext *ctx = SCCalloc(1, sizeof(*ctx));
BUG_ON(!ctx);
@ -308,8 +305,14 @@ THashTableContext* THashInit(const char *cnf_prefix, size_t data_size,
/* set defaults */
ctx->config.hash_rand = (uint32_t)RandomGet();
ctx->config.hash_size = THASH_DEFAULT_HASHSIZE;
ctx->config.hash_size = hashsize > 0 ? hashsize : THASH_DEFAULT_HASHSIZE;
/* When loading from a file, raise the memcap to the highest possible value
unless an explicit memcap was supplied (via the rule keyword or suricata.yaml) */
if (memcap > 0) {
ctx->config.memcap = memcap;
} else {
ctx->config.memcap = reset_memcap ? UINT64_MAX : THASH_DEFAULT_MEMCAP;
}
ctx->config.prealloc = THASH_DEFAULT_PREALLOC;
SC_ATOMIC_INIT(ctx->counter);
@ -321,6 +324,14 @@ THashTableContext* THashInit(const char *cnf_prefix, size_t data_size,
return ctx;
}
/** \brief Set memcap to current memuse
 *
 *  The new cap is the larger of the table's current memory use and
 *  THASH_DEFAULT_MEMCAP, so it never drops below the default.
 *
 *  \param ctx hash table context whose config.memcap is overwritten
 *
 *  NOTE(review): the assignment is unconditional, so a memcap that was
 *  passed explicitly to THashInit() is replaced as well -- confirm that
 *  this is the intended behaviour.
 */
void THashConsolidateMemcap(THashTableContext *ctx)
{
    ctx->config.memcap = MAX(SC_ATOMIC_GET(ctx->memuse), THASH_DEFAULT_MEMCAP);
    /* memcap can hold UINT64_MAX; "%lu" is too narrow where unsigned long is
     * 32 bits, so print through unsigned long long instead. */
    SCLogDebug("memcap after load set to: %llu", (unsigned long long)ctx->config.memcap);
}
/** \brief shutdown the flow engine
* \warning Not thread safe */
void THashShutdown(THashTableContext *ctx)

@ -185,12 +185,10 @@ typedef struct THashTableContext_ {
} \
} while (0)
THashTableContext* THashInit(const char *cnf_prefix, size_t data_size,
int (*DataSet)(void *dst, void *src),
void (*DataFree)(void *),
uint32_t (*DataHash)(void *),
bool (*DataCompare)(void *, void *),
bool reset_memcap);
THashTableContext *THashInit(const char *cnf_prefix, size_t data_size,
int (*DataSet)(void *dst, void *src), void (*DataFree)(void *),
uint32_t (*DataHash)(void *), bool (*DataCompare)(void *, void *), bool reset_memcap,
uint64_t memcap, uint32_t hashsize);
void THashShutdown(THashTableContext *ctx);
@ -215,5 +213,6 @@ THashDataQueue *THashDataQueueNew(void);
void THashCleanup(THashTableContext *ctx);
int THashWalk(THashTableContext *, THashFormatFunc, THashOutputFunc, void *);
int THashRemoveFromHash (THashTableContext *ctx, void *data);
void THashConsolidateMemcap(THashTableContext *ctx);
#endif /* __THASH_H__ */

Loading…
Cancel
Save