datajson: introduce feature

This patch introduces new option to dataset keyword.
Where regular dataset allows match from sets, dataset with json
format allows the same but also adds JSON data to the alert
event. This data comes from the set definition itself.
For example, an ipv4 set will look like:

  [{"ip": "10.16.1.11", "test": "success","context":3}]

The syntax is a JSON array but it can also be a JSON object
with an array inside. The idea is to directly use data coming
from the API of a threat intel management software.

The syntax of the keyword is the following:

  dataset:isset,src_ip,type ip,load src.lst,format json, \
       enrichment_key src_ip, value_key ip;

Compared to dataset, it just has a supplementary option key
that is used to indicate in which subobject the JSON value
should be added.

The information is added to the event under the alert.extra
subobject:

  "alert": {
    "extra": {
      "src_ip": {
        "ip": "10.6.1.11",
        "test": "success",
        "context": 3
      },

The main interest of the feature is to be able to contextualize
a match. For example, if you have an IOC source, you can do

 [
   {"buffer": "value1", "actor":"APT28","Country":"FR"},
   {"buffer": "value2", "actor":"APT32","Country":"NL"}
 ]

This way, a single dataset is able to produce context to the
event where it was not possible before and multiple signatures
had to be used.

The format introduced in datajson is an evolution of the
historical datarep format. This has some limitations. For example,
if a user fetches IOCs from a threat intel server there is a large
chance that the format will be JSON or XML. Suricata has no support
for the second but can support the first one.

Keeping the key value may seem redundant but it is useful to have it
directly accessible in the extra data to be able to query it
independently of the signature (where it can be multiple metadata
or even be a transformed metadata).

In some cases, when interacting with data (mostly coming from
threat intel servers), the JSON array containing the data
to use is not at the root of the object and it is necessary
to access a subobject.

This patch implements this with support of key in level1.level2.
This is done via the `array_key` option that contains the path
to the data.

Ticket: #7372
pull/13432/head
Eric Leblond 5 months ago committed by Victor Julien
parent 53ac35337a
commit dd94dc6cc6

@ -71,12 +71,11 @@ impl<'a> CommandParser<'a> {
}
pub fn parse(&self, input: &str) -> Result<serde_json::Value, CommandParseError> {
let parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect();
let mut parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect();
if parts.is_empty() {
return Err(CommandParseError::Other("No command provided".to_string()));
}
let command = parts[0];
let args = &parts[1..];
let spec = self
.commands
@ -91,6 +90,13 @@ impl<'a> CommandParser<'a> {
// Calculate the number of required arguments for better error reporting.
let required = spec.iter().filter(|e| e.required).count();
let optional = spec.iter().filter(|e| !e.required).count();
// Handle the case where the command has only required arguments and allow
// last one to contain spaces.
if optional == 0 {
parts = input.splitn(required + 1, ' ').collect();
}
let args = &parts[1..];
let mut json_args = HashMap::new();
@ -386,6 +392,28 @@ fn command_defs() -> Result<HashMap<String, Vec<Argument>>, serde_json::Error> {
"type": "string",
},
],
"dataset-add-json": [
{
"name": "setname",
"required": true,
"type": "string",
},
{
"name": "settype",
"required": true,
"type": "string",
},
{
"name": "datavalue",
"required": true,
"type": "string",
},
{
"name": "datajson",
"required": true,
"type": "string",
},
],
"get-flow-stats-by-id": [
{
"name": "flow_id",

@ -54,6 +54,7 @@ noinst_HEADERS = \
conf-yaml-loader.h \
conf.h \
counters.h \
datajson.h \
datasets-ipv4.h \
datasets-ipv6.h \
datasets-md5.h \
@ -653,6 +654,7 @@ libsuricata_c_a_SOURCES = \
conf-yaml-loader.c \
conf.c \
counters.c \
datajson.c \
datasets-ipv4.c \
datasets-ipv6.c \
datasets-md5.c \

@ -0,0 +1,985 @@
/* Copyright (C) 2025 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* \file
*
* \author Eric Leblond <el@stamus-networks.com>
*/
#include "suricata-common.h"
#include "suricata.h"
#include "rust.h"
#include "datasets.h"
#include "datajson.h"
#include "datasets-ipv4.h"
#include "datasets-ipv6.h"
#include "datasets-md5.h"
#include "datasets-sha256.h"
#include "datasets-string.h"
#include "util-byte.h"
#include "util-ip.h"
#include "util-debug.h"
/* Forward declaration of the type-dispatching insert helper; the definition
 * appears further down in this file, after the per-type DatajsonAdd*()
 * functions it calls. */
static int DatajsonAdd(
Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
/**
 * \brief release a hash entry obtained from a lookup or insert
 *
 * Calls THashDecrUsecnt() and then THashDataUnlock() on the entry, in that
 * order. The return value of THashDecrUsecnt() is deliberately ignored.
 *
 * \param d hash entry to release
 */
static inline void DatajsonUnlockData(THashData *d)
{
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}
/**
 * \brief check whether a buffer holds a bare number (integer or float)
 *
 * The number is parsed with strtof(). The whole buffer has to be consumed:
 * only white space may follow the number, so input such as "1abc" is
 * rejected instead of being accepted on the strength of its leading digits.
 *
 * \param in NUL-terminated string to test
 * \param ins length of \a in, not counting the terminating NUL
 *
 * \retval true the buffer is a number
 * \retval false NULL/empty input, nothing parsed, or trailing garbage
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    char *endptr = NULL;
    (void)strtof(in, &endptr);
    /* strtof() leaves endptr on the input when no conversion took place */
    if (endptr == in) {
        return false;
    }

    /* strtof() works on the NUL-terminated string, so it may stop before or
     * after `ins`; going past it means the caller's length was wrong */
    const char *end_ins = in + ins;
    if (endptr > end_ins) {
        return false;
    }

    /* anything left after the number must be white space */
    for (const char *p = endptr; p < end_ins; p++) {
        if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') {
            return false;
        }
    }
    return true;
}
/**
 * \brief validate a JSON value given as text and store a copy of it
 *
 * The text is first handed to json_loads(). When that fails, two fallbacks
 * are still accepted: a bare number (see IsFloat()) and a double-quoted
 * string. A quoted string needs at least two characters so that a lone
 * quote character is not mistaken for an opening and a closing quote.
 *
 * \param in NUL-terminated JSON text
 * \param ins length of \a in
 * \param rep_out receives an SCStrndup() copy of the text and its length;
 *        the caller owns rep_out->value on success
 *
 * \retval 0 the text was accepted and copied
 * \retval -1 the text is not valid or the copy could not be allocated
 */
static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
{
    json_error_t jerror;
    json_t *msg = json_loads(in, 0, &jerror);
    if (msg == NULL) {
        /* JANSSON does not see an integer, float or a string as valid JSON.
           So we need to exclude them from failure. */
        const bool quoted = (ins >= 2) && (in[0] == '"') && (in[ins - 1] == '"');
        if (!IsFloat(in, ins) && !quoted) {
            SCLogWarning("dataset: Invalid json: %s: '%s'\n", jerror.text, in);
            return -1;
        }
    } else {
        /* the parsed tree was only needed for validation */
        json_decref(msg);
    }

    rep_out->len = ins;
    rep_out->value = SCStrndup(in, ins);
    if (rep_out->value == NULL) {
        return -1;
    }
    return 0;
}
/**
 * \brief walk a dotted path ("level1.level2") inside a JSON object
 *
 * Each path segment is looked up with json_object_get() in the node reached
 * so far. The walk stops with NULL as soon as a node is not an object or a
 * segment is missing.
 *
 * \param json object to start from
 * \param key dotted path
 *
 * \retval the node json_object_get() returned for the last segment (this
 *         function takes no extra reference on it), or NULL
 */
static json_t *GetSubObjectByKey(json_t *json, const char *key)
{
    if (json == NULL || key == NULL) {
        return NULL;
    }
    if (!json_is_object(json)) {
        return NULL;
    }

    json_t *node = json;
    for (const char *segment = key; segment != NULL;) {
        const char *sep = strchr(segment, '.');
        const size_t seg_len = (sep != NULL) ? (size_t)(sep - segment) : strlen(segment);

        /* NUL-terminated copy of the current segment for the lookup */
        char name[seg_len + 1];
        strlcpy(name, segment, seg_len + 1);

        if (!json_is_object(node)) {
            return NULL;
        }
        node = json_object_get(node, name);
        if (node == NULL) {
            return NULL;
        }

        segment = (sep != NULL) ? sep + 1 : NULL;
    }
    return node;
}
/**
 * \brief load a JSON file and hand back the array holding the entries
 *
 * With an empty or NULL \a key the document root is used. Otherwise the
 * array is searched at the dotted path \a key via GetSubObjectByKey(); a
 * reference is taken on it and the root is released.
 *
 * \param file path of the JSON file
 * \param array receives the array; on success the caller must json_decref() it
 * \param key dotted path to the array, may be NULL or empty
 *
 * \retval 0 \a array points to a JSON array
 * \retval -1 load failure, key not found, or the selected node is not an array
 */
static int ParseJsonFile(const char *file, json_t **array, char *key)
{
    json_error_t error;
    /* the file is expected to hold one single JSON element */
    json_t *root = json_load_file(file, 0, &error);
    if (root == NULL) {
        FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
        return -1;
    }

    const bool use_root = (key == NULL) || (strlen(key) == 0);
    if (use_root) {
        *array = root;
    } else {
        *array = GetSubObjectByKey(root, key);
        if (*array == NULL) {
            SCLogError("dataset: %s failed to get key '%s'", file, key);
            json_decref(root);
            return -1;
        }
        /* keep the sub node alive before dropping the document root */
        json_incref(*array);
        json_decref(root);
    }

    if (json_is_array(*array)) {
        return 0;
    }
    FatalErrorOnInit("not an array");
    json_decref(*array);
    return -1;
}
/**
 * \brief insert a string value together with its JSON payload
 *
 * \param set dataset to insert into
 * \param data string bytes used as the key
 * \param data_len length of \a data
 * \param json payload copied (by value) into the lookup entry
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was not added to the hash as it is already there
 * \retval -1 failed to add data to the hash
 */
static int DatajsonAddString(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
{
    if (set == NULL) {
        return -1;
    }

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }

    DatajsonUnlockData(got.data);
    return got.is_new ? 1 : 0;
}
/**
 * \brief insert a raw MD5 digest together with its JSON payload
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was already in the hash
 * \retval -1 no set given, or the hash could not provide an entry
 * \retval -2 \a data_len is not SC_MD5_LEN
 */
static int DatajsonAddMd5(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != SC_MD5_LEN) {
        return -2;
    }

    Md5Type key = { .json = *json };
    memcpy(key.md5, data, SC_MD5_LEN);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }

    DatajsonUnlockData(got.data);
    return got.is_new ? 1 : 0;
}
/**
 * \brief insert a raw SHA256 digest together with its JSON payload
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was already in the hash
 * \retval -1 no set given, or the hash could not provide an entry
 * \retval -2 \a data_len is not SC_SHA256_LEN
 */
static int DatajsonAddSha256(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != SC_SHA256_LEN) {
        return -2;
    }

    Sha256Type key = { .json = *json };
    memcpy(key.sha256, data, SC_SHA256_LEN);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }

    DatajsonUnlockData(got.data);
    return got.is_new ? 1 : 0;
}
/**
 * \brief insert an IPv4 address together with its JSON payload
 *
 * Only the first SC_IPV4_LEN bytes of \a data are used; a longer buffer is
 * accepted, a shorter one is rejected.
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was already in the hash
 * \retval -1 no set given, or the hash could not provide an entry
 * \retval -2 \a data_len is smaller than SC_IPV4_LEN
 */
static int DatajsonAddIPv4(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len < SC_IPV4_LEN) {
        return -2;
    }

    IPv4Type key = { .json = *json };
    memcpy(key.ipv4, data, SC_IPV4_LEN);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }

    DatajsonUnlockData(got.data);
    return got.is_new ? 1 : 0;
}
/**
 * \brief insert an IPv6 address together with its JSON payload
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was already in the hash
 * \retval -1 no set given, or the hash could not provide an entry
 * \retval -2 \a data_len is not SC_IPV6_LEN
 */
static int DatajsonAddIPv6(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != SC_IPV6_LEN) {
        return -2;
    }

    IPv6Type key = { .json = *json };
    memcpy(key.ipv6, data, SC_IPV6_LEN);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }

    DatajsonUnlockData(got.data);
    return got.is_new ? 1 : 0;
}
/**
 * \brief insert a value into a dataset, dispatching on the dataset type
 *
 * \param set dataset to insert into
 * \param data raw key bytes, in the representation the set type expects
 * \param data_len length of \a data
 * \param json payload to attach to the entry
 *
 * \retval the result of the matching DatajsonAdd<Type>() function, or -1
 *         when \a set is NULL or its type matches none of the cases
 */
static int DatajsonAdd(
        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
{
    int rc = -1;
    if (set == NULL) {
        return rc;
    }

    switch (set->type) {
        case DATASET_TYPE_STRING:
            rc = DatajsonAddString(set, data, data_len, json);
            break;
        case DATASET_TYPE_MD5:
            rc = DatajsonAddMd5(set, data, data_len, json);
            break;
        case DATASET_TYPE_SHA256:
            rc = DatajsonAddSha256(set, data, data_len, json);
            break;
        case DATASET_TYPE_IPV4:
            rc = DatajsonAddIPv4(set, data, data_len, json);
            break;
        case DATASET_TYPE_IPV6:
            rc = DatajsonAddIPv6(set, data, data_len, json);
            break;
        default:
            break;
    }
    return rc;
}
/**
 * \brief load a string dataset from the JSON file named in set->load
 *
 * Every array element that has \a json_key is added to the set: the string
 * found at that key is the match value and the compact serialization of the
 * whole element is the attached payload. Elements without the key are
 * skipped silently. Elements whose key is not a JSON string, or that cannot
 * be serialized, are reported through FatalErrorOnInit() and skipped.
 *
 * \param set dataset to fill; nothing is done when set->load is empty
 * \param json_key dotted path of the match value inside each element
 * \param array_key dotted path of the array inside the file, may be NULL
 *
 * \retval 0 loading done (or nothing to load)
 * \retval -1 the file could not be parsed or no element had \a json_key
 */
static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    json_t *json;
    bool found = false;

    SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
    if (ParseJsonFile(set->load, &json, array_key) == -1) {
        SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
        return -1;
    }

    size_t index;
    json_t *value;
    json_array_foreach (json, index, value) {
        json_t *key = GetSubObjectByKey(value, json_key);
        if (key == NULL) {
            /* ignore error as it can be a working mode where some entries
               are not in the same format */
            continue;
        }
        found = true;

        /* json_string_value() yields NULL when the node is not a string */
        const char *val = json_string_value(key);
        if (val == NULL) {
            FatalErrorOnInit("datajson value for key '%s' is not a string %s/%s", json_key,
                    set->name, set->load);
            continue;
        }

        DataJsonType elt = { .value = NULL, .len = 0 };
        elt.value = json_dumps(value, JSON_COMPACT);
        if (elt.value == NULL) {
            FatalErrorOnInit("datajson data serialization failed %s/%s", set->name, set->load);
            continue;
        }
        elt.len = strlen(elt.value);

        int add_ret = DatajsonAdd(set, (const uint8_t *)val, strlen(val), &elt);
        if (add_ret <= 0) {
            /* not stored in the hash (failure or duplicate): the payload is
             * still ours to release */
            SCFree(elt.value);
            if (add_ret < 0) {
                FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
            }
            continue;
        }
        cnt++;
    }
    json_decref(json);

    if (found == false) {
        FatalErrorOnInit(
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
        return -1;
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
/**
 * \brief add every MD5 entry of the JSON file named in set->load to the set
 *
 * The value found at \a json_key must be a hex string of SC_MD5_HEX_LEN
 * characters. The compact serialization of the whole array element becomes
 * the payload. Elements without the key are skipped silently; malformed ones
 * are reported through FatalErrorOnInit() and skipped.
 *
 * The result is signed so that a failure can no longer be mistaken for a
 * (huge) record count by the caller.
 *
 * \retval >=0 number of records added
 * \retval -1 the file could not be parsed or no element had \a json_key
 */
static int64_t DatajsonLoadMd5FromJSON(Dataset *set, char *array_key, char *json_key)
{
    json_t *json;
    if (ParseJsonFile(set->load, &json, array_key) == -1)
        return -1;

    uint32_t cnt = 0;
    bool found = false;
    size_t index;
    json_t *value;
    json_array_foreach (json, index, value) {
        json_t *key = GetSubObjectByKey(value, json_key);
        if (key == NULL) {
            /* ignore error as it can be a working mode where some entries
               are not in the same format */
            continue;
        }
        found = true;

        /* json_string_value() yields NULL when the node is not a string */
        const char *hash_string = json_string_value(key);
        if (hash_string == NULL || strlen(hash_string) != SC_MD5_HEX_LEN) {
            FatalErrorOnInit("Not correct length for a hash");
            continue;
        }

        uint8_t hash[SC_MD5_LEN];
        if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
            FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
            continue;
        }

        DataJsonType elt = { .value = NULL, .len = 0 };
        elt.value = json_dumps(value, JSON_COMPACT);
        if (elt.value == NULL) {
            FatalErrorOnInit("datajson data serialization failed %s/%s", set->name, set->load);
            continue;
        }
        elt.len = strlen(elt.value);

        int add_ret = DatajsonAdd(set, (const uint8_t *)hash, SC_MD5_LEN, &elt);
        if (add_ret <= 0) {
            /* not stored in the hash (failure or duplicate): release it */
            SCFree(elt.value);
            if (add_ret < 0) {
                FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
            }
            continue;
        }
        cnt++;
    }
    json_decref(json);

    if (found == false) {
        FatalErrorOnInit(
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
        return -1;
    }
    return (int64_t)cnt;
}

/**
 * \brief load an MD5 dataset from the JSON file named in set->load
 *
 * \retval 0 loading done (or set->load is empty)
 * \retval -1 DatajsonLoadMd5FromJSON() reported a failure
 */
static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    const int64_t cnt = DatajsonLoadMd5FromJSON(set, array_key, json_key);
    if (cnt < 0) {
        return -1;
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, (uint32_t)cnt);
    return 0;
}
/**
 * \brief add every SHA256 entry of the JSON file named in set->load to the set
 *
 * The value found at \a json_key must be a hex string of SC_SHA256_HEX_LEN
 * characters. The compact serialization of the whole array element becomes
 * the payload. Elements without the key are skipped silently; malformed ones
 * are reported through FatalErrorOnInit() and skipped.
 *
 * The result is signed so that a failure can no longer be mistaken for a
 * (huge) record count by the caller.
 *
 * \retval >=0 number of records added
 * \retval -1 the file could not be parsed or no element had \a json_key
 */
static int64_t DatajsonLoadSHA256FromJSON(Dataset *set, char *array_key, char *json_key)
{
    json_t *json;
    if (ParseJsonFile(set->load, &json, array_key) == -1)
        return -1;

    uint32_t cnt = 0;
    bool found = false;
    size_t index;
    json_t *value;
    json_array_foreach (json, index, value) {
        json_t *key = GetSubObjectByKey(value, json_key);
        if (key == NULL) {
            /* ignore error as it can be a working mode where some entries
               are not in the same format */
            continue;
        }
        found = true;

        /* json_string_value() yields NULL when the node is not a string */
        const char *hash_string = json_string_value(key);
        if (hash_string == NULL || strlen(hash_string) != SC_SHA256_HEX_LEN) {
            FatalErrorOnInit("Not correct length for a hash");
            continue;
        }

        uint8_t hash[SC_SHA256_LEN];
        if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
            FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
            continue;
        }

        DataJsonType elt = { .value = NULL, .len = 0 };
        elt.value = json_dumps(value, JSON_COMPACT);
        if (elt.value == NULL) {
            FatalErrorOnInit("datajson data serialization failed %s/%s", set->name, set->load);
            continue;
        }
        elt.len = strlen(elt.value);

        int add_ret = DatajsonAdd(set, (const uint8_t *)hash, SC_SHA256_LEN, &elt);
        if (add_ret <= 0) {
            /* not stored in the hash (failure or duplicate): release it */
            SCFree(elt.value);
            if (add_ret < 0) {
                FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
            }
            continue;
        }
        cnt++;
    }
    json_decref(json);

    if (found == false) {
        FatalErrorOnInit(
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
        return -1;
    }
    return (int64_t)cnt;
}

/**
 * \brief load a SHA256 dataset from the JSON file named in set->load
 *
 * \retval 0 loading done (or set->load is empty)
 * \retval -1 DatajsonLoadSHA256FromJSON() reported a failure
 */
static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    const int64_t cnt = DatajsonLoadSHA256FromJSON(set, array_key, json_key);
    if (cnt < 0) {
        return -1;
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, (uint32_t)cnt);
    return 0;
}
/**
 * \brief add every IPv4 entry of the JSON file named in set->load to the set
 *
 * The value found at \a json_key must be a string inet_pton(AF_INET) can
 * parse. The compact serialization of the whole array element becomes the
 * payload. Elements without the key are skipped silently; malformed ones are
 * reported through FatalErrorOnInit() and skipped.
 *
 * The result is signed so that a file that cannot be parsed is reported as a
 * failure instead of being turned into a (huge) record count.
 *
 * \retval >=0 number of records added; 0 is also returned, after reporting
 *         it, when no element had \a json_key
 * \retval -1 the file could not be parsed
 */
static int64_t DatajsonLoadIPv4FromJSON(Dataset *set, char *array_key, char *json_key)
{
    json_t *json;
    if (ParseJsonFile(set->load, &json, array_key) == -1)
        return -1;

    uint32_t cnt = 0;
    bool found = false;
    size_t index;
    json_t *value;
    json_array_foreach (json, index, value) {
        json_t *key = GetSubObjectByKey(value, json_key);
        if (key == NULL) {
            /* ignore error as it can be a working mode where some entries
               are not in the same format */
            continue;
        }
        found = true;

        /* json_string_value() yields NULL when the node is not a string */
        const char *ip_string = json_string_value(key);
        struct in_addr in;
        if (ip_string == NULL || inet_pton(AF_INET, ip_string, &in) != 1) {
            FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load,
                    ip_string != NULL ? ip_string : "(not a string)");
            continue;
        }

        DataJsonType elt = { .value = NULL, .len = 0 };
        elt.value = json_dumps(value, JSON_COMPACT);
        if (elt.value == NULL) {
            FatalErrorOnInit("datajson data serialization failed %s/%s", set->name, set->load);
            continue;
        }
        elt.len = strlen(elt.value);

        int add_ret = DatajsonAdd(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, &elt);
        if (add_ret <= 0) {
            /* not stored in the hash (failure or duplicate): release it */
            SCFree(elt.value);
            if (add_ret < 0) {
                FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
            }
            continue;
        }
        cnt++;
    }
    json_decref(json);

    if (found == false) {
        FatalErrorOnInit(
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
        return 0;
    }
    return (int64_t)cnt;
}

/**
 * \brief load an IPv4 dataset from the JSON file named in set->load
 *
 * \retval 0 loading done (or set->load is empty)
 * \retval -1 DatajsonLoadIPv4FromJSON() reported a failure
 */
static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    const int64_t cnt = DatajsonLoadIPv4FromJSON(set, array_key, json_key);
    if (cnt < 0) {
        return -1;
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, (uint32_t)cnt);
    return 0;
}
/**
 * \brief add every IPv6 entry of the JSON file named in set->load to the set
 *
 * The value found at \a json_key must be a string DatasetParseIpv6String()
 * accepts. The compact serialization of the whole array element becomes the
 * payload. Elements without the key are skipped silently; malformed ones are
 * reported through FatalErrorOnInit() and skipped.
 *
 * The result is signed so that a file that cannot be parsed is reported as a
 * failure instead of being turned into a (huge) record count.
 *
 * \retval >=0 number of records added; 0 is also returned, after reporting
 *         it, when no element had \a json_key
 * \retval -1 the file could not be parsed
 */
static int64_t DatajsonLoadIPv6FromJSON(Dataset *set, char *array_key, char *json_key)
{
    json_t *json;
    if (ParseJsonFile(set->load, &json, array_key) == -1)
        return -1;

    uint32_t cnt = 0;
    bool found = false;
    size_t index;
    json_t *value;
    json_array_foreach (json, index, value) {
        json_t *key = GetSubObjectByKey(value, json_key);
        if (key == NULL) {
            /* ignore error as it can be a working mode where some entries
               are not in the same format */
            continue;
        }
        found = true;

        /* json_string_value() yields NULL when the node is not a string;
         * treat that like any other unparsable address */
        const char *ip_string = json_string_value(key);
        struct in6_addr in6;
        int ret = (ip_string != NULL) ? DatasetParseIpv6String(set, ip_string, &in6) : -1;
        if (ret < 0) {
            FatalErrorOnInit("unable to parse IP address");
            continue;
        }

        DataJsonType elt = { .value = NULL, .len = 0 };
        elt.value = json_dumps(value, JSON_COMPACT);
        if (elt.value == NULL) {
            FatalErrorOnInit("datajson data serialization failed %s/%s", set->name, set->load);
            continue;
        }
        elt.len = strlen(elt.value);

        int add_ret = DatajsonAdd(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &elt);
        if (add_ret <= 0) {
            /* not stored in the hash (failure or duplicate): release it */
            SCFree(elt.value);
            if (add_ret < 0) {
                FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
            }
            continue;
        }
        cnt++;
    }
    json_decref(json);

    if (found == false) {
        FatalErrorOnInit(
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
        return 0;
    }
    return (int64_t)cnt;
}

/**
 * \brief load an IPv6 dataset from the JSON file named in set->load
 *
 * \retval 0 loading done (or set->load is empty)
 * \retval -1 DatajsonLoadIPv6FromJSON() reported a failure
 */
static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    const int64_t cnt = DatajsonLoadIPv6FromJSON(set, array_key, json_key);
    if (cnt < 0) {
        return -1;
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, (uint32_t)cnt);
    return 0;
}
/**
 * \brief get the JSON-enriched dataset called \a name, creating it if needed
 *
 * An existing set is returned as is, provided its type matches \a type (when
 * given) and, if \a load is non-empty, that it was loaded from the same
 * path. Otherwise a new set is allocated, its hash is initialised for the
 * requested type, the file \a load is read into it and the set is appended
 * to the list of datasets. The whole operation runs between DatasetLock()
 * and DatasetUnlock().
 *
 * \param name dataset name, at most DATASET_NAME_MAX_LEN characters
 * \param type dataset type; DATASET_TYPE_NOTSET only works for existing sets
 * \param load path of the JSON file to load, may be NULL or empty
 * \param memcap memcap for the hash, 0 to use the default
 * \param hashsize hash size, 0 to use the default
 * \param json_key_value dotted path of the match value in each entry
 * \param json_array_key dotted path of the entry array in the file
 *
 * \retval the dataset, or NULL on any failure
 */
Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
        uint32_t hashsize, char *json_key_value, char *json_array_key)
{
    uint64_t default_memcap = 0;
    uint32_t default_hashsize = 0;
    if (strlen(name) > DATASET_NAME_MAX_LEN) {
        SCLogError("dataset name too long");
        return NULL;
    }

    DatasetLock();
    Dataset *set = DatasetSearchByName(name);
    if (set) {
        if (type != DATASET_TYPE_NOTSET && set->type != type) {
            SCLogError("dataset %s already "
                       "exists and is of type %u",
                    set->name, set->type);
            DatasetUnlock();
            return NULL;
        }

        /* A rule keyword doesn't have to set state/load, even when the yaml
         * set has set it: only a non-empty, different path is a conflict. */
        if (load != NULL && strlen(load) > 0 && strcmp(set->load, load) != 0) {
            SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
            DatasetUnlock();
            return NULL;
        }

        DatasetUnlock();
        return set;
    }

    if (type == DATASET_TYPE_NOTSET) {
        SCLogError("dataset %s not defined", name);
        goto out_err;
    }

    set = DatasetAlloc(name);
    if (set == NULL) {
        SCLogError("dataset %s allocation failed", name);
        goto out_err;
    }

    strlcpy(set->name, name, sizeof(set->name));
    set->type = type;
    if (load && strlen(load)) {
        strlcpy(set->load, load, sizeof(set->load));
        SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
    }

    static const char conf_format_str[] = "datasets.%s.hash";
    char cnf_name[DATASET_NAME_MAX_LEN + (sizeof(conf_format_str) / sizeof(char))];
    int p_ret = snprintf(cnf_name, sizeof(cnf_name), conf_format_str, name);
    /* snprintf() reports an error with a negative value and truncation with
     * a value >= the buffer size; it never signals either with 0 */
    if (p_ret < 0 || (size_t)p_ret >= sizeof(cnf_name)) {
        SCLogError("Can't build configuration variable for set: '%s'", name);
        goto out_err;
    }

    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
    switch (type) {
        case DATASET_TYPE_MD5:
            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
                    Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0,
                    memcap > 0 ? memcap : default_memcap,
                    hashsize > 0 ? hashsize : default_hashsize);
            if (set->hash == NULL)
                goto out_err;
            if (DatajsonLoadMd5(set, json_key_value, json_array_key) < 0)
                goto out_err;
            break;
        case DATASET_TYPE_STRING:
            set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
                    StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0,
                    memcap > 0 ? memcap : default_memcap,
                    hashsize > 0 ? hashsize : default_hashsize);
            if (set->hash == NULL)
                goto out_err;
            if (DatajsonLoadString(set, json_key_value, json_array_key) < 0) {
                SCLogError("dataset %s loading failed", name);
                goto out_err;
            }
            break;
        case DATASET_TYPE_SHA256:
            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
                    Sha256StrHash, Sha256StrCompare, NULL, Sha256StrJsonGetLength,
                    load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
                    hashsize > 0 ? hashsize : default_hashsize);
            if (set->hash == NULL)
                goto out_err;
            if (DatajsonLoadSha256(set, json_key_value, json_array_key) < 0)
                goto out_err;
            break;
        case DATASET_TYPE_IPV4:
            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
                    IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0,
                    memcap > 0 ? memcap : default_memcap,
                    hashsize > 0 ? hashsize : default_hashsize);
            if (set->hash == NULL)
                goto out_err;
            if (DatajsonLoadIPv4(set, json_key_value, json_array_key) < 0)
                goto out_err;
            break;
        case DATASET_TYPE_IPV6:
            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
                    IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0,
                    memcap > 0 ? memcap : default_memcap,
                    hashsize > 0 ? hashsize : default_hashsize);
            if (set->hash == NULL)
                goto out_err;
            if (DatajsonLoadIPv6(set, json_key_value, json_array_key) < 0)
                goto out_err;
            break;
    }

    SCLogDebug(
            "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);

    DatasetAppendSet(set);

    DatasetUnlock();
    return set;
out_err:
    /* undo the partial construction; set is NULL when allocation never happened */
    if (set) {
        if (set->hash) {
            THashShutdown(set->hash);
        }
        SCFree(set);
    }
    DatasetUnlock();
    return NULL;
}
/**
 * \brief look a string up in a string dataset
 *
 * \retval a result with found == true and the stored JSON payload when the
 *         string is in the set; found == false otherwise. The payload is
 *         copied by value, so json.value still points at the buffer kept in
 *         the hash entry.
 */
static DataJsonResultType DatajsonLookupString(
        Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set != NULL) {
        StringType key = {
            .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
        };
        THashData *entry = THashLookupFromHash(set->hash, &key);
        if (entry != NULL) {
            const StringType *stored = entry->data;
            result.found = true;
            result.json = stored->json;
            DatajsonUnlockData(entry);
        }
    }
    return result;
}
/**
 * \brief look a raw MD5 digest up in an MD5 dataset
 *
 * \retval a result with found == true and the stored JSON payload on a hit;
 *         found == false when \a set is NULL, \a data_len is not SC_MD5_LEN,
 *         or the digest is not in the set
 */
static DataJsonResultType DatajsonLookupMd5(
        Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set == NULL || data_len != SC_MD5_LEN) {
        return result;
    }

    Md5Type key = { .json.value = NULL, .json.len = 0 };
    memcpy(key.md5, data, data_len);

    THashData *entry = THashLookupFromHash(set->hash, &key);
    if (entry != NULL) {
        const Md5Type *stored = entry->data;
        result.found = true;
        result.json = stored->json;
        DatajsonUnlockData(entry);
    }
    return result;
}
/**
 * \brief look a raw SHA256 digest up in a SHA256 dataset
 *
 * \retval a result with found == true and the stored JSON payload on a hit;
 *         found == false when \a set is NULL, \a data_len is not
 *         SC_SHA256_LEN, or the digest is not in the set
 */
static DataJsonResultType DatajsonLookupSha256(
        Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set == NULL || data_len != SC_SHA256_LEN) {
        return result;
    }

    Sha256Type key = { .json.value = NULL, .json.len = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *entry = THashLookupFromHash(set->hash, &key);
    if (entry != NULL) {
        const Sha256Type *stored = entry->data;
        result.found = true;
        result.json = stored->json;
        DatajsonUnlockData(entry);
    }
    return result;
}
/**
 * \brief look an IPv4 address up in an IPv4 dataset
 *
 * \retval a result with found == true and the stored JSON payload on a hit;
 *         found == false when \a set is NULL, \a data_len is not
 *         SC_IPV4_LEN, or the address is not in the set
 */
static DataJsonResultType DatajsonLookupIPv4(
        Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set == NULL || data_len != SC_IPV4_LEN) {
        return result;
    }

    IPv4Type key = { .json.value = NULL, .json.len = 0 };
    memcpy(key.ipv4, data, data_len);

    THashData *entry = THashLookupFromHash(set->hash, &key);
    if (entry != NULL) {
        const IPv4Type *stored = entry->data;
        result.found = true;
        result.json = stored->json;
        DatajsonUnlockData(entry);
    }
    return result;
}
/**
 * \brief look an IP address up in an IPv6 dataset
 *
 * Both SC_IPV6_LEN and SC_IPV4_LEN byte inputs are accepted; a shorter input
 * only fills the leading bytes of the zero-initialised lookup key.
 *
 * \retval a result with found == true and the stored JSON payload on a hit;
 *         found == false when \a set is NULL, the length is neither of the
 *         two accepted ones, or the address is not in the set
 */
static DataJsonResultType DatajsonLookupIPv6(
        Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set == NULL) {
        return result;
    }

    /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
    const bool len_ok = (data_len == SC_IPV6_LEN) || (data_len == SC_IPV4_LEN);
    if (!len_ok) {
        return result;
    }

    IPv6Type key = { .json.value = NULL, .json.len = 0 };
    memcpy(key.ipv6, data, data_len);

    THashData *entry = THashLookupFromHash(set->hash, &key);
    if (entry != NULL) {
        const IPv6Type *stored = entry->data;
        result.found = true;
        result.json = stored->json;
        DatajsonUnlockData(entry);
    }
    return result;
}
/**
 * \brief look a value up in a dataset, dispatching on the dataset type
 *
 * \param set dataset to search
 * \param data raw key bytes, in the representation the set type expects
 * \param data_len length of \a data
 *
 * \retval the result of the matching DatajsonLookup<Type>() function; a
 *         result with found == false when \a set is NULL or its type
 *         matches none of the cases
 */
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    DataJsonResultType result = { .found = false, .json = { .value = NULL, .len = 0 } };

    if (set == NULL) {
        return result;
    }

    switch (set->type) {
        case DATASET_TYPE_STRING:
            result = DatajsonLookupString(set, data, data_len);
            break;
        case DATASET_TYPE_MD5:
            result = DatajsonLookupMd5(set, data, data_len);
            break;
        case DATASET_TYPE_SHA256:
            result = DatajsonLookupSha256(set, data, data_len);
            break;
        case DATASET_TYPE_IPV4:
            result = DatajsonLookupIPv4(set, data, data_len);
            break;
        case DATASET_TYPE_IPV6:
            result = DatajsonLookupIPv6(set, data, data_len);
            break;
        default:
            break;
    }
    return result;
}
/** Signature shared by the per-type operations DatajsonOpSerialized() can run. */
typedef int (*DatajsonOpFunc)(
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json);

/**
 * \brief decode a serialized value and run a per-type operation on it
 *
 * The value is decoded according to set->type (base64 for strings, hex for
 * the hashes, textual form for IP addresses) and passed, together with the
 * validated JSON payload, to the callback registered for that type.
 *
 * The payload copy made by ParseJsonLine() is released here whenever the
 * result is <= 0; with a positive result it is left untouched.
 *
 * \param set dataset to operate on
 * \param string serialized value
 * \param json JSON payload as text, may be NULL
 *
 * \retval the callback result when the value could be decoded
 * \retval -1 NULL set, empty value, invalid JSON, or unhandled set type
 * \retval -2 the value could not be decoded for the set type
 */
static int DatajsonOpSerialized(Dataset *set, const char *string, const char *json,
        DatajsonOpFunc DatajsonOpString, DatajsonOpFunc DatajsonOpMd5,
        DatajsonOpFunc DatajsonOpSha256, DatajsonOpFunc DatajsonOpIPv4,
        DatajsonOpFunc DatajsonOpIPv6)
{
    if (set == NULL)
        return -1;
    if (strlen(string) == 0)
        return -1;

    DataJsonType jvalue = { .value = NULL, .len = 0 };
    if (json != NULL && ParseJsonLine(json, strlen(json), &jvalue) < 0) {
        SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
        return -1;
    }

    /* stays -1 when set->type matches none of the cases below; the decoding
     * failures set -2 before leaving the switch */
    int ret = -1;
    switch (set->type) {
        case DATASET_TYPE_STRING: {
            /* NOTE(review): the decode buffer is a VLA sized from the input
             * string, as in the original code */
            uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string));
            uint8_t decoded[decoded_size];
            uint32_t num_decoded = SCBase64Decode(
                    (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
            if (num_decoded == 0) {
                ret = -2;
                break;
            }
            ret = DatajsonOpString(set, decoded, num_decoded, &jvalue);
            break;
        }
        case DATASET_TYPE_MD5: {
            uint8_t hash[SC_MD5_LEN];
            if (strlen(string) != SC_MD5_HEX_LEN ||
                    HexToRaw((const uint8_t *)string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
                ret = -2;
                break;
            }
            ret = DatajsonOpMd5(set, hash, SC_MD5_LEN, &jvalue);
            break;
        }
        case DATASET_TYPE_SHA256: {
            uint8_t hash[SC_SHA256_LEN];
            if (strlen(string) != SC_SHA256_HEX_LEN ||
                    HexToRaw((const uint8_t *)string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) <
                            0) {
                ret = -2;
                break;
            }
            ret = DatajsonOpSha256(set, hash, SC_SHA256_LEN, &jvalue);
            break;
        }
        case DATASET_TYPE_IPV4: {
            struct in_addr in;
            if (inet_pton(AF_INET, string, &in) != 1) {
                ret = -2;
                break;
            }
            ret = DatajsonOpIPv4(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
            break;
        }
        case DATASET_TYPE_IPV6: {
            struct in6_addr in6;
            if (DatasetParseIpv6String(set, string, &in6) != 0) {
                SCLogError("Dataset failed to import %s as IPv6", string);
                ret = -2;
                break;
            }
            ret = DatajsonOpIPv6(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
            break;
        }
    }

    /* single cleanup point: every non-positive outcome releases the payload */
    if (ret <= 0) {
        SCFree(jvalue.value);
    }
    return ret;
}
/**
 * \brief add a serialized value and its JSON payload to a json set
 *
 * Runs DatajsonOpSerialized() with the DatajsonAdd<Type>() functions as the
 * per-type operations.
 *
 * \param set dataset to add to
 * \param value serialized value
 * \param json JSON payload as text
 *
 * \retval int 1 added
 * \retval int 0 already in hash
 * \retval int -1 API error (not added)
 * \retval int -2 DATA error
 */
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
{
    const int rc = DatajsonOpSerialized(set, value, json, DatajsonAddString, DatajsonAddMd5,
            DatajsonAddSha256, DatajsonAddIPv4, DatajsonAddIPv6);
    return rc;
}

@ -0,0 +1,51 @@
/* Copyright (C) 2024 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* \file
*
* \author Eric Leblond <el@stamus-networks.com>
*/
#ifndef SURICATA_DATAJSON_H
#define SURICATA_DATAJSON_H
#include <suricata-common.h>
#include "datasets.h"
/* NOTE(review): not referenced by the code visible alongside this header;
 * presumably an upper bound for JSON text length - confirm before relying on it. */
#define DATAJSON_JSON_LENGTH 1024
/* JSON payload attached to a dataset entry. */
typedef struct DataJsonType {
/* JSON text; the datajson.c code fills it from json_dumps() or SCStrndup()
 * and releases it with SCFree() */
char *value;
/* length of value in bytes, as set from strlen()/the input length */
size_t len;
} DataJsonType;
/* Result of a datajson lookup, returned by value. */
typedef struct DataJsonResultType {
/* true when the looked-up value is in the set */
bool found;
/* payload stored with the matching entry; value is NULL and len is 0 when
 * nothing was found */
DataJsonType json;
} DataJsonResultType;
/* Common functions */

/* Return the dataset called `name`; when it does not exist yet it is created
 * for `type`, filled from the JSON file `load` and registered. `json_key_value`
 * is the dotted path of the match value inside each entry, `json_array_key`
 * the dotted path of the entry array inside the file. Returns NULL on failure. */
Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
uint32_t hashsize, char *json_key_value, char *json_array_key);
/* Look `data` up in `set`; `found` tells whether it matched and `json` carries
 * the payload stored with the entry. */
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len);
/* Add the serialized `value` with the JSON text `json` to `set`.
 * Returns 1 when added, 0 when already present, -1 on API error, -2 on data error. */
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json);
#endif /* SURICATA_DATAJSON_H*/

@ -38,6 +38,17 @@ int IPv4Set(void *dst, void *src)
return 0;
}
/**
 * \brief copy an IPv4 entry with its JSON payload into a hash slot
 *
 * The address bytes are copied; the payload is copied by value, so the
 * destination ends up holding the same json.value pointer as the source.
 *
 * \param dst destination IPv4Type
 * \param src source IPv4Type
 *
 * \retval 0 always
 */
int IPv4JsonSet(void *dst, void *src)
{
    const IPv4Type *from = src;
    IPv4Type *to = dst;

    memcpy(to->ipv4, from->ipv4, sizeof(to->ipv4));
    to->json.len = from->json.len;
    to->json.value = from->json.value;
    return 0;
}
bool IPv4Compare(void *a, void *b)
{
const IPv4Type *as = a;
@ -56,3 +67,17 @@ uint32_t IPv4Hash(uint32_t hash_seed, void *s)
/* No-op: the body is empty, so nothing is released for an entry passed here. */
void IPv4Free(void *s)
{
}
/**
 * \brief release the JSON payload attached to an IPv4 entry
 *
 * \param s IPv4Type whose json.value is freed when it is not NULL
 */
void IPv4JsonFree(void *s)
{
    const IPv4Type *entry = s;

    if (entry->json.value == NULL) {
        return;
    }
    SCFree(entry->json.value);
}
/**
 * \brief report the length of the JSON payload attached to an IPv4 entry
 *
 * \param s IPv4Type to inspect
 *
 * \retval json.len of the entry, converted to uint32_t
 */
uint32_t IPv4JsonGetLength(void *s)
{
    const IPv4Type *entry = s;
    const uint32_t json_len = entry->json.len;
    return json_len;
}

@ -25,15 +25,22 @@
#define SURICATA_DATASETS_IPV4_H
#include "datasets-reputation.h"
#include "datajson.h"
typedef struct IPv4Type {
uint8_t ipv4[4];
DataRepType rep;
union {
DataRepType rep;
DataJsonType json;
};
} IPv4Type;
int IPv4Set(void *dst, void *src);
int IPv4JsonSet(void *dst, void *src);
bool IPv4Compare(void *a, void *b);
uint32_t IPv4Hash(uint32_t hash_seed, void *s);
void IPv4Free(void *s);
void IPv4JsonFree(void *s);
uint32_t IPv4JsonGetLength(void *s);
#endif /* SURICATA_DATASETS_IPV4_H */

@ -24,6 +24,7 @@
#include "suricata-common.h"
#include "conf.h"
#include "datasets.h"
#include "datajson.h"
#include "datasets-ipv6.h"
#include "util-hash-lookup3.h"
#include "util-thash.h"
@ -38,6 +39,17 @@ int IPv6Set(void *dst, void *src)
return 0;
}
/**
 * \brief copy an IPv6 entry with its JSON payload into a hash slot
 *
 * The address bytes are copied; the payload is copied by value, so the
 * destination ends up holding the same json.value pointer as the source.
 *
 * \param dst destination IPv6Type
 * \param src source IPv6Type
 *
 * \retval 0 always
 */
int IPv6JsonSet(void *dst, void *src)
{
    const IPv6Type *from = src;
    IPv6Type *to = dst;

    memcpy(to->ipv6, from->ipv6, sizeof(to->ipv6));
    to->json.len = from->json.len;
    to->json.value = from->json.value;
    return 0;
}
bool IPv6Compare(void *a, void *b)
{
const IPv6Type *as = a;
@ -56,3 +68,17 @@ uint32_t IPv6Hash(uint32_t hash_seed, void *s)
/* No-op: the body is empty, so nothing is released for an entry passed here. */
void IPv6Free(void *s)
{
}
/**
 * \brief release the JSON payload attached to an IPv6 entry
 *
 * \param s IPv6Type whose json.value is freed when it is not NULL
 */
void IPv6JsonFree(void *s)
{
    const IPv6Type *entry = s;

    if (entry->json.value == NULL) {
        return;
    }
    SCFree(entry->json.value);
}
/**
 * \brief report the length of the JSON payload attached to an IPv6 entry
 *
 * \param s IPv6Type to inspect
 *
 * \retval json.len of the entry, converted to uint32_t
 */
uint32_t IPv6JsonGetLength(void *s)
{
    const IPv6Type *entry = s;
    const uint32_t json_len = entry->json.len;
    return json_len;
}

@ -25,15 +25,22 @@
#define SURICATA_DATASETS_IPV6_H
#include "datasets-reputation.h"
#include "datajson.h"
typedef struct IPv6Type {
uint8_t ipv6[16];
DataRepType rep;
union {
DataRepType rep;
DataJsonType json;
};
} IPv6Type;
int IPv6Set(void *dst, void *src);
int IPv6JsonSet(void *dst, void *src);
bool IPv6Compare(void *a, void *b);
uint32_t IPv6Hash(uint32_t hash_seed, void *s);
void IPv6Free(void *s);
void IPv6JsonFree(void *s);
uint32_t IPv6JsonGetLength(void *s);
#endif /* SURICATA_DATASETS_IPV6_H */

@ -24,6 +24,7 @@
#include "suricata-common.h"
#include "conf.h"
#include "datasets.h"
#include "datajson.h"
#include "datasets-md5.h"
#include "util-hash-lookup3.h"
@ -39,6 +40,16 @@ int Md5StrSet(void *dst, void *src)
return 0;
}
/**
 * \brief Populate an MD5 hash entry that carries JSON context.
 *
 * The digest bytes are copied; the JSON value pointer and its length are
 * copied as-is (the JSON buffer itself is not duplicated).
 *
 * \param dst destination Md5Type
 * \param src source Md5Type
 *
 * \retval 0 always
 */
int Md5StrJsonSet(void *dst, void *src)
{
    Md5Type *to = dst;
    Md5Type *from = src;

    to->json.len = from->json.len;
    to->json.value = from->json.value;
    memcpy(to->md5, from->md5, sizeof(to->md5));

    return 0;
}
bool Md5StrCompare(void *a, void *b)
{
const Md5Type *as = a;
@ -57,3 +68,17 @@ uint32_t Md5StrHash(uint32_t hash_seed, void *s)
void Md5StrFree(void *s)
{
}
/**
 * \brief Release the JSON value attached to an MD5 entry, if any.
 *
 * \param s the Md5Type whose json.value is freed
 */
void Md5StrJsonFree(void *s)
{
    const Md5Type *entry = s;

    if (entry->json.value == NULL)
        return;

    SCFree(entry->json.value);
}
/**
 * \brief Report the length of the JSON value attached to an MD5 entry.
 *
 * \param s the Md5Type to inspect
 *
 * \retval the stored json.len
 */
uint32_t Md5StrJsonGetLength(void *s)
{
    return ((const Md5Type *)s)->json.len;
}

@ -25,15 +25,22 @@
#define SURICATA_DATASETS_MD5_H
#include "datasets-reputation.h"
#include "datajson.h"
typedef struct Md5Type {
uint8_t md5[16];
DataRepType rep;
union {
DataRepType rep;
DataJsonType json;
};
} Md5Type;
int Md5StrSet(void *dst, void *src);
int Md5StrJsonSet(void *dst, void *src);
bool Md5StrCompare(void *a, void *b);
uint32_t Md5StrHash(uint32_t hash_seed, void *s);
void Md5StrFree(void *s);
void Md5StrJsonFree(void *s);
uint32_t Md5StrJsonGetLength(void *s);
#endif /* SURICATA_DATASETS_MD5_H */

@ -24,6 +24,7 @@
#include "suricata-common.h"
#include "conf.h"
#include "datasets.h"
#include "datajson.h"
#include "datasets-sha256.h"
#include "util-hash-lookup3.h"
#include "util-thash.h"
@ -37,6 +38,16 @@ int Sha256StrSet(void *dst, void *src)
return 0;
}
/**
 * \brief Populate a SHA-256 hash entry that carries JSON context.
 *
 * The digest bytes are copied; the JSON value pointer and its length are
 * copied as-is (the JSON buffer itself is not duplicated).
 *
 * \param dst destination Sha256Type
 * \param src source Sha256Type
 *
 * \retval 0 always
 */
int Sha256StrJsonSet(void *dst, void *src)
{
    Sha256Type *to = dst;
    Sha256Type *from = src;

    to->json.len = from->json.len;
    to->json.value = from->json.value;
    memcpy(to->sha256, from->sha256, sizeof(to->sha256));

    return 0;
}
bool Sha256StrCompare(void *a, void *b)
{
Sha256Type *as = a;
@ -56,3 +67,17 @@ void Sha256StrFree(void *s)
{
// no dynamic data
}
/**
 * \brief Release the JSON value attached to a SHA-256 entry, if any.
 *
 * \param s the Sha256Type whose json.value is freed
 */
void Sha256StrJsonFree(void *s)
{
    const Sha256Type *entry = s;

    if (entry->json.value == NULL)
        return;

    SCFree(entry->json.value);
}
/**
 * \brief Report the length of the JSON value attached to a SHA-256 entry.
 *
 * \param s the Sha256Type to inspect
 *
 * \retval the stored json.len
 */
uint32_t Sha256StrJsonGetLength(void *s)
{
    return ((const Sha256Type *)s)->json.len;
}

@ -25,15 +25,22 @@
#define SURICATA_DATASETS_SHA256_H
#include "datasets-reputation.h"
#include "datajson.h"
typedef struct Sha256Type {
uint8_t sha256[32];
DataRepType rep;
union {
DataRepType rep;
DataJsonType json;
};
} Sha256Type;
int Sha256StrSet(void *dst, void *src);
int Sha256StrJsonSet(void *dst, void *src);
bool Sha256StrCompare(void *a, void *b);
uint32_t Sha256StrHash(uint32_t hash_seed, void *s);
void Sha256StrFree(void *s);
void Sha256StrJsonFree(void *s);
uint32_t Sha256StrJsonGetLength(void *s);
#endif /* SURICATA_DATASETS_SHA256_H */

@ -73,6 +73,24 @@ int StringSet(void *dst, void *src)
return 0;
}
/**
 * \brief Populate a string hash entry that carries JSON context.
 *
 * The string bytes are duplicated into a freshly allocated buffer, while the
 * JSON value pointer and its length are copied as-is (the JSON buffer itself
 * is not duplicated). Allocation failure trips BUG_ON.
 *
 * \param dst destination StringType
 * \param src source StringType
 *
 * \retval 0 always
 */
int StringJsonSet(void *dst, void *src)
{
    StringType *from = src;
    StringType *to = dst;

    SCLogDebug("dst %p src %p, src_s->ptr %p src_s->len %u", dst, src, from->ptr, from->len);

    /* deep copy of the key bytes */
    to->len = from->len;
    to->ptr = SCMalloc(to->len);
    BUG_ON(to->ptr == NULL);
    memcpy(to->ptr, from->ptr, to->len);

    /* shallow copy of the JSON context */
    to->json.value = from->json.value;
    to->json.len = from->json.len;

    SCLogDebug("dst %p src %p, dst_s->ptr %p dst_s->len %u", dst, src, to->ptr, to->len);
    return 0;
}
bool StringCompare(void *a, void *b)
{
const StringType *as = a;
@ -102,3 +120,18 @@ void StringFree(void *s)
StringType *str = s;
SCFree(str->ptr);
}
/**
 * \brief Release the buffers held by a string entry with JSON context.
 *
 * Frees the string bytes unconditionally and the JSON value when one is set.
 *
 * \param s the StringType to clean up
 */
void StringJsonFree(void *s)
{
    StringType *entry = s;

    SCFree(entry->ptr);

    if (entry->json.value == NULL)
        return;

    SCFree(entry->json.value);
}
/**
 * \brief Report the combined size of a string entry's key and JSON value.
 *
 * \param s the StringType to inspect
 *
 * \retval sum of the string length and the stored json.len
 */
uint32_t StringJsonGetLength(void *s)
{
    const StringType *entry = s;

    return entry->len + entry->json.len;
}

@ -25,18 +25,25 @@
#define SURICATA_DATASETS_STRING_H
#include "datasets-reputation.h"
#include "datajson.h"
typedef struct StringType {
uint32_t len;
DataRepType rep;
union {
DataRepType rep;
DataJsonType json;
};
uint8_t *ptr;
} StringType;
int StringSet(void *dst, void *src);
int StringJsonSet(void *dst, void *src);
bool StringCompare(void *a, void *b);
uint32_t StringHash(uint32_t hash_seed, void *s);
uint32_t StringGetLength(void *s);
void StringFree(void *s);
void StringJsonFree(void *s);
int StringAsBase64(const void *s, char *out, size_t out_size);
uint32_t StringJsonGetLength(void *s);
#endif /* SURICATA_DATASETS_STRING_H */

@ -32,7 +32,9 @@
#include "datasets-md5.h"
#include "datasets-sha256.h"
#include "datasets-reputation.h"
#include "datajson.h"
#include "util-conf.h"
#include "util-mem.h"
#include "util-thash.h"
#include "util-print.h"
#include "util-byte.h"
@ -57,7 +59,6 @@ static inline void DatasetUnlockData(THashData *d)
THashDataUnlock(d);
}
static bool DatasetIsStatic(const char *save, const char *load);
static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
enum DatasetTypes DatasetGetTypeFromString(const char *s)
{
@ -74,7 +75,23 @@ enum DatasetTypes DatasetGetTypeFromString(const char *s)
return DATASET_TYPE_NOTSET;
}
static Dataset *DatasetAlloc(const char *name)
/**
 * \brief Link a dataset into the global `sets` list.
 *
 * Despite the name, the set is inserted at the head of the list.
 * NOTE(review): no locking happens here; presumably the caller holds the
 * sets lock (see DatasetLock) -- confirm against callers.
 *
 * \param set the dataset to insert
 */
void DatasetAppendSet(Dataset *set)
{
set->next = sets;
sets = set;
}
/**
 * \brief Acquire the `sets_lock` mutex.
 */
void DatasetLock(void)
{
SCMutexLock(&sets_lock);
}
/**
 * \brief Release the `sets_lock` mutex.
 */
void DatasetUnlock(void)
{
SCMutexUnlock(&sets_lock);
}
Dataset *DatasetAlloc(const char *name)
{
Dataset *set = SCCalloc(1, sizeof(*set));
if (set) {
@ -83,7 +100,7 @@ static Dataset *DatasetAlloc(const char *name)
return set;
}
static Dataset *DatasetSearchByName(const char *name)
Dataset *DatasetSearchByName(const char *name)
{
Dataset *set = sets;
while (set) {
@ -118,7 +135,7 @@ static int DatasetLoadIPv4(Dataset *set)
return 0;
}
static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
{
/* Checking IPv6 case */
char *got_colon = strchr(line, ':');
@ -249,8 +266,8 @@ enum DatasetGetPathType {
TYPE_LOAD,
};
static void DatasetGetPath(const char *in_path,
char *out_path, size_t out_size, enum DatasetGetPathType type)
static void DatasetGetPath(
const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type)
{
char path[PATH_MAX];
struct stat st;
@ -372,7 +389,7 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save,
}
}
GetDefaultMemcap(&default_memcap, &default_hashsize);
DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
if (hashsize == 0) {
hashsize = default_hashsize;
}
@ -547,7 +564,7 @@ void DatasetPostReloadCleanup(void)
* despite 2048 commented out in the default yaml. */
#define DATASETS_HASHSIZE_DEFAULT 4096
static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
{
const char *str = NULL;
if (SCConfGet("datasets.defaults.memcap", &str) == 1) {
@ -576,7 +593,7 @@ int DatasetsInit(void)
SCConfNode *datasets = SCConfGetNode("datasets");
uint64_t default_memcap = 0;
uint32_t default_hashsize = 0;
GetDefaultMemcap(&default_memcap, &default_hashsize);
DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
if (datasets != NULL) {
const char *str = NULL;
if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
@ -1378,7 +1395,7 @@ static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc D
}
case DATASET_TYPE_IPV6: {
struct in6_addr in6;
if (ParseIpv6String(set, string, &in6) != 0) {
if (DatasetParseIpv6String(set, string, &in6) != 0) {
SCLogError("Dataset failed to import %s as IPv6", string);
return -2;
}

@ -28,6 +28,11 @@ void DatasetsSave(void);
void DatasetReload(void);
void DatasetPostReloadCleanup(void);
/** Data format of a dataset, as selected by the 'format' option of the
 *  dataset keyword. */
typedef enum {
/* 'format csv'; also the value used when no format option is given */
DATASET_FORMAT_CSV = 0,
/* 'format json': entries carry JSON context that is added to alerts */
DATASET_FORMAT_JSON,
} DatasetFormats;
enum DatasetTypes {
#define DATASET_TYPE_NOTSET 0
DATASET_TYPE_STRING = 1,
@ -53,6 +58,11 @@ typedef struct Dataset {
} Dataset;
enum DatasetTypes DatasetGetTypeFromString(const char *s);
void DatasetAppendSet(Dataset *set);
Dataset *DatasetAlloc(const char *name);
void DatasetLock(void);
void DatasetUnlock(void);
Dataset *DatasetSearchByName(const char *name);
Dataset *DatasetFind(const char *name, enum DatasetTypes type);
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
uint64_t memcap, uint32_t hashsize);
@ -62,6 +72,9 @@ int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len);
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
const DataRepType *rep);
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6);
int DatasetAddSerialized(Dataset *set, const char *string);
int DatasetRemoveSerialized(Dataset *set, const char *string);
int DatasetLookupSerialized(Dataset *set, const char *string);

@ -145,9 +145,38 @@ PacketAlert *PacketAlertCreate(void)
return pa_array;
}
/**
 * \brief Reset the JSON context of every alert slot so the array can be reused.
 *
 * The first list element is embedded in each PacketAlert and is only cleared;
 * every chained element after it is freed.
 *
 * \param pa_array array of packet_alert_max alerts, may be NULL
 */
void PacketAlertRecycle(PacketAlert *pa_array)
{
    if (pa_array == NULL)
        return;

    for (int idx = 0; idx < packet_alert_max; idx++) {
        struct ExtraDataJsonList *head = &pa_array[idx].json_info;

        /* release the chained nodes hanging off the embedded head */
        for (struct ExtraDataJsonList *node = head->next; node != NULL;) {
            struct ExtraDataJsonList *following = node->next;
            SCFree(node);
            node = following;
        }

        head->json_string = NULL;
        head->next = NULL;
    }
}
/**
 * \brief Free an alert array together with any chained JSON context nodes.
 *
 * The first list element of each alert is embedded in the PacketAlert itself,
 * so only the elements chained after it are freed individually.
 *
 * \param pa array of packet_alert_max alerts, may be NULL
 */
void PacketAlertFree(PacketAlert *pa)
{
    if (pa == NULL)
        return;

    for (int idx = 0; idx < packet_alert_max; idx++) {
        struct ExtraDataJsonList *node = pa[idx].json_info.next;
        while (node != NULL) {
            struct ExtraDataJsonList *following = node->next;
            SCFree(node);
            node = following;
        }
    }

    SCFree(pa);
}

@ -238,6 +238,11 @@ typedef uint16_t Port;
#define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER))
#define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT))
/** Singly linked list of JSON fragments attached to an alert. */
struct ExtraDataJsonList {
/* JSON fragment for this element, NULL when unused.
 * NOTE(review): appears to be a borrowed pointer into the detect thread
 * context rather than an owned allocation -- confirm before freeing. */
char *json_string;
/* next element of the list, NULL at the end */
struct ExtraDataJsonList *next;
};
/* structure to store the sids/gids/etc the detection engine
* found in this packet */
typedef struct PacketAlert_ {
@ -247,6 +252,7 @@ typedef struct PacketAlert_ {
const struct Signature_ *s;
uint64_t tx_id; /* Used for sorting */
int64_t frame_id;
struct ExtraDataJsonList json_info;
} PacketAlert;
/**
@ -288,6 +294,7 @@ typedef struct PacketAlerts_ {
} PacketAlerts;
PacketAlert *PacketAlertCreate(void);
void PacketAlertRecycle(PacketAlert *pa_array);
void PacketAlertFree(PacketAlert *pa);

@ -1,4 +1,4 @@
/* Copyright (C) 2018-2020 Open Information Security Foundation
/* Copyright (C) 2018-2025 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -28,6 +28,7 @@
#include "detect.h"
#include "threads.h"
#include "datasets.h"
#include "datajson.h"
#include "detect-dataset.h"
#include "detect-parse.h"
@ -60,6 +61,51 @@ void DetectDatasetRegister (void)
sigmatch_table[DETECT_DATASET].Free = DetectDatasetFree;
}
/**
 * \brief Match a buffer against a dataset using the JSON format.
 *
 * For the 'isset' command, a hit that comes with JSON data is also recorded in
 * det_ctx->json_content as "<json_key>":<json value>, tagged with sd->id. The
 * JSON is silently skipped when the per-thread array is full or when the
 * resulting string would not fit in one item.
 *
 * \param det_ctx detection thread context receiving the JSON context
 * \param sd keyword data (set, command, enrichment key, id)
 * \param data buffer to look up
 * \param data_len length of the buffer
 *
 * \retval 1 match
 * \retval 0 no match (including empty buffer or unknown command)
 */
static int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatasetData *sd,
        const uint8_t *data, const uint32_t data_len)
{
    if (data_len == 0 || data == NULL)
        return 0;

    if (sd->cmd == DETECT_DATASET_CMD_ISNOTSET) {
        DataJsonResultType res = DatajsonLookup(sd->set, data, data_len);
        SCLogDebug("r found: %d, len: %zu", res.found, res.json.len);
        return res.found ? 0 : 1;
    }

    if (sd->cmd != DETECT_DATASET_CMD_ISSET) {
        DEBUG_VALIDATE_BUG_ON("unknown dataset with json command");
        return 0;
    }

    DataJsonResultType res = DatajsonLookup(sd->set, data, data_len);
    SCLogDebug("r found: %d, len: %zu", res.found, res.json.len);
    if (!res.found)
        return 0;

    /* a match without JSON attached has nothing to record */
    if (res.json.len == 0)
        return 1;

    /* no free slot left in the per-thread array */
    if (det_ctx->json_content_len >= SIG_JSON_CONTENT_ARRAY_LEN)
        return 1;

    /* we need to add 3 on length check for the added quotes and colon when
       building the json string */
    if (res.json.len + strlen(sd->json_key) + 3 >= SIG_JSON_CONTENT_ITEM_LEN)
        return 1;

    SigJsonContent *slot = &det_ctx->json_content[det_ctx->json_content_len];
    snprintf(slot->json_content, SIG_JSON_CONTENT_ITEM_LEN, "\"%s\":%s", sd->json_key,
            res.json.value);
    slot->id = sd->id;
    det_ctx->json_content_len++;

    return 1;
}
/*
1 match
0 no match
@ -72,6 +118,10 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx,
if (data == NULL || data_len == 0)
return 0;
if (sd->format == DATASET_FORMAT_JSON) {
return DetectDatajsonBufferMatch(det_ctx, sd, data, data_len);
}
switch (sd->cmd) {
case DETECT_DATASET_CMD_ISSET: {
//PrintRawDataFp(stdout, data, data_len);
@ -110,18 +160,22 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx,
static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *name, int name_len,
enum DatasetTypes *type, char *load, size_t load_size, char *save, size_t save_size,
uint64_t *memcap, uint32_t *hashsize)
uint64_t *memcap, uint32_t *hashsize, DatasetFormats *format, char *value_key,
size_t value_key_size, char *array_key, size_t array_key_size, char *enrichment_key,
size_t enrichment_key_size)
{
bool cmd_set = false;
bool name_set = false;
bool load_set = false;
bool save_set = false;
bool state_set = false;
bool format_set = false;
char copy[strlen(str)+1];
strlcpy(copy, str, sizeof(copy));
char *xsaveptr = NULL;
char *key = strtok_r(copy, ",", &xsaveptr);
while (key != NULL) {
while (*key != '\0' && isblank(*key)) {
key++;
@ -203,7 +257,41 @@ static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *nam
strlcpy(load, val, load_size);
strlcpy(save, val, save_size);
state_set = true;
} else if (strcmp(key, "format") == 0) {
if (format_set) {
SCLogWarning("'format' can only appear once");
return -1;
}
SCLogDebug("format %s", val);
if (strcmp(val, "csv") == 0) {
*format = DATASET_FORMAT_CSV;
} else if (strcmp(val, "json") == 0) {
*format = DATASET_FORMAT_JSON;
} else {
SCLogWarning("unknown format %s", val);
return -1;
}
format_set = true;
} else if (strcmp(key, "value_key") == 0) {
if (strlen(val) > value_key_size) {
SCLogWarning("'key' value too long (limit is %zu)", value_key_size);
return -1;
}
strlcpy(value_key, val, value_key_size);
} else if (strcmp(key, "array_key") == 0) {
if (strlen(val) > array_key_size) {
SCLogWarning("'key' value too long (limit is %zu)", array_key_size);
return -1;
}
strlcpy(array_key, val, array_key_size);
} else if (strcmp(key, "enrichment_key") == 0) {
if (strlen(val) > enrichment_key_size) {
SCLogWarning("'key' value too long (limit is %zu)", enrichment_key_size);
return -1;
}
strlcpy(enrichment_key, val, enrichment_key_size);
}
if (strcmp(key, "memcap") == 0) {
if (ParseSizeStringU64(val, memcap) < 0) {
SCLogWarning("invalid value for memcap: %s,"
@ -361,6 +449,10 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
enum DatasetTypes type = DATASET_TYPE_NOTSET;
char load[PATH_MAX] = "";
char save[PATH_MAX] = "";
DatasetFormats format = DATASET_FORMAT_CSV;
char value_key[SIG_JSON_CONTENT_KEY_LEN] = "";
char array_key[SIG_JSON_CONTENT_KEY_LEN] = "";
char enrichment_key[SIG_JSON_CONTENT_KEY_LEN] = "";
if (DetectBufferGetActiveList(de_ctx, s) == -1) {
SCLogError("datasets are only supported for sticky buffers");
@ -374,7 +466,9 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
}
if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load,
sizeof(load), save, sizeof(save), &memcap, &hashsize)) {
sizeof(load), save, sizeof(save), &memcap, &hashsize, &format, value_key,
sizeof(value_key), array_key, sizeof(array_key), enrichment_key,
sizeof(enrichment_key))) {
return -1;
}
@ -383,14 +477,33 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
} else if (strcmp(cmd_str,"isnotset") == 0) {
cmd = DETECT_DATASET_CMD_ISNOTSET;
} else if (strcmp(cmd_str,"set") == 0) {
if (format == DATASET_FORMAT_JSON) {
SCLogError("json format is not supported for 'set' command");
return -1;
}
cmd = DETECT_DATASET_CMD_SET;
} else if (strcmp(cmd_str,"unset") == 0) {
if (format == DATASET_FORMAT_JSON) {
SCLogError("json format is not supported for 'unset' command");
return -1;
}
cmd = DETECT_DATASET_CMD_UNSET;
} else {
SCLogError("dataset action \"%s\" is not supported.", cmd_str);
return -1;
}
if (format == DATASET_FORMAT_JSON) {
if (strlen(save) != 0) {
SCLogError("json format is not supported with 'save' or 'state' option");
return -1;
}
if (strlen(enrichment_key) == 0) {
SCLogError("json format needs an 'enrichment_key' parameter");
return -1;
}
}
/* if just 'load' is set, we load data from the same dir as the
* rule file. If load+save is used, we use data dir */
if (strlen(save) == 0 && strlen(load) != 0) {
@ -411,7 +524,13 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
}
SCLogDebug("name '%s' load '%s' save '%s'", name, load, save);
Dataset *set = DatasetGet(name, type, save, load, memcap, hashsize);
Dataset *set = NULL;
if (format == DATASET_FORMAT_JSON) {
set = DatajsonGet(name, type, load, memcap, hashsize, value_key, array_key);
} else {
set = DatasetGet(name, type, save, load, memcap, hashsize);
}
if (set == NULL) {
SCLogError("failed to set up dataset '%s'.", name);
return -1;
@ -423,6 +542,11 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst
cd->set = set;
cd->cmd = cmd;
cd->format = format;
if (format == DATASET_FORMAT_JSON) {
strlcpy(cd->json_key, enrichment_key, sizeof(cd->json_key));
}
cd->id = s;
SCLogDebug("cmd %s, name %s",
cmd_str, strlen(name) ? name : "(none)");

@ -25,10 +25,15 @@
#define SURICATA_DETECT_DATASET_H
#include "datasets.h"
#include "datajson.h"
/** Per-keyword data for the dataset keyword. */
typedef struct DetectDatasetData_ {
/* dataset the keyword operates on */
Dataset *set;
/* DETECT_DATASET_CMD_* command (isset, isnotset, set, unset) */
uint8_t cmd;
/* csv or json; json routes matching through the datajson lookup */
DatasetFormats format;
/* NOTE(review): not referenced by the code visible here -- confirm it is used */
DataJsonType json;
/* key under which the JSON is added to the alert ('enrichment_key' option);
 * only filled for the json format */
char json_key[SIG_JSON_CONTENT_KEY_LEN];
/* signature owning this keyword; used to pair recorded JSON with its alert */
void *id;
} DetectDatasetData;
int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx,

@ -297,6 +297,30 @@ static inline PacketAlert PacketAlertSet(
/* Set tx_id if the frame has it */
pa.tx_id = tx_id;
pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? det_ctx->frame_id : 0;
pa.json_info.json_string = NULL;
pa.json_info.next = NULL;
if (det_ctx->json_content_len) {
/* We have some JSON attached in the current detection so let's try
to see if some need to be used for current signature. */
struct ExtraDataJsonList *current_json = &pa.json_info;
for (size_t i = 0; i < det_ctx->json_content_len; i++) {
if (s == det_ctx->json_content[i].id) {
if (current_json->json_string != NULL) {
struct ExtraDataJsonList *next_json =
SCCalloc(1, sizeof(struct ExtraDataJsonList));
if (next_json) {
current_json->next = next_json;
current_json = next_json;
current_json->next = NULL;
} else {
/* Allocation error, let's return now */
return pa;
}
}
current_json->json_string = det_ctx->json_content[i].json_content;
}
}
}
return pa;
}

@ -85,6 +85,7 @@ enum DetectKeywordId {
DETECT_BYTE_EXTRACT,
DETECT_DATASET,
DETECT_DATAREP,
DETECT_DATAJSON,
DETECT_BASE64_DECODE,
DETECT_BASE64_DATA,
DETECT_BSIZE,

@ -941,6 +941,7 @@ static DetectRunScratchpad DetectRunSetup(const DetectEngineCtx *de_ctx,
det_ctx->base64_decoded_len = 0;
det_ctx->raw_stream_progress = 0;
det_ctx->match_array_cnt = 0;
det_ctx->json_content_len = 0;
det_ctx->alert_queue_size = 0;
p->alerts.drop.action = 0;

@ -1228,6 +1228,16 @@ typedef struct PostRuleMatchWorkQueue {
uint32_t size; /**< allocation size in number of elements. */
} PostRuleMatchWorkQueue;
#define SIG_JSON_CONTENT_ARRAY_LEN 16
#define SIG_JSON_CONTENT_ITEM_LEN 1024
#define SIG_JSON_CONTENT_KEY_LEN 32
/** structure to store the json content with info on sig that triggered it */
typedef struct SigJsonContent {
void *id;
char json_content[SIG_JSON_CONTENT_ITEM_LEN];
} SigJsonContent;
/**
* Detection engine thread data.
*/
@ -1268,6 +1278,9 @@ typedef struct DetectEngineThreadCtx_ {
/* byte_* values */
uint64_t *byte_values;
SigJsonContent json_content[SIG_JSON_CONTENT_ARRAY_LEN];
size_t json_content_len;
/* counter for the filestore array below -- up here for cache reasons. */
uint16_t filestore_cnt;

@ -253,6 +253,15 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, SCJsonBuilder *js,
AlertJsonMetadata(pa, js);
}
if (pa->json_info.json_string != NULL) {
SCJbOpenObject(js, "extra");
const struct ExtraDataJsonList *json_info = &pa->json_info;
while (json_info) {
SCJbSetFormatted(js, json_info->json_string);
json_info = json_info->next;
}
SCJbClose(js);
}
if (flags & LOG_JSON_RULE) {
SCJbSetString(js, "rule", pa->s->sig_str);
}

@ -127,6 +127,7 @@ void PacketReinit(Packet *p)
p->alerts.discarded = 0;
p->alerts.suppressed = 0;
p->alerts.drop.action = 0;
PacketAlertRecycle(p->alerts.alerts);
p->pcap_cnt = 0;
p->tunnel_rtv_cnt = 0;
p->tunnel_tpr_cnt = 0;

@ -55,6 +55,7 @@
#include "conf-yaml-loader.h"
#include "datasets.h"
#include "datajson.h"
#include "runmode-unix-socket.h"
int unix_socket_mode_is_running = 0;
@ -806,6 +807,74 @@ TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data)
}
}
/**
 * \brief Command to add a value and its JSON context to a dataset
 *
 * The command arguments must provide four strings: "setname", "settype",
 * "datavalue" and "datajson".
 *
 * \param cmd the content of command Arguments as a json_t object
 * \param answer the json_t object that has to be used to answer
 * \param data unused by this command
 *
 * \retval TM_ECODE_OK the data was added or was already in the set
 * \retval TM_ECODE_FAILED on invalid argument, unknown set or add failure
 */
TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data)
{
    /* 1 get dataset name */
    json_t *narg = json_object_get(cmd, "setname");
    if (!json_is_string(narg)) {
        json_object_set_new(answer, "message", json_string("setname is not a string"));
        return TM_ECODE_FAILED;
    }
    const char *set_name = json_string_value(narg);

    /* 2 get the data type */
    json_t *targ = json_object_get(cmd, "settype");
    if (!json_is_string(targ)) {
        json_object_set_new(answer, "message", json_string("settype is not a string"));
        return TM_ECODE_FAILED;
    }
    const char *type = json_string_value(targ);

    /* 3 get value */
    json_t *varg = json_object_get(cmd, "datavalue");
    if (!json_is_string(varg)) {
        json_object_set_new(answer, "message", json_string("datavalue is not string"));
        return TM_ECODE_FAILED;
    }
    const char *value = json_string_value(varg);

    /* 4 get json: validate the "datajson" argument itself, otherwise a
     * missing or non-string value would be passed on as a NULL string */
    json_t *jarg = json_object_get(cmd, "datajson");
    if (!json_is_string(jarg)) {
        json_object_set_new(answer, "message", json_string("datajson is not string"));
        return TM_ECODE_FAILED;
    }
    const char *json = json_string_value(jarg);

    SCLogDebug("datajson-add: %s type %s value %s json %s", set_name, type, value, json);

    enum DatasetTypes t = DatasetGetTypeFromString(type);
    if (t == DATASET_TYPE_NOTSET) {
        json_object_set_new(answer, "message", json_string("unknown settype"));
        return TM_ECODE_FAILED;
    }

    Dataset *set = DatasetFind(set_name, t);
    if (set == NULL) {
        json_object_set_new(answer, "message", json_string("set not found or wrong type"));
        return TM_ECODE_FAILED;
    }

    int r = DatajsonAddSerialized(set, value, json);
    if (r == 1) {
        json_object_set_new(answer, "message", json_string("data added"));
        return TM_ECODE_OK;
    } else if (r == 0) {
        json_object_set_new(answer, "message", json_string("data already in set"));
        return TM_ECODE_OK;
    } else {
        json_object_set_new(answer, "message", json_string("failed to add data"));
        return TM_ECODE_FAILED;
    }
}
static bool JsonU32Value(json_t *jarg, uint32_t *ret)
{
int64_t r = json_integer_value(jarg);

@ -38,6 +38,7 @@ TmEcode UnixSocketDatasetRemove(json_t *cmd, json_t* answer, void *data);
TmEcode UnixSocketDatasetDump(json_t *cmd, json_t *answer, void *data);
TmEcode UnixSocketDatasetClear(json_t *cmd, json_t *answer, void *data);
TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data);
TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data);
TmEcode UnixSocketRegisterTenantHandler(json_t *cmd, json_t* answer, void *data);
TmEcode UnixSocketUnregisterTenantHandler(json_t *cmd, json_t* answer, void *data);
TmEcode UnixSocketRegisterTenant(json_t *cmd, json_t* answer, void *data);

@ -1111,6 +1111,8 @@ int UnixManagerInit(void)
UnixManagerRegisterCommand("dataset-add", UnixSocketDatasetAdd, &command, UNIX_CMD_TAKE_ARGS);
UnixManagerRegisterCommand("dataset-remove", UnixSocketDatasetRemove, &command, UNIX_CMD_TAKE_ARGS);
UnixManagerRegisterCommand(
"dataset-add-json", UnixSocketDatajsonAdd, &command, UNIX_CMD_TAKE_ARGS);
UnixManagerRegisterCommand(
"get-flow-stats-by-id", UnixSocketGetFlowStatsById, &command, UNIX_CMD_TAKE_ARGS);
UnixManagerRegisterCommand("dataset-dump", UnixSocketDatasetDump, NULL, 0);

Loading…
Cancel
Save