diff --git a/rust/suricatasc/src/unix/commands.rs b/rust/suricatasc/src/unix/commands.rs index 707e934c8f..6b784e0079 100644 --- a/rust/suricatasc/src/unix/commands.rs +++ b/rust/suricatasc/src/unix/commands.rs @@ -71,12 +71,11 @@ impl<'a> CommandParser<'a> { } pub fn parse(&self, input: &str) -> Result { - let parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect(); + let mut parts: Vec<&str> = input.split(' ').map(|s| s.trim()).collect(); if parts.is_empty() { return Err(CommandParseError::Other("No command provided".to_string())); } let command = parts[0]; - let args = &parts[1..]; let spec = self .commands @@ -91,6 +90,13 @@ impl<'a> CommandParser<'a> { // Calculate the number of required arguments for better error reporting. let required = spec.iter().filter(|e| e.required).count(); + let optional = spec.iter().filter(|e| !e.required).count(); + // Handle the case where the command has only required arguments and allow + // last one to contain spaces. + if optional == 0 { + parts = input.splitn(required + 1, ' ').collect(); + } + let args = &parts[1..]; let mut json_args = HashMap::new(); @@ -386,6 +392,28 @@ fn command_defs() -> Result>, serde_json::Error> { "type": "string", }, ], + "dataset-add-json": [ + { + "name": "setname", + "required": true, + "type": "string", + }, + { + "name": "settype", + "required": true, + "type": "string", + }, + { + "name": "datavalue", + "required": true, + "type": "string", + }, + { + "name": "datajson", + "required": true, + "type": "string", + }, + ], "get-flow-stats-by-id": [ { "name": "flow_id", diff --git a/src/Makefile.am b/src/Makefile.am index c70254f3f3..8365f98451 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -54,6 +54,7 @@ noinst_HEADERS = \ conf-yaml-loader.h \ conf.h \ counters.h \ + datajson.h \ datasets-ipv4.h \ datasets-ipv6.h \ datasets-md5.h \ @@ -653,6 +654,7 @@ libsuricata_c_a_SOURCES = \ conf-yaml-loader.c \ conf.c \ counters.c \ + datajson.c \ datasets-ipv4.c \ datasets-ipv6.c \ 
datasets-md5.c \
diff --git a/src/datajson.c b/src/datajson.c
new file mode 100644
index 0000000000..98cb838eae
--- /dev/null
+++ b/src/datajson.c
@@ -0,0 +1,985 @@
+/* Copyright (C) 2025 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Eric Leblond
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "rust.h"
+#include "datasets.h"
+#include "datajson.h"
+#include "datasets-ipv4.h"
+#include "datasets-ipv6.h"
+#include "datasets-md5.h"
+#include "datasets-sha256.h"
+#include "datasets-string.h"
+#include "util-byte.h"
+#include "util-ip.h"
+#include "util-debug.h"
+
+static int DatajsonAdd(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
+
+/* Release a hash entry handed out by a get/lookup call: drop the use
+ * count first, then unlock the entry. */
+static inline void DatajsonUnlockData(THashData *d)
+{
+    (void)THashDecrUsecnt(d);
+    THashDataUnlock(d);
+}
+
+/**
+ * \brief Check that the whole input is a number (integer or float).
+ *
+ * strtof() stores in endptr the address of the first character it did not
+ * consume. The input is therefore a number only if at least one character
+ * was parsed and parsing stopped exactly at in + ins; input with trailing
+ * garbage such as "1abc" is rejected.
+ *
+ * \param in  NUL terminated string (the caller passes strlen(in) as ins)
+ * \param ins length of in
+ *
+ * \retval true  the complete string was consumed as a number
+ * \retval false empty input, no number found, or characters left over
+ */
+static bool IsFloat(const char *in, size_t ins)
+{
+    if (ins == 0) {
+        return false;
+    }
+    char *endptr = NULL;
+    (void)strtof(in, &endptr);
+    return (endptr != in) && (endptr == in + ins);
+}
+
+/**
+ * \brief Validate one JSON value and store a copy of it in rep_out.
+ *
+ * \param in      NUL terminated JSON text
+ * \param ins     length of in
+ * \param rep_out receives a newly allocated copy of in and its length
+ *
+ * \retval 0  value accepted, rep_out->value must be freed by the caller
+ * \retval -1 invalid JSON or allocation failure
+ */
+static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
+{
+    json_error_t jerror;
+    json_t *msg = json_loads(in, 0, &jerror);
+    if (msg == NULL) {
+        /* JANSSON does not see an integer, float or a string as valid JSON.
+           So we need to exclude them from failure. A string needs at least
+           its two quotes, a single '"' is not one. */
+        const bool quoted = (ins >= 2) && (in[0] == '"') && (in[ins - 1] == '"');
+        if (!quoted && !IsFloat(in, ins)) {
+            SCLogWarning("dataset: Invalid json: %s: '%s'", jerror.text, in);
+            return -1;
+        }
+    } else {
+        json_decref(msg);
+    }
+    rep_out->value = SCStrndup(in, ins);
+    if (rep_out->value == NULL) {
+        return -1;
+    }
+    rep_out->len = ins;
+    return 0;
+}
+
+static json_t *GetSubObjectByKey(json_t *json, const char *key)
+{
+    if (!json || !key || !json_is_object(json)) {
+        return NULL;
+    }
+
+    const char *current_key = key;
+    json_t *current = json;
+    while (current_key) {
+        const char *dot = strchr(current_key, '.');
+
+        size_t key_len = dot ? (size_t)(dot - current_key) : strlen(current_key);
+        char key_buffer[key_len + 1];
+        strlcpy(key_buffer, current_key, key_len + 1);
+
+        if (json_is_object(current) == false) {
+            return NULL;
+        }
+        current = json_object_get(current, key_buffer);
+        if (current == NULL) {
+            return NULL;
+        }
+        current_key = dot ?
dot + 1 : NULL;
+    }
+    return current;
+}
+
+/**
+ * \brief Load a JSON file and hand back the array holding the entries.
+ *
+ * With an empty or NULL key the top level value of the file is used,
+ * otherwise the value found at the dotted path key. On success the caller
+ * owns one reference on *array and must json_decref() it.
+ *
+ * \retval 0  *array is a JSON array
+ * \retval -1 load failure, key not found, or the value is not an array
+ */
+static int ParseJsonFile(const char *file, json_t **array, char *key)
+{
+    json_t *json;
+    json_error_t error;
+    /* assume we have one single JSON element in FILE */
+    json = json_load_file(file, 0, &error);
+    if (json == NULL) {
+        FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
+        return -1;
+    }
+
+    if (key == NULL || strlen(key) == 0) {
+        *array = json;
+    } else {
+        *array = GetSubObjectByKey(json, key);
+        if (*array == NULL) {
+            SCLogError("dataset: %s failed to get key '%s'", file, key);
+            json_decref(json);
+            return -1;
+        }
+        /* take our own reference on the sub-array before dropping the root */
+        json_incref(*array);
+        json_decref(json);
+    }
+    if (!json_is_array(*array)) {
+        FatalErrorOnInit("not an array");
+        json_decref(*array);
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * \brief Add a string key and its JSON payload to the set.
+ *
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 failed to add data to the hash
+ */
+static int DatajsonAddString(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
+    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
+    if (res.data) {
+        DatajsonUnlockData(res.data);
+        return res.is_new ? 1 : 0;
+    }
+    return -1;
+}
+
+/* Add an MD5 key and its JSON payload to the set. Same return values as
+ * DatajsonAddString(), plus -2 when data_len is not SC_MD5_LEN. */
+static int DatajsonAddMd5(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    if (data_len != SC_MD5_LEN)
+        return -2;
+
+    Md5Type lookup = { .json = *json };
+    memcpy(lookup.md5, data, SC_MD5_LEN);
+    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
+    if (res.data) {
+        DatajsonUnlockData(res.data);
+        return res.is_new ? 1 : 0;
+    }
+    return -1;
+}
+
+/* Add a SHA256 key and its JSON payload to the set. Same return values as
+ * DatajsonAddString(), plus -2 when data_len is not SC_SHA256_LEN. */
+static int DatajsonAddSha256(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    if (data_len != SC_SHA256_LEN)
+        return -2;
+
+    Sha256Type lookup = { .json = *json };
+    memcpy(lookup.sha256, data, SC_SHA256_LEN);
+    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
+    if (res.data) {
+        DatajsonUnlockData(res.data);
+        return res.is_new ? 1 : 0;
+    }
+    return -1;
+}
+
+/* Add an IPv4 key and its JSON payload to the set. Same return values as
+ * DatajsonAddString(), plus -2 when data is shorter than SC_IPV4_LEN; only
+ * the first SC_IPV4_LEN bytes of data are used. */
+static int DatajsonAddIPv4(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    if (data_len < SC_IPV4_LEN)
+        return -2;
+
+    IPv4Type lookup = { .json = *json };
+    memcpy(lookup.ipv4, data, SC_IPV4_LEN);
+    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
+    if (res.data) {
+        DatajsonUnlockData(res.data);
+        return res.is_new ? 1 : 0;
+    }
+    return -1;
+}
+
+/* Add an IPv6 key and its JSON payload to the set. Same return values as
+ * DatajsonAddString(), plus -2 when data_len is not SC_IPV6_LEN. */
+static int DatajsonAddIPv6(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    if (data_len != SC_IPV6_LEN)
+        return -2;
+
+    IPv6Type lookup = { .json = *json };
+    memcpy(lookup.ipv6, data, SC_IPV6_LEN);
+    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
+    if (res.data) {
+        DatajsonUnlockData(res.data);
+        return res.is_new ?
1 : 0;
+    }
+    return -1;
+}
+
+/**
+ * \brief Insert a key with its JSON payload, dispatching on the set type.
+ *
+ * \retval 1  data was added to the hash
+ * \retval 0  data was already in the hash
+ * \retval <0 failure (NULL set, unknown type, or error from the type handler)
+ */
+static int DatajsonAdd(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
+{
+    if (set == NULL)
+        return -1;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING:
+            return DatajsonAddString(set, data, data_len, json);
+        case DATASET_TYPE_MD5:
+            return DatajsonAddMd5(set, data, data_len, json);
+        case DATASET_TYPE_SHA256:
+            return DatajsonAddSha256(set, data, data_len, json);
+        case DATASET_TYPE_IPV4:
+            return DatajsonAddIPv4(set, data, data_len, json);
+        case DATASET_TYPE_IPV6:
+            return DatajsonAddIPv6(set, data, data_len, json);
+        default:
+            break;
+    }
+    return -1;
+}
+
+/**
+ * \brief Serialize one JSON array element and insert it under the given key.
+ *
+ * The compact dump of value becomes the JSON payload of the entry. The
+ * dump is only kept when a new entry was created; on a duplicate or on
+ * failure it is released here, as DatajsonOpSerialized() does, so the
+ * callers have nothing to free.
+ *
+ * \retval 1  entry added
+ * \retval 0  key already present
+ * \retval <0 serialization or insertion failure
+ */
+static int DatajsonAddElement(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, json_t *value)
+{
+    DataJsonType elt = { .value = NULL, .len = 0 };
+    elt.value = json_dumps(value, JSON_COMPACT);
+    if (elt.value == NULL)
+        return -1;
+    elt.len = strlen(elt.value);
+
+    int ret = DatajsonAdd(set, data, data_len, &elt);
+    if (ret <= 0) {
+        SCFree(elt.value);
+    }
+    return ret;
+}
+
+/**
+ * \brief Get the string stored in key, reporting values of another type.
+ *
+ * json_string_value() returns NULL for anything that is not a JSON string,
+ * so the result must be checked before it is measured or parsed.
+ *
+ * \retval NULL key does not hold a string (already reported)
+ */
+static const char *DatajsonGetKeyString(
+        const Dataset *set, const json_t *key, const char *json_key)
+{
+    const char *str = json_string_value(key);
+    if (str == NULL) {
+        FatalErrorOnInit("dataset: %s value of key '%s' is not a string in '%s'", set->name,
+                json_key, set->load);
+    }
+    return str;
+}
+
+/**
+ * \brief Load a string dataset from the JSON file set->load.
+ *
+ * \param json_key  dotted path of the key value inside each array element
+ * \param array_key dotted path of the array inside the file, may be NULL
+ *
+ * \retval 0  ok, or nothing to load
+ * \retval -1 file could not be parsed or no element carries json_key
+ */
+static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
+    uint32_t cnt = 0;
+    json_t *json;
+    bool found = false;
+    SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
+    if (ParseJsonFile(set->load, &json, array_key) == -1) {
+        SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
+        return -1;
+    }
+
+    int add_ret;
+    size_t index;
+    json_t *value;
+    json_array_foreach (json, index, value) {
+        json_t *key = GetSubObjectByKey(value, json_key);
+        if (key == NULL) {
+            /* ignore error as it can be a working mode where some entries
+               are not in the same format */
+            continue;
+        }
+
+        found = true;
+
+        const char *val = DatajsonGetKeyString(set, key, json_key);
+        if (val == NULL) {
+            continue;
+        }
+
+        add_ret = DatajsonAddElement(set, (const uint8_t *)val, (uint32_t)strlen(val), value);
+        if (add_ret < 0) {
+            FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
+            continue;
+        }
+        if (add_ret > 0) {
+            cnt++;
+        }
+    }
+    json_decref(json);
+
+    if (found == false) {
+        FatalErrorOnInit(
+                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
+        return -1;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief Read MD5 keyed entries from the JSON file set->load.
+ *
+ * \param loaded receives the number of entries added
+ *
+ * \retval 0  ok
+ * \retval -1 file could not be parsed or no element carries json_key
+ */
+static int DatajsonLoadMd5FromJSON(
+        Dataset *set, char *array_key, char *json_key, uint32_t *loaded)
+{
+    int add_ret;
+    uint32_t cnt = 0;
+    json_t *json;
+    bool found = false;
+
+    if (ParseJsonFile(set->load, &json, array_key) == -1)
+        return -1;
+
+    size_t index;
+    json_t *value;
+    json_array_foreach (json, index, value) {
+        json_t *key = GetSubObjectByKey(value, json_key);
+        if (key == NULL) {
+            /* ignore error as it can be a working mode where some entries
+               are not in the same format */
+            continue;
+        }
+
+        found = true;
+
+        const char *hash_string = DatajsonGetKeyString(set, key, json_key);
+        if (hash_string == NULL) {
+            continue;
+        }
+        if (strlen(hash_string) != SC_MD5_HEX_LEN) {
+            FatalErrorOnInit("Not correct length for a hash");
+            continue;
+        }
+
+        uint8_t hash[SC_MD5_LEN];
+        if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
+            FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+            continue;
+        }
+
+        add_ret = DatajsonAddElement(set, (const uint8_t *)hash, SC_MD5_LEN, value);
+        if (add_ret < 0) {
+            FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
+            continue;
+        }
+        if (add_ret > 0) {
+            cnt++;
+        }
+    }
+    json_decref(json);
+
+    if (found == false) {
+        FatalErrorOnInit(
+                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
+        return -1;
+    }
+
+    *loaded = cnt;
+    return 0;
+}
+
+/**
+ * \brief Load an MD5 dataset from the JSON file set->load.
+ *
+ * \retval 0  ok, or nothing to load
+ * \retval -1 loading failed
+ */
+static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
+    uint32_t cnt = 0;
+    if (DatajsonLoadMd5FromJSON(set, array_key, json_key, &cnt) < 0) {
+        SCLogError("dataset: %s failed to load from '%s'", set->name, set->load);
+        return -1;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief Read SHA256 keyed entries from the JSON file set->load.
+ *
+ * \param loaded receives the number of entries added
+ *
+ * \retval 0  ok
+ * \retval -1 file could not be parsed or no element carries json_key
+ */
+static int DatajsonLoadSHA256FromJSON(
+        Dataset *set, char *array_key, char *json_key, uint32_t *loaded)
+{
+    int add_ret;
+    uint32_t cnt = 0;
+    json_t *json;
+    bool found = false;
+
+    if (ParseJsonFile(set->load, &json, array_key) == -1)
+        return -1;
+
+    size_t index;
+    json_t *value;
+    json_array_foreach (json, index, value) {
+        json_t *key = GetSubObjectByKey(value, json_key);
+        if (key == NULL) {
+            /* ignore error as it can be a working mode where some entries
+               are not in the same format */
+            continue;
+        }
+
+        found = true;
+
+        const char *hash_string = DatajsonGetKeyString(set, key, json_key);
+        if (hash_string == NULL) {
+            continue;
+        }
+        if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
+            FatalErrorOnInit("Not correct length for a hash");
+            continue;
+        }
+
+        uint8_t hash[SC_SHA256_LEN];
+        if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
+            FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+            continue;
+        }
+
+        add_ret = DatajsonAddElement(set, (const uint8_t *)hash, SC_SHA256_LEN, value);
+        if (add_ret < 0) {
+            FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
+            continue;
+        }
+        if (add_ret > 0) {
+            cnt++;
+        }
+    }
+    json_decref(json);
+
+    if (found == false) {
+        FatalErrorOnInit(
+                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
+        return -1;
+    }
+
+    *loaded = cnt;
+    return 0;
+}
+
+/**
+ * \brief Load a SHA256 dataset from the JSON file set->load.
+ *
+ * \retval 0  ok, or nothing to load
+ * \retval -1 loading failed
+ */
+static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
+    uint32_t cnt = 0;
+    if (DatajsonLoadSHA256FromJSON(set, array_key, json_key, &cnt) < 0) {
+        SCLogError("dataset: %s failed to load from '%s'", set->name, set->load);
+        return -1;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief Read IPv4 keyed entries from the JSON file set->load.
+ *
+ * \param loaded receives the number of entries added
+ *
+ * \retval 0  ok
+ * \retval -1 file could not be parsed or no element carries json_key
+ */
+static int DatajsonLoadIPv4FromJSON(
+        Dataset *set, char *array_key, char *json_key, uint32_t *loaded)
+{
+    uint32_t cnt = 0;
+    int add_ret;
+    json_t *json;
+    bool found = false;
+
+    if (ParseJsonFile(set->load, &json, array_key) == -1)
+        return -1;
+
+    size_t index;
+    json_t *value;
+    json_array_foreach (json, index, value) {
+        json_t *key = GetSubObjectByKey(value, json_key);
+        if (key == NULL) {
+            /* ignore error as it can be a working mode where some entries
+               are not in the same format */
+            continue;
+        }
+
+        found = true;
+
+        const char *ip_string = DatajsonGetKeyString(set, key, json_key);
+        if (ip_string == NULL) {
+            continue;
+        }
+        struct in_addr in;
+        if (inet_pton(AF_INET, ip_string, &in) != 1) {
+            FatalErrorOnInit(
+                    "datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
+            continue;
+        }
+
+        add_ret = DatajsonAddElement(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value);
+        if (add_ret < 0) {
+            FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
+            continue;
+        }
+        if (add_ret > 0) {
+            cnt++;
+        }
+    }
+    json_decref(json);
+
+    if (found == false) {
+        FatalErrorOnInit(
+                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
+        return -1;
+    }
+
+    *loaded = cnt;
+    return 0;
+}
+
+/**
+ * \brief Load an IPv4 dataset from the JSON file set->load.
+ *
+ * \retval 0  ok, or nothing to load
+ * \retval -1 loading failed
+ */
+static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
+    uint32_t cnt = 0;
+    if (DatajsonLoadIPv4FromJSON(set, array_key, json_key, &cnt) < 0) {
+        SCLogError("dataset: %s failed to load from '%s'", set->name, set->load);
+        return -1;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief Read IPv6 keyed entries from the JSON file set->load.
+ *
+ * \param loaded receives the number of entries added
+ *
+ * \retval 0  ok
+ * \retval -1 file could not be parsed or no element carries json_key
+ */
+static int DatajsonLoadIPv6FromJSON(
+        Dataset *set, char *array_key, char *json_key, uint32_t *loaded)
+{
+    uint32_t cnt = 0;
+    int add_ret;
+    json_t *json;
+    bool found = false;
+
+    if (ParseJsonFile(set->load, &json, array_key) == -1)
+        return -1;
+
+    size_t index;
+    json_t *value;
+    json_array_foreach (json, index, value) {
+        json_t *key = GetSubObjectByKey(value, json_key);
+        if (key == NULL) {
+            /* ignore error as it can be a working mode where some entries
+               are not in the same format */
+            continue;
+        }
+
+        found = true;
+
+        const char *ip_string = DatajsonGetKeyString(set, key, json_key);
+        if (ip_string == NULL) {
+            continue;
+        }
+        struct in6_addr in6;
+        int ret = DatasetParseIpv6String(set, ip_string, &in6);
+        if (ret < 0) {
+            FatalErrorOnInit("unable to parse IP address");
+            continue;
+        }
+
+        add_ret = DatajsonAddElement(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value);
+        if (add_ret < 0) {
+            FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
+            continue;
+        }
+        if (add_ret > 0) {
+            cnt++;
+        }
+    }
+    json_decref(json);
+
+    if (found == false) {
+        FatalErrorOnInit(
+                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
+        return -1;
+    }
+
+    *loaded = cnt;
+    return 0;
+}
+
+/**
+ * \brief Load an IPv6 dataset from the JSON file set->load.
+ *
+ * \retval 0  ok, or nothing to load
+ * \retval -1 loading failed
+ */
+static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
+    uint32_t cnt = 0;
+    if (DatajsonLoadIPv6FromJSON(set, array_key, json_key, &cnt) < 0) {
+        SCLogError("dataset: %s failed to load from '%s'", set->name, set->load);
+        return -1;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
+        uint32_t hashsize, char *json_key_value, char *json_array_key)
+{
+    uint64_t default_memcap = 0;
+    uint32_t default_hashsize = 0;
+    if (strlen(name) > DATASET_NAME_MAX_LEN) {
+        SCLogError("dataset name too long");
+        return NULL;
+    }
+
+    DatasetLock();
+    Dataset *set = DatasetSearchByName(name);
+    if (set) {
+        if (type != DATASET_TYPE_NOTSET && set->type != type) {
+            SCLogError("dataset %s already "
+                       "exists and is of type %u",
+                    set->name, set->type);
+            DatasetUnlock();
+            return NULL;
+        }
+
+        if (load == NULL || strlen(load) == 0) {
+            // OK, rule keyword doesn't have to set state/load,
+            // even when yaml set has set it.
+        } else {
+            /* load is non-NULL and non-empty in this branch, so only the
+             * file name itself has to be compared */
+            if (strcmp(set->load, load) != 0) {
+                SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
+                DatasetUnlock();
+                return NULL;
+            }
+        }
+
+        DatasetUnlock();
+        return set;
+    }
+
+    if (type == DATASET_TYPE_NOTSET) {
+        SCLogError("dataset %s not defined", name);
+        goto out_err;
+    }
+
+    set = DatasetAlloc(name);
+    if (set == NULL) {
+        SCLogError("dataset %s allocation failed", name);
+        goto out_err;
+    }
+
+    strlcpy(set->name, name, sizeof(set->name));
+    set->type = type;
+    if (load && strlen(load)) {
+        strlcpy(set->load, load, sizeof(set->load));
+        SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
+    }
+
+    static const char conf_format_str[] = "datasets.%s.hash";
+    char cnf_name[DATASET_NAME_MAX_LEN + (sizeof(conf_format_str) / sizeof(char))];
+    int p_ret = snprintf(cnf_name, sizeof(cnf_name), conf_format_str, name);
+    /* snprintf() reports an error with a negative value and truncation with
+     * a value >= the buffer size; 0 is never a failure indication */
+    if (p_ret < 0 || (size_t)p_ret >= sizeof(cnf_name)) {
+        SCLogError("Can't build configuration variable for set: '%s'", name);
+        goto out_err;
+    }
+
+    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
+    /* each type gets a hash using the Json variants of the set/free/length
+     * callbacks, then the entries are loaded from set->load if it is set */
+    switch (type) {
+        case DATASET_TYPE_MD5:
+            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
+                    Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0,
+                    memcap > 0 ? memcap : default_memcap,
+                    hashsize > 0 ? hashsize : default_hashsize);
+            if (set->hash == NULL)
+                goto out_err;
+            if (DatajsonLoadMd5(set, json_key_value, json_array_key) < 0)
+                goto out_err;
+            break;
+        case DATASET_TYPE_STRING:
+            set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
+                    StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0,
+                    memcap > 0 ? memcap : default_memcap,
+                    hashsize > 0 ? hashsize : default_hashsize);
+            if (set->hash == NULL)
+                goto out_err;
+            if (DatajsonLoadString(set, json_key_value, json_array_key) < 0) {
+                SCLogError("dataset %s loading failed", name);
+                goto out_err;
+            }
+            break;
+        case DATASET_TYPE_SHA256:
+            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
+                    Sha256StrHash, Sha256StrCompare, NULL, Sha256StrJsonGetLength,
+                    load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
+                    hashsize > 0 ? hashsize : default_hashsize);
+            if (set->hash == NULL)
+                goto out_err;
+            if (DatajsonLoadSha256(set, json_key_value, json_array_key) < 0)
+                goto out_err;
+            break;
+        case DATASET_TYPE_IPV4:
+            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
+                    IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0,
+                    memcap > 0 ? memcap : default_memcap,
+                    hashsize > 0 ? hashsize : default_hashsize);
+            if (set->hash == NULL)
+                goto out_err;
+            if (DatajsonLoadIPv4(set, json_key_value, json_array_key) < 0)
+                goto out_err;
+            break;
+        case DATASET_TYPE_IPV6:
+            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
+                    IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0,
+                    memcap > 0 ? memcap : default_memcap,
+                    hashsize > 0 ? hashsize : default_hashsize);
+            if (set->hash == NULL)
+                goto out_err;
+            if (DatajsonLoadIPv6(set, json_key_value, json_array_key) < 0)
+                goto out_err;
+            break;
+    }
+
+    SCLogDebug(
+            "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
+
+    DatasetAppendSet(set);
+
+    DatasetUnlock();
+    return set;
+out_err:
+    /* undo a partially built set; the dataset lock is held on every path
+     * that jumps here */
+    if (set) {
+        if (set->hash) {
+            THashShutdown(set->hash);
+        }
+        SCFree(set);
+    }
+    DatasetUnlock();
+    return NULL;
+}
+
+/* Look up a string key. found is false when the set is NULL or the key is
+ * absent; otherwise json is a copy of the value/len pair stored in the
+ * entry (the string itself is not duplicated). */
+static DataJsonResultType DatajsonLookupString(
+        Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
+
+    if (set == NULL)
+        return rrep;
+
+    StringType lookup = {
+        .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
+    };
+    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
+    if (rdata) {
+        StringType *found = rdata->data;
+        rrep.found = true;
+        rrep.json = found->json;
+        DatajsonUnlockData(rdata);
+        return rrep;
+    }
+    return rrep;
+}
+
+/* Look up an MD5 key; a data_len other than SC_MD5_LEN is reported as not
+ * found. */
+static DataJsonResultType DatajsonLookupMd5(
+        Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
+
+    if (set == NULL)
+        return rrep;
+
+    if (data_len != SC_MD5_LEN)
+        return rrep;
+
+    Md5Type lookup = { .json.value = NULL, .json.len = 0 };
+    memcpy(lookup.md5, data, data_len);
+    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
+    if (rdata) {
+        Md5Type *found = rdata->data;
+        rrep.found = true;
+        rrep.json = found->json;
+        DatajsonUnlockData(rdata);
+        return rrep;
+    }
+    return rrep;
+}
+
+/* Look up a SHA256 key; a data_len other than SC_SHA256_LEN is reported as
+ * not found. */
+static DataJsonResultType DatajsonLookupSha256(
+        Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
+
+    if (set == NULL)
+        return rrep;
+
+    if (data_len != SC_SHA256_LEN)
+        return rrep;
+
+    Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
+
memcpy(lookup.sha256, data, data_len);
+    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
+    if (rdata) {
+        Sha256Type *found = rdata->data;
+        rrep.found = true;
+        rrep.json = found->json;
+        DatajsonUnlockData(rdata);
+        return rrep;
+    }
+    return rrep;
+}
+
+/* Look up an IPv4 key; a data_len other than SC_IPV4_LEN is reported as
+ * not found. */
+static DataJsonResultType DatajsonLookupIPv4(
+        Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
+
+    if (set == NULL)
+        return rrep;
+
+    if (data_len != SC_IPV4_LEN)
+        return rrep;
+
+    IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
+    memcpy(lookup.ipv4, data, data_len);
+    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
+    if (rdata) {
+        IPv4Type *found = rdata->data;
+        rrep.found = true;
+        rrep.json = found->json;
+        DatajsonUnlockData(rdata);
+        return rrep;
+    }
+    return rrep;
+}
+
+/* Look up a key in an IPv6 set. Both 16 byte and 4 byte keys are accepted;
+ * a 4 byte key only fills the start of lookup.ipv6, the remaining bytes
+ * keep the zero value given by the initializer. */
+static DataJsonResultType DatajsonLookupIPv6(
+        Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
+
+    if (set == NULL)
+        return rrep;
+
+    /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
+    if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
+        return rrep;
+
+    IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
+    memcpy(lookup.ipv6, data, data_len);
+    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
+    if (rdata) {
+        IPv6Type *found = rdata->data;
+        rrep.found = true;
+        rrep.json = found->json;
+        DatajsonUnlockData(rdata);
+        return rrep;
+    }
+    return rrep;
+}
+
+/**
+ * \brief Look up data in a JSON dataset, dispatching on the set type.
+ *
+ * \retval found false: NULL set, unknown type, or key not present
+ * \retval found true:  json holds the value/len pair stored with the key
+ *
+ * NOTE(review): the returned json.value is read from the hash entry and is
+ * handed back after the entry was unlocked -- confirm its lifetime with
+ * the callers.
+ */
+DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    DataJsonResultType rrep = { .found = false, .json = { .value = 0 } };
+    if (set == NULL)
+        return rrep;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING:
+            return DatajsonLookupString(set, data, data_len);
+        case DATASET_TYPE_MD5:
+            return DatajsonLookupMd5(set, data, data_len);
+        case DATASET_TYPE_SHA256:
+            return DatajsonLookupSha256(set, data, data_len);
+        case DATASET_TYPE_IPV4:
+            return DatajsonLookupIPv4(set, data, data_len);
+        case DATASET_TYPE_IPV6:
+            return DatajsonLookupIPv6(set, data, data_len);
+        default:
+            break;
+    }
+    return rrep;
+}
+
+/* Signature shared by the per-type operations DatajsonOpSerialized() can
+ * apply to a decoded key. */
+typedef int (*DatajsonOpFunc)(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json);
+
+/**
+ * \brief Decode a serialized key and apply the operation matching the set type.
+ *
+ * The key encoding depends on the set type: base64 for strings, hex for
+ * MD5 and SHA256, textual addresses for IPv4 and IPv6. When json is not
+ * NULL it is validated and copied by ParseJsonLine(); that copy is freed
+ * here unless the operation returns a value > 0.
+ *
+ * \retval >0 value returned by the operation, the JSON copy was kept
+ * \retval 0  value returned by the operation, the JSON copy was freed
+ * \retval -1 NULL set, empty string, bad JSON, or unhandled set type
+ * \retval -2 the key could not be decoded
+ */
+static int DatajsonOpSerialized(Dataset *set, const char *string, const char *json,
+        DatajsonOpFunc DatajsonOpString, DatajsonOpFunc DatajsonOpMd5,
+        DatajsonOpFunc DatajsonOpSha256, DatajsonOpFunc DatajsonOpIPv4,
+        DatajsonOpFunc DatajsonOpIPv6)
+{
+    int ret;
+
+    if (set == NULL)
+        return -1;
+    if (strlen(string) == 0)
+        return -1;
+
+    DataJsonType jvalue = { .value = NULL, .len = 0 };
+    if (json) {
+        if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
+            SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
+            return -1;
+        }
+    }
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING: {
+            uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string));
+            uint8_t decoded[decoded_size];
+            uint32_t num_decoded = SCBase64Decode(
+                    (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
+            if (num_decoded == 0)
+                goto operror;
+            ret = DatajsonOpString(set, decoded, num_decoded, &jvalue);
+            if (ret <= 0) {
+                SCFree(jvalue.value);
+            }
+            return ret;
+        }
+        case DATASET_TYPE_MD5: {
+            if (strlen(string) != SC_MD5_HEX_LEN)
+                goto operror;
+            uint8_t hash[SC_MD5_LEN];
+            if (HexToRaw((const uint8_t *)string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
+                goto operror;
+            ret = DatajsonOpMd5(set, hash, SC_MD5_LEN, &jvalue);
+            if (ret <= 0) {
+                SCFree(jvalue.value);
+            }
+            return ret;
+        }
+        case DATASET_TYPE_SHA256: {
+            if (strlen(string) != SC_SHA256_HEX_LEN)
+                goto operror;
+            uint8_t hash[SC_SHA256_LEN];
+            if (HexToRaw((const uint8_t *)string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
+                goto operror;
+            ret = DatajsonOpSha256(set, hash, SC_SHA256_LEN, &jvalue);
+            if (ret <= 0) {
+                SCFree(jvalue.value);
+            }
+            return ret;
+        }
+        case DATASET_TYPE_IPV4: {
+            struct in_addr in;
+            if (inet_pton(AF_INET, string, &in) != 1)
+                goto operror;
+            ret = DatajsonOpIPv4(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
+            if (ret <= 0) {
+                SCFree(jvalue.value);
+            }
+            return ret;
+        }
+        case DATASET_TYPE_IPV6: {
+            struct in6_addr in6;
+            if (DatasetParseIpv6String(set, string, &in6) != 0) {
+                SCLogError("Dataset failed to import %s as IPv6", string);
+                goto operror;
+            }
+            ret = DatajsonOpIPv6(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
+            if (ret <= 0) {
+                SCFree(jvalue.value);
+            }
+            return ret;
+        }
+    }
+    /* set type not handled above */
+    SCFree(jvalue.value);
+    return -1;
+operror:
+    /* the key could not be decoded for this set type */
+    SCFree(jvalue.value);
+    return -2;
+}
+
+/** \brief add serialized data to json set
+ * \retval int 1 added
+ * \retval int 0 already in hash
+ * \retval int -1 API error (not added)
+ * \retval int -2 DATA error
+ */
+int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
+{
+    return DatajsonOpSerialized(set, value, json, DatajsonAddString, DatajsonAddMd5,
+            DatajsonAddSha256, DatajsonAddIPv4, DatajsonAddIPv6);
+}
diff --git a/src/datajson.h b/src/datajson.h
new file mode 100644
index 0000000000..449f1d0573
--- /dev/null
+++ b/src/datajson.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef SURICATA_DATAJSON_H +#define SURICATA_DATAJSON_H + +#include +#include "datasets.h" + +#define DATAJSON_JSON_LENGTH 1024 + +typedef struct DataJsonType { + char *value; + size_t len; +} DataJsonType; + +typedef struct DataJsonResultType { + bool found; + DataJsonType json; +} DataJsonResultType; + +/* Common functions */ + +Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize, char *json_key_value, char *json_array_key); + +DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); + +int DatajsonAddSerialized(Dataset *set, const char *value, const char *json); + +#endif /* SURICATA_DATAJSON_H*/ diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index 67f8778fd2..92dd49ece7 100644 --- a/src/datasets-ipv4.c +++ b/src/datasets-ipv4.c @@ -38,6 +38,17 @@ int IPv4Set(void *dst, void *src) return 0; } +int IPv4JsonSet(void *dst, void *src) +{ + IPv4Type *src_s = src; + IPv4Type *dst_s = dst; + memcpy(dst_s->ipv4, src_s->ipv4, sizeof(dst_s->ipv4)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + bool IPv4Compare(void *a, void *b) { const IPv4Type *as = a; @@ -56,3 +67,17 @@ uint32_t IPv4Hash(uint32_t hash_seed, void *s) void IPv4Free(void *s) { } + +void IPv4JsonFree(void *s) +{ + const IPv4Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t IPv4JsonGetLength(void *s) +{ + const IPv4Type *as = s; + return as->json.len; +} diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index 4a840e9aa6..9bb0c16641 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_IPV4_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct IPv4Type { uint8_t ipv4[4]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } IPv4Type; int IPv4Set(void *dst, void 
*src); +int IPv4JsonSet(void *dst, void *src); bool IPv4Compare(void *a, void *b); uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); +void IPv4JsonFree(void *s); +uint32_t IPv4JsonGetLength(void *s); #endif /* SURICATA_DATASETS_IPV4_H */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index ac96374da7..2888b34826 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-ipv6.h" #include "util-hash-lookup3.h" #include "util-thash.h" @@ -38,6 +39,17 @@ int IPv6Set(void *dst, void *src) return 0; } +int IPv6JsonSet(void *dst, void *src) +{ + IPv6Type *src_s = src; + IPv6Type *dst_s = dst; + memcpy(dst_s->ipv6, src_s->ipv6, sizeof(dst_s->ipv6)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + bool IPv6Compare(void *a, void *b) { const IPv6Type *as = a; @@ -56,3 +68,17 @@ uint32_t IPv6Hash(uint32_t hash_seed, void *s) void IPv6Free(void *s) { } + +void IPv6JsonFree(void *s) +{ + const IPv6Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t IPv6JsonGetLength(void *s) +{ + const IPv6Type *as = s; + return as->json.len; +} diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index c75ad194d6..4251c77a28 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_IPV6_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct IPv6Type { uint8_t ipv6[16]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } IPv6Type; int IPv6Set(void *dst, void *src); +int IPv6JsonSet(void *dst, void *src); bool IPv6Compare(void *a, void *b); uint32_t IPv6Hash(uint32_t hash_seed, void *s); void IPv6Free(void *s); +void IPv6JsonFree(void *s); +uint32_t IPv6JsonGetLength(void *s); #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 
28fd37d830..92bcae3442 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-md5.h" #include "util-hash-lookup3.h" @@ -39,6 +40,16 @@ int Md5StrSet(void *dst, void *src) return 0; } +int Md5StrJsonSet(void *dst, void *src) +{ + Md5Type *src_s = src; + Md5Type *dst_s = dst; + memcpy(dst_s->md5, src_s->md5, sizeof(dst_s->md5)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + bool Md5StrCompare(void *a, void *b) { const Md5Type *as = a; @@ -57,3 +68,17 @@ uint32_t Md5StrHash(uint32_t hash_seed, void *s) void Md5StrFree(void *s) { } + +void Md5StrJsonFree(void *s) +{ + const Md5Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t Md5StrJsonGetLength(void *s) +{ + const Md5Type *as = s; + return as->json.len; +} diff --git a/src/datasets-md5.h b/src/datasets-md5.h index 88c1ff1dfd..5fdbd795c7 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_MD5_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct Md5Type { uint8_t md5[16]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } Md5Type; int Md5StrSet(void *dst, void *src); +int Md5StrJsonSet(void *dst, void *src); bool Md5StrCompare(void *a, void *b); uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); +void Md5StrJsonFree(void *s); +uint32_t Md5StrJsonGetLength(void *s); #endif /* SURICATA_DATASETS_MD5_H */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 240939c084..b7fd48aed1 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -24,6 +24,7 @@ #include "suricata-common.h" #include "conf.h" #include "datasets.h" +#include "datajson.h" #include "datasets-sha256.h" #include "util-hash-lookup3.h" #include "util-thash.h" @@ -37,6 +38,16 @@ int Sha256StrSet(void *dst, void 
*src) return 0; } +int Sha256StrJsonSet(void *dst, void *src) +{ + Sha256Type *src_s = src; + Sha256Type *dst_s = dst; + memcpy(dst_s->sha256, src_s->sha256, sizeof(dst_s->sha256)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + bool Sha256StrCompare(void *a, void *b) { Sha256Type *as = a; @@ -56,3 +67,17 @@ void Sha256StrFree(void *s) { // no dynamic data } + +void Sha256StrJsonFree(void *s) +{ + const Sha256Type *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} + +uint32_t Sha256StrJsonGetLength(void *s) +{ + const Sha256Type *as = s; + return as->json.len; +} diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 4f99b85a96..16c5932b18 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -25,15 +25,22 @@ #define SURICATA_DATASETS_SHA256_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct Sha256Type { uint8_t sha256[32]; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; } Sha256Type; int Sha256StrSet(void *dst, void *src); +int Sha256StrJsonSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); +void Sha256StrJsonFree(void *s); +uint32_t Sha256StrJsonGetLength(void *s); #endif /* SURICATA_DATASETS_SHA256_H */ diff --git a/src/datasets-string.c b/src/datasets-string.c index da6e039bc3..c9c2b3b4be 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -73,6 +73,24 @@ int StringSet(void *dst, void *src) return 0; } +int StringJsonSet(void *dst, void *src) +{ + StringType *src_s = src; + StringType *dst_s = dst; + SCLogDebug("dst %p src %p, src_s->ptr %p src_s->len %u", dst, src, src_s->ptr, src_s->len); + + dst_s->len = src_s->len; + dst_s->ptr = SCMalloc(dst_s->len); + BUG_ON(dst_s->ptr == NULL); + memcpy(dst_s->ptr, src_s->ptr, dst_s->len); + + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + 
SCLogDebug("dst %p src %p, dst_s->ptr %p dst_s->len %u", dst, src, dst_s->ptr, dst_s->len); + return 0; +} + bool StringCompare(void *a, void *b) { const StringType *as = a; @@ -102,3 +120,18 @@ void StringFree(void *s) StringType *str = s; SCFree(str->ptr); } + +void StringJsonFree(void *s) +{ + StringType *str = s; + SCFree(str->ptr); + if (str->json.value) { + SCFree(str->json.value); + } +} + +uint32_t StringJsonGetLength(void *s) +{ + StringType *str = s; + return str->json.len + str->len; +} diff --git a/src/datasets-string.h b/src/datasets-string.h index 745754fc49..c3952d8ccf 100644 --- a/src/datasets-string.h +++ b/src/datasets-string.h @@ -25,18 +25,25 @@ #define SURICATA_DATASETS_STRING_H #include "datasets-reputation.h" +#include "datajson.h" typedef struct StringType { uint32_t len; - DataRepType rep; + union { + DataRepType rep; + DataJsonType json; + }; uint8_t *ptr; } StringType; int StringSet(void *dst, void *src); +int StringJsonSet(void *dst, void *src); bool StringCompare(void *a, void *b); uint32_t StringHash(uint32_t hash_seed, void *s); uint32_t StringGetLength(void *s); void StringFree(void *s); +void StringJsonFree(void *s); int StringAsBase64(const void *s, char *out, size_t out_size); +uint32_t StringJsonGetLength(void *s); #endif /* SURICATA_DATASETS_STRING_H */ diff --git a/src/datasets.c b/src/datasets.c index 7addb37274..90ba78ee4c 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -32,7 +32,9 @@ #include "datasets-md5.h" #include "datasets-sha256.h" #include "datasets-reputation.h" +#include "datajson.h" #include "util-conf.h" +#include "util-mem.h" #include "util-thash.h" #include "util-print.h" #include "util-byte.h" @@ -57,7 +59,6 @@ static inline void DatasetUnlockData(THashData *d) THashDataUnlock(d); } static bool DatasetIsStatic(const char *save, const char *load); -static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize); enum DatasetTypes DatasetGetTypeFromString(const char *s) { @@ -74,7 +75,23 @@ enum 
DatasetTypes DatasetGetTypeFromString(const char *s) return DATASET_TYPE_NOTSET; } -static Dataset *DatasetAlloc(const char *name) +void DatasetAppendSet(Dataset *set) +{ + set->next = sets; + sets = set; +} + +void DatasetLock(void) +{ + SCMutexLock(&sets_lock); +} + +void DatasetUnlock(void) +{ + SCMutexUnlock(&sets_lock); +} + +Dataset *DatasetAlloc(const char *name) { Dataset *set = SCCalloc(1, sizeof(*set)); if (set) { @@ -83,7 +100,7 @@ static Dataset *DatasetAlloc(const char *name) return set; } -static Dataset *DatasetSearchByName(const char *name) +Dataset *DatasetSearchByName(const char *name) { Dataset *set = sets; while (set) { @@ -118,7 +135,7 @@ static int DatasetLoadIPv4(Dataset *set) return 0; } -static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6) +int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6) { /* Checking IPv6 case */ char *got_colon = strchr(line, ':'); @@ -249,8 +266,8 @@ enum DatasetGetPathType { TYPE_LOAD, }; -static void DatasetGetPath(const char *in_path, - char *out_path, size_t out_size, enum DatasetGetPathType type) +static void DatasetGetPath( + const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type) { char path[PATH_MAX]; struct stat st; @@ -372,7 +389,7 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, } } - GetDefaultMemcap(&default_memcap, &default_hashsize); + DatasetGetDefaultMemcap(&default_memcap, &default_hashsize); if (hashsize == 0) { hashsize = default_hashsize; } @@ -547,7 +564,7 @@ void DatasetPostReloadCleanup(void) * despite 2048 commented out in the default yaml. 
*/ #define DATASETS_HASHSIZE_DEFAULT 4096 -static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize) +void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize) { const char *str = NULL; if (SCConfGet("datasets.defaults.memcap", &str) == 1) { @@ -576,7 +593,7 @@ int DatasetsInit(void) SCConfNode *datasets = SCConfGetNode("datasets"); uint64_t default_memcap = 0; uint32_t default_hashsize = 0; - GetDefaultMemcap(&default_memcap, &default_hashsize); + DatasetGetDefaultMemcap(&default_memcap, &default_hashsize); if (datasets != NULL) { const char *str = NULL; if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) { @@ -1378,7 +1395,7 @@ static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc D } case DATASET_TYPE_IPV6: { struct in6_addr in6; - if (ParseIpv6String(set, string, &in6) != 0) { + if (DatasetParseIpv6String(set, string, &in6) != 0) { SCLogError("Dataset failed to import %s as IPv6", string); return -2; } diff --git a/src/datasets.h b/src/datasets.h index 1abfa889ba..60787ae9fe 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -28,6 +28,11 @@ void DatasetsSave(void); void DatasetReload(void); void DatasetPostReloadCleanup(void); +typedef enum { + DATASET_FORMAT_CSV = 0, + DATASET_FORMAT_JSON, +} DatasetFormats; + enum DatasetTypes { #define DATASET_TYPE_NOTSET 0 DATASET_TYPE_STRING = 1, @@ -53,6 +58,11 @@ typedef struct Dataset { } Dataset; enum DatasetTypes DatasetGetTypeFromString(const char *s); +void DatasetAppendSet(Dataset *set); +Dataset *DatasetAlloc(const char *name); +void DatasetLock(void); +void DatasetUnlock(void); +Dataset *DatasetSearchByName(const char *name); Dataset *DatasetFind(const char *name, enum DatasetTypes type); Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize); @@ -62,6 +72,9 @@ int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); DataRepResultType DatasetLookupwRep(Dataset 
*set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep); +void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize); +int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6); + int DatasetAddSerialized(Dataset *set, const char *string); int DatasetRemoveSerialized(Dataset *set, const char *string); int DatasetLookupSerialized(Dataset *set, const char *string); diff --git a/src/decode.c b/src/decode.c index d55d282242..164985930c 100644 --- a/src/decode.c +++ b/src/decode.c @@ -145,9 +145,38 @@ PacketAlert *PacketAlertCreate(void) return pa_array; } +void PacketAlertRecycle(PacketAlert *pa_array) +{ + if (pa_array != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + if (pa_array[i].json_info.next != NULL) { + struct ExtraDataJsonList *current_json = pa_array[i].json_info.next; + while (current_json) { + struct ExtraDataJsonList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + pa_array[i].json_info.json_string = NULL; + pa_array[i].json_info.next = NULL; + } + } +} + void PacketAlertFree(PacketAlert *pa) { if (pa != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + /* first item is not allocated so start at second one */ + if (pa[i].json_info.next != NULL) { + struct ExtraDataJsonList *allocated_json = pa[i].json_info.next; + while (allocated_json) { + struct ExtraDataJsonList *next_json = allocated_json->next; + SCFree(allocated_json); + allocated_json = next_json; + } + } + } SCFree(pa); } } diff --git a/src/decode.h b/src/decode.h index 5a65805d4f..dccd3f48fc 100644 --- a/src/decode.h +++ b/src/decode.h @@ -238,6 +238,11 @@ typedef uint16_t Port; #define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER)) #define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT)) +struct ExtraDataJsonList { + char *json_string; + struct ExtraDataJsonList *next; +}; + /* structure to store the sids/gids/etc the detection engine * found in this packet */ 
typedef struct PacketAlert_ { @@ -247,6 +252,7 @@ typedef struct PacketAlert_ { const struct Signature_ *s; uint64_t tx_id; /* Used for sorting */ int64_t frame_id; + struct ExtraDataJsonList json_info; } PacketAlert; /** @@ -288,6 +294,7 @@ typedef struct PacketAlerts_ { } PacketAlerts; PacketAlert *PacketAlertCreate(void); +void PacketAlertRecycle(PacketAlert *pa_array); void PacketAlertFree(PacketAlert *pa); diff --git a/src/detect-dataset.c b/src/detect-dataset.c index 5d9a932bda..18a96f6380 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2020 Open Information Security Foundation +/* Copyright (C) 2018-2025 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -28,6 +28,7 @@ #include "detect.h" #include "threads.h" #include "datasets.h" +#include "datajson.h" #include "detect-dataset.h" #include "detect-parse.h" @@ -60,6 +61,51 @@ void DetectDatasetRegister (void) sigmatch_table[DETECT_DATASET].Free = DetectDatasetFree; } +/* + 1 match + 0 no match + -1 can't match + */ +static int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatasetData *sd, + const uint8_t *data, const uint32_t data_len) +{ + if (data == NULL || data_len == 0) + return 0; + + switch (sd->cmd) { + case DETECT_DATASET_CMD_ISSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatajsonLookup(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (!r.found) + return 0; + if (r.json.len > 0) { + /* we need to add 3 on length check for the added quotes and colon when + building the json string */ + if ((det_ctx->json_content_len < SIG_JSON_CONTENT_ARRAY_LEN) && + (r.json.len + strlen(sd->json_key) + 3 < SIG_JSON_CONTENT_ITEM_LEN)) { + snprintf(det_ctx->json_content[det_ctx->json_content_len].json_content, + SIG_JSON_CONTENT_ITEM_LEN, 
"\"%s\":%s", sd->json_key, r.json.value); + det_ctx->json_content[det_ctx->json_content_len].id = sd->id; + det_ctx->json_content_len++; + } + } + return 1; + } + case DETECT_DATASET_CMD_ISNOTSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatajsonLookup(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (r.found) + return 0; + return 1; + } + default: + DEBUG_VALIDATE_BUG_ON("unknown dataset with json command"); + } + return 0; +} + /* 1 match 0 no match @@ -72,6 +118,10 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, if (data == NULL || data_len == 0) return 0; + if (sd->format == DATASET_FORMAT_JSON) { + return DetectDatajsonBufferMatch(det_ctx, sd, data, data_len); + } + switch (sd->cmd) { case DETECT_DATASET_CMD_ISSET: { //PrintRawDataFp(stdout, data, data_len); @@ -110,18 +160,22 @@ int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, enum DatasetTypes *type, char *load, size_t load_size, char *save, size_t save_size, - uint64_t *memcap, uint32_t *hashsize) + uint64_t *memcap, uint32_t *hashsize, DatasetFormats *format, char *value_key, + size_t value_key_size, char *array_key, size_t array_key_size, char *enrichment_key, + size_t enrichment_key_size) { bool cmd_set = false; bool name_set = false; bool load_set = false; bool save_set = false; bool state_set = false; + bool format_set = false; char copy[strlen(str)+1]; strlcpy(copy, str, sizeof(copy)); char *xsaveptr = NULL; char *key = strtok_r(copy, ",", &xsaveptr); + while (key != NULL) { while (*key != '\0' && isblank(*key)) { key++; @@ -203,7 +257,41 @@ static int DetectDatasetParse(const char *str, char *cmd, int cmd_len, char *nam strlcpy(load, val, load_size); strlcpy(save, val, save_size); state_set = true; + } else if (strcmp(key, "format") == 0) { + if (format_set) { + SCLogWarning("'format' can only appear 
once"); + return 0; /* 0 = parse failure: the caller tests !DetectDatasetParse() */ + } + SCLogDebug("format %s", val); + if (strcmp(val, "csv") == 0) { + *format = DATASET_FORMAT_CSV; + } else if (strcmp(val, "json") == 0) { + *format = DATASET_FORMAT_JSON; + } else { + SCLogWarning("unknown format %s", val); + return 0; + } + format_set = true; + } else if (strcmp(key, "value_key") == 0) { + if (strlen(val) >= value_key_size) { /* strlcpy() keeps at most size - 1 bytes */ + SCLogWarning("'key' value too long (limit is %zu)", value_key_size - 1); + return 0; + } + strlcpy(value_key, val, value_key_size); + } else if (strcmp(key, "array_key") == 0) { + if (strlen(val) >= array_key_size) { + SCLogWarning("'key' value too long (limit is %zu)", array_key_size - 1); + return 0; + } + strlcpy(array_key, val, array_key_size); + } else if (strcmp(key, "enrichment_key") == 0) { + if (strlen(val) >= enrichment_key_size) { + SCLogWarning("'key' value too long (limit is %zu)", enrichment_key_size - 1); + return 0; + } + strlcpy(enrichment_key, val, enrichment_key_size); } + if (strcmp(key, "memcap") == 0) { if (ParseSizeStringU64(val, memcap) < 0) { SCLogWarning("invalid value for memcap: %s," @@ -361,6 +449,10 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst enum DatasetTypes type = DATASET_TYPE_NOTSET; char load[PATH_MAX] = ""; char save[PATH_MAX] = ""; + DatasetFormats format = DATASET_FORMAT_CSV; + char value_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + char array_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + char enrichment_key[SIG_JSON_CONTENT_KEY_LEN] = ""; if (DetectBufferGetActiveList(de_ctx, s) == -1) { SCLogError("datasets are only supported for sticky buffers"); @@ -374,7 +466,9 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } if (!DetectDatasetParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, - sizeof(load), save, sizeof(save), &memcap, &hashsize)) { + sizeof(load), save, sizeof(save), &memcap, &hashsize, &format, value_key, + sizeof(value_key), array_key, sizeof(array_key), enrichment_key, +
sizeof(enrichment_key))) { return -1; } @@ -383,14 +477,33 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } else if (strcmp(cmd_str,"isnotset") == 0) { cmd = DETECT_DATASET_CMD_ISNOTSET; } else if (strcmp(cmd_str,"set") == 0) { + if (format == DATASET_FORMAT_JSON) { + SCLogError("json format is not supported for 'set' command"); + return -1; + } cmd = DETECT_DATASET_CMD_SET; } else if (strcmp(cmd_str,"unset") == 0) { + if (format == DATASET_FORMAT_JSON) { + SCLogError("json format is not supported for 'unset' command"); + return -1; + } cmd = DETECT_DATASET_CMD_UNSET; } else { SCLogError("dataset action \"%s\" is not supported.", cmd_str); return -1; } + if (format == DATASET_FORMAT_JSON) { + if (strlen(save) != 0) { + SCLogError("json format is not supported with 'save' or 'state' option"); + return -1; + } + if (strlen(enrichment_key) == 0) { + SCLogError("json format needs an 'enrichment_key' parameter"); + return -1; + } + } + /* if just 'load' is set, we load data from the same dir as the * rule file. 
If load+save is used, we use data dir */ if (strlen(save) == 0 && strlen(load) != 0) { @@ -411,7 +524,13 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst } SCLogDebug("name '%s' load '%s' save '%s'", name, load, save); - Dataset *set = DatasetGet(name, type, save, load, memcap, hashsize); + Dataset *set = NULL; + + if (format == DATASET_FORMAT_JSON) { + set = DatajsonGet(name, type, load, memcap, hashsize, value_key, array_key); + } else { + set = DatasetGet(name, type, save, load, memcap, hashsize); + } if (set == NULL) { SCLogError("failed to set up dataset '%s'.", name); return -1; @@ -423,6 +542,11 @@ int DetectDatasetSetup (DetectEngineCtx *de_ctx, Signature *s, const char *rawst cd->set = set; cd->cmd = cmd; + cd->format = format; + if (format == DATASET_FORMAT_JSON) { + strlcpy(cd->json_key, enrichment_key, sizeof(cd->json_key)); + } + cd->id = s; SCLogDebug("cmd %s, name %s", cmd_str, strlen(name) ? name : "(none)"); diff --git a/src/detect-dataset.h b/src/detect-dataset.h index 047a5b11cb..a4db07b219 100644 --- a/src/detect-dataset.h +++ b/src/detect-dataset.h @@ -25,10 +25,15 @@ #define SURICATA_DETECT_DATASET_H #include "datasets.h" +#include "datajson.h" typedef struct DetectDatasetData_ { Dataset *set; uint8_t cmd; + DatasetFormats format; + DataJsonType json; + char json_key[SIG_JSON_CONTENT_KEY_LEN]; + void *id; } DetectDatasetData; int DetectDatasetBufferMatch(DetectEngineThreadCtx *det_ctx, diff --git a/src/detect-engine-alert.c b/src/detect-engine-alert.c index 1378e8e0df..e3d67ae3b5 100644 --- a/src/detect-engine-alert.c +++ b/src/detect-engine-alert.c @@ -297,6 +297,30 @@ static inline PacketAlert PacketAlertSet( /* Set tx_id if the frame has it */ pa.tx_id = tx_id; pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? 
det_ctx->frame_id : 0; + pa.json_info.json_string = NULL; + pa.json_info.next = NULL; + if (det_ctx->json_content_len) { + /* We have some JSON attached in the current detection so let's try + to see if some need to be used for current signature. */ + struct ExtraDataJsonList *current_json = &pa.json_info; + for (size_t i = 0; i < det_ctx->json_content_len; i++) { + if (s == det_ctx->json_content[i].id) { + if (current_json->json_string != NULL) { + struct ExtraDataJsonList *next_json = + SCCalloc(1, sizeof(struct ExtraDataJsonList)); + if (next_json) { + current_json->next = next_json; + current_json = next_json; + current_json->next = NULL; + } else { + /* Allocation error, let's return now */ + return pa; + } + } + current_json->json_string = det_ctx->json_content[i].json_content; + } + } + } return pa; } diff --git a/src/detect-engine-register.h b/src/detect-engine-register.h index 0e5e52242c..ceb86ff98e 100644 --- a/src/detect-engine-register.h +++ b/src/detect-engine-register.h @@ -85,6 +85,7 @@ enum DetectKeywordId { DETECT_BYTE_EXTRACT, DETECT_DATASET, DETECT_DATAREP, + DETECT_DATAJSON, DETECT_BASE64_DECODE, DETECT_BASE64_DATA, DETECT_BSIZE, diff --git a/src/detect.c b/src/detect.c index 906a81200a..e6e297ca73 100644 --- a/src/detect.c +++ b/src/detect.c @@ -941,6 +941,7 @@ static DetectRunScratchpad DetectRunSetup(const DetectEngineCtx *de_ctx, det_ctx->base64_decoded_len = 0; det_ctx->raw_stream_progress = 0; det_ctx->match_array_cnt = 0; + det_ctx->json_content_len = 0; det_ctx->alert_queue_size = 0; p->alerts.drop.action = 0; diff --git a/src/detect.h b/src/detect.h index 7f08f50fb3..74ed556467 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1228,6 +1228,16 @@ typedef struct PostRuleMatchWorkQueue { uint32_t size; /**< allocation size in number of elements. 
*/ } PostRuleMatchWorkQueue; +#define SIG_JSON_CONTENT_ARRAY_LEN 16 +#define SIG_JSON_CONTENT_ITEM_LEN 1024 +#define SIG_JSON_CONTENT_KEY_LEN 32 + +/** structure to store the json content with info on sig that triggered it */ +typedef struct SigJsonContent { + void *id; + char json_content[SIG_JSON_CONTENT_ITEM_LEN]; +} SigJsonContent; + /** * Detection engine thread data. */ @@ -1268,6 +1278,9 @@ typedef struct DetectEngineThreadCtx_ { /* byte_* values */ uint64_t *byte_values; + SigJsonContent json_content[SIG_JSON_CONTENT_ARRAY_LEN]; + size_t json_content_len; + /* counter for the filestore array below -- up here for cache reasons. */ uint16_t filestore_cnt; diff --git a/src/output-json-alert.c b/src/output-json-alert.c index ed2bbb584a..003e3f56c5 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -253,6 +253,15 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, SCJsonBuilder *js, AlertJsonMetadata(pa, js); } + if (pa->json_info.json_string != NULL) { + SCJbOpenObject(js, "extra"); + const struct ExtraDataJsonList *json_info = &pa->json_info; + while (json_info) { + SCJbSetFormatted(js, json_info->json_string); + json_info = json_info->next; + } + SCJbClose(js); + } if (flags & LOG_JSON_RULE) { SCJbSetString(js, "rule", pa->s->sig_str); } diff --git a/src/packet.c b/src/packet.c index 5254c8cde9..1bff85a6fc 100644 --- a/src/packet.c +++ b/src/packet.c @@ -127,6 +127,7 @@ void PacketReinit(Packet *p) p->alerts.discarded = 0; p->alerts.suppressed = 0; p->alerts.drop.action = 0; + PacketAlertRecycle(p->alerts.alerts); p->pcap_cnt = 0; p->tunnel_rtv_cnt = 0; p->tunnel_tpr_cnt = 0; diff --git a/src/runmode-unix-socket.c b/src/runmode-unix-socket.c index e45fb3ae1e..8c28e4de10 100644 --- a/src/runmode-unix-socket.c +++ b/src/runmode-unix-socket.c @@ -55,6 +55,7 @@ #include "conf-yaml-loader.h" #include "datasets.h" +#include "datajson.h" #include "runmode-unix-socket.h" int unix_socket_mode_is_running = 0; @@ -806,6 +807,74 @@ TmEcode 
UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data) } } +/** + * \brief Command to add data to a datajson + * + * \param cmd the content of command Arguments as a json_t object + * \param answer the json_t object that has to be used to answer + * \param data pointer to data defining the context here a PcapCommand:: + */ +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data) +{ + /* 1 get dataset name */ + json_t *narg = json_object_get(cmd, "setname"); + if (!json_is_string(narg)) { + json_object_set_new(answer, "message", json_string("setname is not a string")); + return TM_ECODE_FAILED; + } + const char *set_name = json_string_value(narg); + + /* 2 get the data type */ + json_t *targ = json_object_get(cmd, "settype"); + if (!json_is_string(targ)) { + json_object_set_new(answer, "message", json_string("settype is not a string")); + return TM_ECODE_FAILED; + } + const char *type = json_string_value(targ); + + /* 3 get value */ + json_t *varg = json_object_get(cmd, "datavalue"); + if (!json_is_string(varg)) { + json_object_set_new(answer, "message", json_string("datavalue is not string")); + return TM_ECODE_FAILED; + } + const char *value = json_string_value(varg); + + /* 4 get json (check jarg itself: json_string_value() yields NULL for non-strings) */ + json_t *jarg = json_object_get(cmd, "datajson"); + if (!json_is_string(jarg)) { + json_object_set_new(answer, "message", json_string("datajson is not string")); + return TM_ECODE_FAILED; + } + const char *json = json_string_value(jarg); + + SCLogDebug("datajson-add: %s type %s value %s json %s", set_name, type, value, json); + + enum DatasetTypes t = DatasetGetTypeFromString(type); + if (t == DATASET_TYPE_NOTSET) { + json_object_set_new(answer, "message", json_string("unknown settype")); + return TM_ECODE_FAILED; + } + + Dataset *set = DatasetFind(set_name, t); + if (set == NULL) { + json_object_set_new(answer, "message", json_string("set not found or wrong type")); + return TM_ECODE_FAILED; + } + + int r = DatajsonAddSerialized(set, value, json); + if
(r == 1) { + json_object_set_new(answer, "message", json_string("data added")); + return TM_ECODE_OK; + } else if (r == 0) { + json_object_set_new(answer, "message", json_string("data already in set")); + return TM_ECODE_OK; + } else { + json_object_set_new(answer, "message", json_string("failed to add data")); + return TM_ECODE_FAILED; + } +} + static bool JsonU32Value(json_t *jarg, uint32_t *ret) { int64_t r = json_integer_value(jarg); diff --git a/src/runmode-unix-socket.h b/src/runmode-unix-socket.h index 8ea432f57d..dfd76b985d 100644 --- a/src/runmode-unix-socket.h +++ b/src/runmode-unix-socket.h @@ -38,6 +38,7 @@ TmEcode UnixSocketDatasetRemove(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketDatasetDump(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketDatasetClear(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data); +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketRegisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketUnregisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketRegisterTenant(json_t *cmd, json_t* answer, void *data); diff --git a/src/unix-manager.c b/src/unix-manager.c index daa05a2b92..067a89fb94 100644 --- a/src/unix-manager.c +++ b/src/unix-manager.c @@ -1111,6 +1111,8 @@ int UnixManagerInit(void) UnixManagerRegisterCommand("dataset-add", UnixSocketDatasetAdd, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-remove", UnixSocketDatasetRemove, &command, UNIX_CMD_TAKE_ARGS); + UnixManagerRegisterCommand( + "dataset-add-json", UnixSocketDatajsonAdd, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand( "get-flow-stats-by-id", UnixSocketGetFlowStatsById, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-dump", UnixSocketDatasetDump, NULL, 0);