|
|
|
/* Copyright (C) 2017 Open Information Security Foundation
|
|
|
|
*
|
|
|
|
* You can copy, redistribute or modify this Program under the terms of
|
|
|
|
* the GNU General Public License version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* version 2 along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
* 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef SURICATA_DATASETS_H
|
|
|
|
#define SURICATA_DATASETS_H
|
|
|
|
|
|
|
|
#include "util-thash.h"
|
datasets: move initial file reading to rust
In a recent warning reported by scan-build, datasets were found to be
using a blocking call in a critical section.
datasets.c:187:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
187 | while (fgets(line, (int)sizeof(line), fp) != NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:292:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
292 | while (fgets(line, (int)sizeof(line), fp) != NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:368:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
368 | while (fgets(line, (int)sizeof(line), fp) != NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:442:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
442 | while (fgets(line, (int)sizeof(line), fp) != NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:512:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
512 | while (fgets(line, (int)sizeof(line), fp) != NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 warnings generated.
These calls are blocking in the multi tenant mode where several tenants
may be trying to load the same dataset in parallel.
In a single tenant mode, this operation is performed as a part of a
single thread before the engine startup.
In order to evade the warning and simplify the code, the initial file
reading is moved to Rust with this commit with a much simpler handling
of dataset and datarep.
Bug 7398
7 months ago
|
|
|
#include "rust.h"
|
|
|
|
#include "datasets-reputation.h"
|
|
|
|
|
|
|
|
/* Module-wide entry points. All take no arguments; only DatasetsInit()
 * returns a value (int). Only the prototypes are visible in this header.
 * NOTE(review): exact semantics (return codes, ordering between reload and
 * post-reload cleanup) must be confirmed against the implementation. */
int DatasetsInit(void);
|
|
|
|
void DatasetsDestroy(void);
|
|
|
|
void DatasetsSave(void);
|
|
|
|
void DatasetReload(void);
|
|
|
|
void DatasetPostReloadCleanup(void);
|
|
|
|
|
datajson: introduce feature
This patch introduces new option to dataset keyword.
Where regular dataset allows match from sets, dataset with json
format allows the same but also adds JSON data to the alert
event. This data is coming from the set definition itself.
For example, an ipv4 set will look like:
[{"ip": "10.16.1.11", "test": "success","context":3}]
The syntax is a JSON array but it can also be a JSON object
with an array inside. The idea is to directly use data coming
from the API of a threat intel management software.
The syntax of the keyword is the following:
dataset:isset,src_ip,type ip,load src.lst,format json, \
enrichment_key src_ip, value_key ip;
Compared to dataset, it just has a supplementary option key
that is used to indicate in which subobject the JSON value
should be added.
The information is added in the event under the alert.extra
subobject:
"alert": {
"extra": {
"src_ip": {
"ip": "10.6.1.11",
"test": "success",
"context": 3
},
The main interest of the feature is to be able to contextualize
a match. For example, if you have an IOC source, you can do
[
{"buffer": "value1", "actor":"APT28","Country":"FR"},
{"buffer": "value2", "actor":"APT32","Country":"NL"}
]
This way, a single dataset is able to produce context to the
event where it was not possible before and multiple signatures
had to be used.
The format introduced in datajson is an evolution of the
historical datarep format. This has some limitations. For example,
if a user fetches IOCs from a threat intel server there is a large
chance that the format will be JSON or XML. Suricata has no support
for the second but can support the first one.
Keeping the key value may seem redundant but it is useful to have it
directly accessible in the extra data to be able to query it
independently of the signature (where it can be multiple metadata
or even be a transformed metadata).
In some cases, when interacting with data (mostly coming from
threat intel servers), the JSON array containing the data
to use is not at the root of the object and it is necessary
to access a subobject.
This patch implements this with support of key in level1.level2.
This is done via the `array_key` option that contains the path
to the data.
Ticket: #7372
5 months ago
|
|
|
/* Formats of the file a dataset is defined in. CSV is the first enumerator
 * and therefore the zero value. */
typedef enum {
|
|
|
|
DATASET_FORMAT_CSV = 0,
|
|
|
|
DATASET_FORMAT_JSON, /* File contains one single JSON object */
|
|
|
|
DATASET_FORMAT_NDJSON, /* Newline Delimited JSON */
|
datajson: introduce feature
This patch introduces new option to dataset keyword.
Where regular dataset allows match from sets, dataset with json
format allows the same but also adds JSON data to the alert
event. This data is coming from the set definition it self.
For example, an ipv4 set will look like:
[{"ip": "10.16.1.11", "test": "success","context":3}]
The syntax is a JSON array but it can also be a JSON object
with an array inside. The idea is to directly used data coming
from the API of a threat intel management software.
The syntax of the keyword is the following:
dataset:isset,src_ip,type ip,load src.lst,format json, \
enrichment_key src_ip, value_key ip;
Compare to dataset, it just have a supplementary option key
that is used to indicate in which subobject the JSON value
should be added.
The information is added in the even under the alert.extra
subobject:
"alert": {
"extra": {
"src_ip": {
"ip": "10.6.1.11",
"test": "success",
"context": 3
},
The main interest of the feature is to be able to contextualize
a match. For example, if you have an IOC source, you can do
[
{"buffer": "value1", "actor":"APT28","Country":"FR"},
{"buffer": "value2", "actor":"APT32","Country":"NL"}
]
This way, a single dataset is able to produce context to the
event where it was not possible before and multiple signatures
had to be used.
The format introduced in datajson is an evolution of the
historical datarep format. This has some limitations. For example,
if a user fetch IOCs from a threat intel server there is a large
change that the format will be JSON or XML. Suricata has no support
for the second but can support the first one.
Keeping the key value may seem redundant but it is useful to have it
directly accessible in the extra data to be able to query it
independantly of the signature (where it can be multiple metadata
or even be a transformed metadata).
In some case, when interacting with data (mostly coming from
threat intel servers), the JSON array containing the data
to use is not at the root of the object and it is ncessary
to access a subobject.
This patch implements this with support of key in level1.level2.
This is done via the `array_key` option that contains the path
to the data.
Ticket: #7372
5 months ago
|
|
|
} DatasetFormats;
|
|
|
|
|
|
|
|
/* Kind of value a dataset holds. */
enum DatasetTypes {
|
|
|
|
/* The value 0 is provided as a preprocessor macro rather than as an
 * enumerator; the enumerators themselves start at 1. */
#define DATASET_TYPE_NOTSET 0
|
|
|
|
DATASET_TYPE_STRING = 1,
|
|
|
|
DATASET_TYPE_MD5,
|
|
|
|
DATASET_TYPE_SHA256,
|
|
|
|
DATASET_TYPE_IPV4,
|
|
|
|
DATASET_TYPE_IPV6,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Maximum length of a dataset name. The name buffer in struct Dataset is one
 * byte larger (presumably for the terminating NUL -- TODO confirm). */
#define DATASET_NAME_MAX_LEN 63
|
|
|
|
/* One named dataset. Instances can be chained through the `next` pointer. */
typedef struct Dataset {
|
|
|
|
/* Name of the set, at most DATASET_NAME_MAX_LEN characters. */
char name[DATASET_NAME_MAX_LEN + 1];
|
|
|
|
/* Kind of value held by this set. */
enum DatasetTypes type;
|
|
|
|
/* Numeric identifier. NOTE(review): how it is assigned is not visible in
 * this header. */
uint32_t id;
|
|
|
|
bool from_yaml; /* Mark whether the set was retrieved from YAML */
|
|
|
|
bool hidden; /* Mark the old sets hidden in case of reload */
|
datajson: add remove_key option to dataset
This option allows to remove the key corresponding to the match
value from the JSON object before creating the JSON object that
will be added to the `extra` data.
For example, matching on the following JSON on the `ip` key:
```json
{"ip": "10.16.1.11", "test": "success", "context":3}
```
with a match like:
```
dataset:isset,src_ip,type ip,load src.lst,format jsonline,enrichment_key src_ip,value_key ip;
```
will produce the following:
```json
"extra": {
"src_ip": {
"ip": "10.16.1.11",
"test": "success",
"context": 3
}
```
if we add the `remove_key` option to the match:
```
dataset:isset,src_ip,type ip,load src.lst,format jsonline,enrichment_key src_ip,value_key ip, remove_key;
```
it will produce the following:
```json
"extra": {
"src_ip": {
"test": "success",
"context": 3
}
```
The option is set to false by default.
Ticket: #7372
4 months ago
|
|
|
bool remove_key; /* Mark that value key should be removed from extra data */
|
|
|
|
/* Hash table context (type from util-thash.h). NOTE(review): presumably
 * holds the entries of this set -- confirm in the implementation. */
THashTableContext *hash;
|
|
|
|
|
|
|
|
/* File path buffers; cf. the `load` and `save` arguments of DatasetGet(). */
char load[PATH_MAX];
|
|
|
|
char save[PATH_MAX];
|
|
|
|
|
|
|
|
/* Pointer to another Dataset, allowing sets to be chained into a list. */
struct Dataset *next;
|
|
|
|
} Dataset;
|
|
|
|
|
|
|
|
/* Maps a string to an enum DatasetTypes value. NOTE(review): the accepted
 * spellings and the result for an unknown string are not visible here. */
enum DatasetTypes DatasetGetTypeFromString(const char *s);
|
|
|
|
/* Takes a set and returns an int. NOTE(review): presumably links the set into
 * the module's list of sets -- confirm in the implementation. */
int DatasetAppendSet(Dataset *set);
|
datajson: introduce feature
This patch introduces new option to dataset keyword.
Where regular dataset allows match from sets, dataset with json
format allows the same but also adds JSON data to the alert
event. This data is coming from the set definition itself.
For example, an ipv4 set will look like:
[{"ip": "10.16.1.11", "test": "success","context":3}]
The syntax is a JSON array but it can also be a JSON object
with an array inside. The idea is to directly use data coming
from the API of a threat intel management software.
The syntax of the keyword is the following:
dataset:isset,src_ip,type ip,load src.lst,format json, \
enrichment_key src_ip, value_key ip;
Compared to dataset, it just has a supplementary option key
that is used to indicate in which subobject the JSON value
should be added.
The information is added in the event under the alert.extra
subobject:
"alert": {
"extra": {
"src_ip": {
"ip": "10.6.1.11",
"test": "success",
"context": 3
},
The main interest of the feature is to be able to contextualize
a match. For example, if you have an IOC source, you can do
[
{"buffer": "value1", "actor":"APT28","Country":"FR"},
{"buffer": "value2", "actor":"APT32","Country":"NL"}
]
This way, a single dataset is able to produce context to the
event where it was not possible before and multiple signatures
had to be used.
The format introduced in datajson is an evolution of the
historical datarep format. This has some limitations. For example,
if a user fetches IOCs from a threat intel server there is a large
chance that the format will be JSON or XML. Suricata has no support
for the second but can support the first one.
Keeping the key value may seem redundant but it is useful to have it
directly accessible in the extra data to be able to query it
independently of the signature (where it can be multiple metadata
or even be a transformed metadata).
In some cases, when interacting with data (mostly coming from
threat intel servers), the JSON array containing the data
to use is not at the root of the object and it is necessary
to access a subobject.
This patch implements this with support of key in level1.level2.
This is done via the `array_key` option that contains the path
to the data.
Ticket: #7372
5 months ago
|
|
|
/* Allocation, locking and lookup of Dataset objects. Only the prototypes are
 * visible in this header. NOTE(review): DatasetLock()/DatasetUnlock()
 * presumably guard the module's list of sets -- confirm in the
 * implementation, including which of the calls below require the lock. */
Dataset *DatasetAlloc(const char *name);
|
|
|
|
void DatasetLock(void);
|
|
|
|
void DatasetUnlock(void);
|
|
|
|
/* Lookup by name only. */
Dataset *DatasetSearchByName(const char *name);
|
|
|
|
/* Lookup by name and type. */
Dataset *DatasetFind(const char *name, enum DatasetTypes type);
|
|
|
|
/* DatasetGet() and DatasetGetOrCreate() take the same name/type/save/load
 * description. DatasetGet() returns the set directly and takes memcap and
 * hashsize by value; DatasetGetOrCreate() returns an int, hands the set back
 * through ret_set and takes memcap and hashsize by pointer. */
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
|
|
|
|
uint64_t memcap, uint32_t hashsize);
|
|
|
|
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load,
|
|
|
|
uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set);
|
|
|
|
/* Operations on one set, with the value passed as a raw byte buffer
 * (data, data_len). NOTE(review): meaning of the int results is defined by
 * the implementation. */
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len);
|
|
|
|
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len);
|
|
|
|
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len);
|
|
|
|
/* Lookup variant that additionally takes a DataRepType and returns a
 * DataRepResultType (presumably declared in datasets-reputation.h -- TODO
 * confirm). */
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
|
|
|
|
const DataRepType *rep);
|
|
|
|
|
datajson: introduce feature
This patch introduces new option to dataset keyword.
Where regular dataset allows match from sets, dataset with json
format allows the same but also adds JSON data to the alert
event. This data is coming from the set definition itself.
For example, an ipv4 set will look like:
[{"ip": "10.16.1.11", "test": "success","context":3}]
The syntax is a JSON array but it can also be a JSON object
with an array inside. The idea is to directly use data coming
from the API of a threat intel management software.
The syntax of the keyword is the following:
dataset:isset,src_ip,type ip,load src.lst,format json, \
enrichment_key src_ip, value_key ip;
Compared to dataset, it just has a supplementary option key
that is used to indicate in which subobject the JSON value
should be added.
The information is added in the event under the alert.extra
subobject:
"alert": {
"extra": {
"src_ip": {
"ip": "10.6.1.11",
"test": "success",
"context": 3
},
The main interest of the feature is to be able to contextualize
a match. For example, if you have an IOC source, you can do
[
{"buffer": "value1", "actor":"APT28","Country":"FR"},
{"buffer": "value2", "actor":"APT32","Country":"NL"}
]
This way, a single dataset is able to produce context to the
event where it was not possible before and multiple signatures
had to be used.
The format introduced in datajson is an evolution of the
historical datarep format. This has some limitations. For example,
if a user fetches IOCs from a threat intel server there is a large
chance that the format will be JSON or XML. Suricata has no support
for the second but can support the first one.
Keeping the key value may seem redundant but it is useful to have it
directly accessible in the extra data to be able to query it
independently of the signature (where it can be multiple metadata
or even be a transformed metadata).
In some cases, when interacting with data (mostly coming from
threat intel servers), the JSON array containing the data
to use is not at the root of the object and it is necessary
to access a subobject.
This patch implements this with support of key in level1.level2.
This is done via the `array_key` option that contains the path
to the data.
Ticket: #7372
5 months ago
|
|
|
/* Takes memcap and hashsize as non-const pointers and returns nothing.
 * NOTE(review): presumably fills both with the configured default values --
 * confirm in the implementation. */
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
|
|
|
|
/* Takes a text line and a struct in6_addr destination for the given set.
 * NOTE(review): accepted address syntaxes and the meaning of the int result
 * are not visible in this header. */
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6);
|
|
|
|
|
|
|
|
/* String-based add/remove/lookup on one set: the value is passed as a C
 * string instead of a (data, data_len) byte buffer. NOTE(review): the
 * expected textual encoding per dataset type is defined by the
 * implementation. */
int DatasetAddSerialized(Dataset *set, const char *string);
|
|
|
|
int DatasetRemoveSerialized(Dataset *set, const char *string);
|
|
|
|
int DatasetLookupSerialized(Dataset *set, const char *string);
|
|
|
|
|
|
|
|
#endif /* SURICATA_DATASETS_H */
|