You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
suricata/src/util-mpm.h

253 lines
9.1 KiB
C

/* Copyright (C) 2007-2010 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* \file
*
* \author Victor Julien <victor@inliniac.net>
*/
#ifndef __UTIL_MPM_H__
#define __UTIL_MPM_H__
#include "suricata-common.h"
/* End-match flags. Each flag occupies its own bit so they can be OR-ed. */

/** A single match is sufficient. No depth, offset, etc settings. */
#define MPM_ENDMATCH_SINGLE   (1 << 0)
/** has offset setting */
#define MPM_ENDMATCH_OFFSET   (1 << 1)
/** has depth setting */
#define MPM_ENDMATCH_DEPTH    (1 << 2)
/** if this matches, no search is required (for this pattern) */
#define MPM_ENDMATCH_NOSEARCH (1 << 3)
/* Named hash sizes for the multi pattern matcher algorithms. Every level is
 * twice the previous one, from 2048 up to 65536.
 * NOTE(review): presumably these are what MpmGetHashSize() maps a
 * configuration string to -- confirm in util-mpm.c. */
#define HASHSIZE_LOWEST 2048 /**< Lowest hash size for the multi pattern matcher algorithms */
#define HASHSIZE_LOW 4096 /**< Low hash size for the multi pattern matcher algorithms */
#define HASHSIZE_MEDIUM 8192 /**< Medium hash size for the multi pattern matcher algorithms */
#define HASHSIZE_HIGH 16384 /**< High hash size for the multi pattern matcher algorithms */
#define HASHSIZE_HIGHER 32768 /**< Higher hash size for the multi pattern matcher algorithms */
#define HASHSIZE_MAX 65536 /**< Max hash size for the multi pattern matcher algorithms */
/* Named bloomfilter sizes for the multi pattern matcher algorithms; each
 * level is twice the previous one (512, 1024, 2048).
 * NOTE(review): presumably these are what MpmGetBloomSize() maps a
 * configuration string to -- confirm in util-mpm.c. */
#define BLOOMSIZE_LOW 512 /**< Low bloomfilter size for the multi pattern matcher algorithms */
#define BLOOMSIZE_MEDIUM 1024 /**< Medium bloomfilter size for the multi pattern matcher algorithms */
#define BLOOMSIZE_HIGH 2048 /**< High bloomfilter size for the multi pattern matcher algorithms */
/**
 * \brief Identifiers of the available multi pattern matcher (mpm) algorithms.
 *
 * MPM_NOTSET is 0 and the remaining values are assigned sequentially, so
 * MPM_TABLE_SIZE (always the last entry) equals the number of identifiers
 * in front of it; it is used to size mpm_table.
 *
 * MPM_AC_CUDA only exists when __SC_CUDA_SUPPORT__ is defined, which shifts
 * the numeric value of every entry that follows it. Do not rely on the raw
 * numbers of those entries across build configurations.
 */
enum {
    MPM_NOTSET = 0,

    /* wumanber as the name suggests */
    MPM_WUMANBER,
    /* bndmq 2 gram */
    MPM_B2G,
    /* bndmq 3 gram */
    MPM_B3G,
    MPM_B2GC,
    MPM_B2GM,

    /* aho-corasick */
    MPM_AC,
#ifdef __SC_CUDA_SUPPORT__
    MPM_AC_CUDA,
#endif
    /* aho-corasick-goto-failure state based */
    MPM_AC_GFBS,
    MPM_AC_BS,
    /* ac-tile: aho-corasick optimized for the Tilera Tile-Gx architecture
     * (Tilera specific matching instructions and a compressed alphabet to
     * reduce the delta table size) */
    MPM_AC_TILE,

    /* table size */
    MPM_TABLE_SIZE,
};
/*
 * New Multi-pattern matcher, ac-tile, optimized for Tile architecture.
 *
 * Aho-Corasick mpm optimized for Tilera Tile-Gx architecture. Based on the
 * util-mpm-ac.c code base. The primary optimizations are:
 * 1) Matching function uses Tilera-specific instructions.
 * 2) Alphabet compression to reduce delta table size to increase cache
 *    utilization and performance.
 *
 * The basic observation is that not all 256 ASCII characters are used by the
 * set of multiple patterns in a group for which a DFA is created. The first
 * reason is that Suricata's pattern matching is case-insensitive, so all
 * uppercase characters are converted to lowercase, leaving a hole of 26
 * characters in the alphabet. Previously, this hole was simply left in the
 * middle of the alphabet and thus in the generated Next State (delta) tables.
 * A new, smaller, alphabet is created using a translation table of 256 bytes
 * per mpm group. Previously, there was one global translation table for
 * converting upper case to lowercase. Additional, unused characters are found
 * by creating a histogram of all the characters in all the patterns. Then all
 * the characters with zero counts are mapped to one character (0) in the new
 * alphabet. Since these characters appear in no pattern, they can all be
 * mapped to a single character and still result in the same matches being
 * found. Zero was chosen for the value in the new alphabet since this
 * "character" is more likely to appear in the input. The unused character
 * always results in the next state being state zero, but that fact is not
 * currently used by the code, since special casing takes additional
 * instructions. The characters that do appear in some pattern are mapped to
 * consecutive characters in the new alphabet, starting at 1. This results in
 * a dense packing of next state values in the delta tables and additionally
 * can allow for a smaller number of columns in that table, thus using less
 * memory and better packing into the cache.
 *
 * The size of the new alphabet is the number of used characters plus 1 for
 * the unused catch-all character. The alphabet size is rounded up to the next
 * larger power-of-2 so that multiplication by the alphabet size can be done
 * with a shift. It might be possible to use a multiply instruction, so that
 * the exact alphabet size could be used, which would further reduce the size
 * of the delta tables, increase cache density and not require the specialized
 * search functions. The multiply would likely add 1 cycle to the inner search
 * loop. Since the multiply by alphabet-size is cleverly merged with a mask
 * instruction (in the SINDEX macro), specialized versions of the SCACSearch
 * function are generated for alphabet sizes 256, 128, 64, 32 and 16. This is
 * done by including the file util-mpm-ac-small.c multiple times with a
 * redefined SINDEX macro. A function pointer is then stored in the mpm
 * context for the search function. For alphabet sizes of 8 or smaller, the
 * number of states is usually small, so the DFA is already very small, so
 * there is little difference using the 16 state search function. The
 * SCACSearch function is also specialized by the size of the value stored in
 * the next state (delta) tables, either 16-bits or 32-bits. This removes a
 * conditional inside the Search function. That conditional is only called
 * once, but doesn't hurt to remove it. 16-bits are used for up to 32K states,
 * with the sign bit set for states with matches.
 *
 * Future optimization: The state-has-match value is only needed per state,
 * not per next state, so checking the next-state sign bit could be replaced
 * with reading a different value, at the cost of an additional load, but
 * increasing the 16-bit next state span to 64K. Since the order of the
 * characters in the new alphabet doesn't matter, the new alphabet could be
 * sorted by the frequency of the characters in the expected input stream for
 * that multi-pattern matcher. This would group more frequent characters into
 * the same cache lines, thus increasing the probability of reusing a
 * cache-line. All the next state values for each state live in their own set
 * of cache-lines. With power-of-two sized alphabets, these don't overlap. So
 * either 32 or 16 characters' next states are loaded in each cache line load.
 * If the alphabet size is not an exact power-of-2, then the last cache-line
 * is not completely full and up to 31*2 bytes of that line could be wasted
 * per state. The next state table could be transposed, so that all the next
 * states for a specific character are stored sequentially; this could be
 * better if some characters, for example the unused character, are much more
 * frequent.
 */
12 years ago
/* Matcher used by default: plain aho-corasick, unless the build targets a
 * Tile platform (__tile__ defined), in which case the Tile specific
 * aho-corasick variant is selected. */
#ifndef __tile__
#define DEFAULT_MPM MPM_AC
#else
#define DEFAULT_MPM MPM_AC_TILE
#endif
/** \brief Match bucket holding a single 32-bit length/count value.
 *
 *  NOTE(review): nothing else in this header refers to this type; confirm
 *  it is still used by the matcher implementations. */
typedef struct MpmMatchBucket_ {
uint32_t len;
} MpmMatchBucket;
/** \brief Per-thread matcher context handed to the thread related callbacks
 *  of MpmTableElmt (InitThreadCtx, DestroyThreadCtx, Search, Cleanup,
 *  PrintThreadCtx). */
typedef struct MpmThreadCtx_ {
/* opaque pointer; presumably the matcher specific thread data -- TODO confirm */
void *ctx;
/* NOTE(review): presumably number of allocations / bytes allocated for
 * this thread ctx -- confirm against the matcher implementations */
uint32_t memory_cnt;
uint32_t memory_size;
} MpmThreadCtx;
/** \brief helper structure for the pattern matcher engine. The Pattern Matcher
 *         thread has this and passes a pointer to it to the pattern matcher.
 *         The actual pattern matcher will fill the structure. */
typedef struct PatternMatcherQueue_ {
    uint32_t *pattern_id_array;         /**< array with pattern id's that had a
                                             pattern match. These will be inspected
                                             further by the detection engine. */
    uint32_t pattern_id_array_cnt;      /**< counter belonging to pattern_id_array;
                                             NOTE(review): presumably the number of
                                             ids currently stored -- confirm */
    uint32_t pattern_id_array_size;     /**< size in bytes */

    uint8_t *pattern_id_bitarray;       /**< bitarray with pattern id matches */
    uint32_t pattern_id_bitarray_size;  /**< size in bytes */
} PatternMatcherQueue;
/** \brief Matcher context. A pointer to it is the first argument of the
 *  MpmTableElmt callbacks that operate on a pattern set (InitCtx,
 *  AddPattern, Prepare, Search, ...). */
typedef struct MpmCtx_ {
/* opaque pointer; presumably the matcher specific context -- TODO confirm */
void *ctx;
/* matcher type; NOTE(review): MpmInitCtx() takes a uint16_t `matcher`,
 * so this is presumably one of the MPM_* ids -- confirm */
uint16_t mpm_type;
/* Indicates if this is a global mpm_ctx. Global mpm_ctx is the one that
 * is instantiated when we use "single". Non-global is "full", i.e.
 * one per sgh. We are using a uint16_t here to avoid using a pad.
 * You can use a uint8_t here as well. */
uint16_t global;
/* unique patterns */
uint32_t pattern_cnt;
/* NOTE(review): presumably the shortest / longest pattern length in this
 * ctx -- confirm against the matcher implementations */
uint16_t minlen;
uint16_t maxlen;
/* NOTE(review): presumably number of allocations / bytes allocated for
 * this ctx -- confirm */
uint32_t memory_cnt;
uint32_t memory_size;
} MpmCtx;
/* Key to supply when a unique mpm context should be retrieved from the
 * mpm context factory. */
#define MPM_CTX_FACTORY_UNIQUE_CONTEXT (-1)

/* Factory flag bit. */
#define MPM_CTX_FACTORY_FLAGS_PREPARE_WITH_SIG_GROUP_BUILD (1 << 0)
/** \brief One entry of the mpm context factory: a named item carrying two
 *  MpmCtx pointers, an id and flags. */
typedef struct MpmCtxFactoryItem_ {
char *name;
/* NOTE(review): the _ts/_tc suffixes presumably mean "to server" and
 * "to client" -- confirm */
MpmCtx *mpm_ctx_ts;
MpmCtx *mpm_ctx_tc;
/* NOTE(review): presumably the id returned by
 * MpmFactoryRegisterMpmCtxProfile() and accepted by
 * MpmFactoryGetMpmCtxForProfile() -- confirm */
int32_t id;
/* presumably MPM_CTX_FACTORY_FLAGS_* bits -- TODO confirm */
uint8_t flags;
} MpmCtxFactoryItem;
/** \brief Container for the factory items: a pointer to MpmCtxFactoryItem
 *  storage plus an item count. */
typedef struct MpmCtxFactoryContainer_ {
MpmCtxFactoryItem *items;
/* presumably the number of entries reachable through `items` -- TODO confirm */
int32_t no_of_items;
} MpmCtxFactoryContainer;
/* Pattern flags. One bit per flag, so they can be combined with OR. */

/** pattern is case insensitive */
#define MPM_PATTERN_FLAG_NOCASE  (1 << 0)
/** pattern is negated */
#define MPM_PATTERN_FLAG_NEGATED (1 << 1)
/** pattern has a depth setting */
#define MPM_PATTERN_FLAG_DEPTH   (1 << 2)
/** pattern has an offset setting */
#define MPM_PATTERN_FLAG_OFFSET  (1 << 3)
/** one byte pattern (used in b2g) */
#define MPM_PATTERN_ONE_BYTE     (1 << 4)
/** \brief Table entry describing one matcher: its name, a maximum pattern
 *  length, the callbacks implementing it and a flags byte. mpm_table holds
 *  MPM_TABLE_SIZE of these. */
typedef struct MpmTableElmt_ {
char *name;
uint8_t max_pattern_length;
/* context setup and teardown; the uint32_t passed to InitThreadCtx is
 * unnamed here -- NOTE(review): check the implementations for its meaning */
void (*InitCtx)(struct MpmCtx_ *);
void (*InitThreadCtx)(struct MpmCtx_ *, struct MpmThreadCtx_ *, uint32_t);
void (*DestroyCtx)(struct MpmCtx_ *);
void (*DestroyThreadCtx)(struct MpmCtx_ *, struct MpmThreadCtx_ *);
/** function pointers for adding patterns to the mpm ctx.
 *
 * The arguments appear in the order listed below.
 *
 * \param mpm_ctx Mpm context to add the pattern to
 * \param pattern pointer to the pattern
 * \param pattern_len length of the pattern in bytes
 * \param offset pattern offset setting
 * \param depth pattern depth setting
 * \param pid pattern id
 * \param sid signature _internal_ id
 * \param flags pattern flags
 */
int (*AddPattern)(struct MpmCtx_ *, uint8_t *, uint16_t, uint16_t, uint16_t, uint32_t, uint32_t, uint8_t);
int (*AddPatternNocase)(struct MpmCtx_ *, uint8_t *, uint16_t, uint16_t, uint16_t, uint32_t, uint32_t, uint8_t);
/* NOTE(review): presumably called once all patterns are added and before
 * the first Search -- confirm */
int (*Prepare)(struct MpmCtx_ *);
/* Search takes the ctx, the thread ctx, a queue to fill, and a
 * uint8_t pointer with a uint16_t (presumably buffer and its length).
 * NOTE(review): the uint32_t result is presumably a match count -- confirm */
uint32_t (*Search)(struct MpmCtx_ *, struct MpmThreadCtx_ *, PatternMatcherQueue *, uint8_t *, uint16_t);
void (*Cleanup)(struct MpmThreadCtx_ *);
void (*PrintCtx)(struct MpmCtx_ *);
void (*PrintThreadCtx)(struct MpmThreadCtx_ *);
void (*RegisterUnittests)(void);
uint8_t flags;
} MpmTableElmt;
16 years ago
MpmTableElmt mpm_table[MPM_TABLE_SIZE];
/* macros decides if cuda is enabled for the platform or not */
#ifdef __SC_CUDA_SUPPORT__
/* the min size limit of a payload(or any other data) to be buffered */
#define UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MIN_LIMIT_DEFAULT 0
/* the max size limit of a payload(or any other data) to be buffered */
#define UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MAX_LIMIT_DEFAULT 1500
/* Default value for data buffer used by cuda mpm engine for CudaBuffer reg.
 * The product is parenthesized so it stays a single operand when the macro
 * is used inside a larger expression (e.g. `x / MACRO`, `x % MACRO`). */
#define UTIL_MPM_CUDA_CUDA_BUFFER_DBUFFER_SIZE_DEFAULT (500 * 1024 * 1024)
/* Default value for the max data chunk that would be sent to gpu
 * (parenthesized for the same reason as above) */
#define UTIL_MPM_CUDA_GPU_TRANSFER_SIZE (50 * 1024 * 1024)
/* Default value for offset/pointer buffer to be used by cuda mpm
 * engine for CudaBuffer reg */
#define UTIL_MPM_CUDA_CUDA_BUFFER_OPBUFFER_ITEMS_DEFAULT 500000
/* NOTE(review): unit of the batching timeout is not visible in this
 * header -- confirm where it is consumed */
#define UTIL_MPM_CUDA_BATCHING_TIMEOUT_DEFAULT 2000
#define UTIL_MPM_CUDA_CUDA_STREAMS_DEFAULT 2
#define UTIL_MPM_CUDA_DEVICE_ID_DEFAULT 0
/**
 * \brief Cuda configuration for "mpm" profile. We can further extend this
 *        to have conf for specific mpms. For now it's common for all mpms.
 *
 * NOTE(review): the field names mirror the UTIL_MPM_CUDA_*_DEFAULT macros,
 * which presumably provide the defaults -- confirm where this is filled in.
 */
typedef struct MpmCudaConf_ {
/* min / max size limit of data to be buffered; 16-bit, so at most 65535 */
uint16_t data_buffer_size_min_limit;
uint16_t data_buffer_size_max_limit;
uint32_t cb_buffer_size;
uint32_t gpu_transfer_size;
int batching_timeout;
int device_id;
int cuda_streams;
} MpmCudaConf;
/* Sets up the cuda environment for the mpm code; takes no arguments and
 * returns nothing. Declared with (void) so this is a real prototype and
 * calls with arguments are rejected by the compiler. */
void MpmCudaEnvironmentSetup(void);
#endif /* __SC_CUDA_SUPPORT__ */
/* MpmCtx factory API. Every function takes the detection engine context,
 * which is only forward-declared here because it is used by pointer. */
struct DetectEngineCtx_;
/* Registers a profile under a name. NOTE(review): the uint8_t is presumably
 * a MPM_CTX_FACTORY_FLAGS_* value and the result the profile id -- confirm */
int32_t MpmFactoryRegisterMpmCtxProfile(struct DetectEngineCtx_ *, const char *, uint8_t);
void MpmFactoryReClaimMpmCtx(struct DetectEngineCtx_ *, MpmCtx *);
/* NOTE(review): the int32_t is presumably a profile id or
 * MPM_CTX_FACTORY_UNIQUE_CONTEXT; meaning of the trailing int is not
 * visible in this header -- confirm */
MpmCtx *MpmFactoryGetMpmCtxForProfile(struct DetectEngineCtx_ *, int32_t, int);
void MpmFactoryDeRegisterAllMpmCtxProfiles(struct DetectEngineCtx_ *);
int32_t MpmFactoryIsMpmCtxAvailable(struct DetectEngineCtx_ *, MpmCtx *);
/* PatternMatcherQueue helpers.
 * NOTE(review): the two uint32_t arguments of PmqSetup are unnamed here and
 * the ownership rules of PmqCleanup vs. PmqFree are not visible in this
 * header -- check util-mpm.c before relying on either. */
int PmqSetup(PatternMatcherQueue *, uint32_t, uint32_t);
/* operates on a source and a destination queue, in that argument order */
void PmqMerge(PatternMatcherQueue *src, PatternMatcherQueue *dst);
void PmqReset(PatternMatcherQueue *);
void PmqCleanup(PatternMatcherQueue *);
void PmqFree(PatternMatcherQueue *);
/* General mpm entry points. */
/* NOTE(review): presumably fills mpm_table with the available matchers --
 * confirm in util-mpm.c */
void MpmTableSetup(void);
void MpmRegisterTests(void);
int MpmVerifyMatch(MpmThreadCtx *, PatternMatcherQueue *, uint32_t);
/* `matcher` is a uint16_t; presumably one of the MPM_* enum ids -- TODO confirm */
void MpmInitCtx(MpmCtx *mpm_ctx, uint16_t matcher);
/* NOTE(review): the unnamed uint16_t is presumably the matcher id as in
 * MpmInitCtx; meaning of the uint32_t is not visible here -- confirm */
void MpmInitThreadCtx(MpmThreadCtx *mpm_thread_ctx, uint16_t, uint32_t);
/* Map a string to a uint32_t size. NOTE(review): presumably a configuration
 * value to one of HASHSIZE_* / BLOOMSIZE_* -- confirm */
uint32_t MpmGetHashSize(const char *);
uint32_t MpmGetBloomSize(const char *);
#endif /* __UTIL_MPM_H__ */