Adding single pattern matcher algorithms. If you cannot store a context for the patterns, use SpmSearch() macro. Adding unittests and stats

remotes/origin/master-1.0.x
Pablo Rincon 16 years ago committed by Victor Julien
parent cae8e06cb9
commit 705471e4ee

@ -101,7 +101,10 @@ detect-dce-opnum.c detect-dce-opnum.h \
detect-dce-stub-data.c detect-dce-stub-data.h \
util-print.c util-print.h \
util-mpm.c util-mpm.h \
util-binsearch.c util-binsearch.h \
util-spm.c util-spm.h util-clock.h \
util-spm-bs.c util-spm-bs.h \
util-spm-bs2bm.c util-spm-bs2bm.h \
util-spm-bm.c util-spm-bm.h \
util-mpm-wumanber.c util-mpm-wumanber.h \
util-mpm-b2g.c util-mpm-b2g.h \
util-mpm-b3g.c util-mpm-b3g.h \

@ -21,7 +21,7 @@
#include "app-layer-protos.h"
#include "app-layer-parser.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "app-layer-dcerpc.h"

@ -23,7 +23,7 @@
#include "app-layer-parser.h"
#include "app-layer-ftp.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "util-debug.h"

@ -24,7 +24,7 @@
#include "app-layer-parser.h"
#include "app-layer-htp.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "util-debug.h"
#include "app-layer-htp.h"

@ -15,7 +15,7 @@
#include "app-layer-protos.h"
#include "app-layer-parser.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "util-debug.h"

@ -16,7 +16,7 @@
#include "app-layer-protos.h"
#include "app-layer-parser.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-debug.h"
@ -302,7 +302,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
pstate->store_len, delim_len);
if (pstate->store_len == 0) {
uint8_t *ptr = BinSearch(input, input_len, delim, delim_len);
uint8_t *ptr = SpmSearch(input, input_len, (uint8_t*)delim, delim_len);
if (ptr != NULL) {
uint32_t len = ptr - input;
SCLogDebug(" len %" PRIu32 "", len);
@ -333,7 +333,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
pstate->store_len = input_len;
}
} else {
uint8_t *ptr = BinSearch(input, input_len, delim, delim_len);
uint8_t *ptr = SpmSearch(input, input_len, (uint8_t*)delim, delim_len);
if (ptr != NULL) {
uint32_t len = ptr - input;
SCLogDebug("len %" PRIu32 " + %" PRIu32 " = %" PRIu32 "", len,
@ -378,7 +378,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
SCLogDebug("input_len < delim_len, checking pstate->store");
if (pstate->store_len >= delim_len) {
ptr = BinSearch(pstate->store, pstate->store_len, delim,
ptr = SpmSearch(pstate->store, pstate->store_len, (uint8_t*)delim,
delim_len);
if (ptr != NULL) {
SCLogDebug("now we found the delim");
@ -427,7 +427,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
if (delim_len > input_len && delim_len <= pstate->store_len) {
SCLogDebug("input_len < delim_len, checking pstate->store");
ptr = BinSearch(pstate->store, pstate->store_len, delim, delim_len);
ptr = SpmSearch(pstate->store, pstate->store_len, (uint8_t*)delim, delim_len);
if (ptr != NULL) {
SCLogDebug("now we found the delim");

@ -21,7 +21,7 @@
#include "app-layer-protos.h"
#include "app-layer-parser.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "app-layer-smb.h"

@ -20,7 +20,7 @@
#include "app-layer-protos.h"
#include "app-layer-parser.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "util-debug.h"

@ -26,7 +26,7 @@
#include "app-layer-tls.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-unittest.h"
#include "util-debug.h"
#include "flow-private.h"

@ -12,7 +12,7 @@
#include "flow.h"
#include "flow-bit.h"
#include "detect-flowbits.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "detect-parse.h"
#include "detect-engine.h"

@ -12,7 +12,7 @@
#include "flow.h"
#include "flow-var.h"
#include "detect-flowint.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-var-name.h"
#include "util-debug.h"
#include "util-unittest.h"

@ -10,7 +10,7 @@
#include "flow.h"
#include "flow-var.h"
#include "detect-flowvar.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-var-name.h"
#include "util-debug.h"
@ -68,7 +68,7 @@ int DetectFlowvarMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, Packet *p
FlowVar *fv = FlowVarGet(p->flow, fd->idx);
if (fv != NULL) {
uint8_t *ptr = BinSearch(fv->data.fv_str.value,
uint8_t *ptr = SpmSearch(fv->data.fv_str.value,
fv->data.fv_str.value_len,
fd->content, fd->content_len);
if (ptr != NULL)

@ -20,7 +20,7 @@
#include "util-debug.h"
#include "util-unittest.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-print.h"
#include "app-layer.h"
@ -114,7 +114,7 @@ int DetectHttpCookieMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,
SCLogDebug("we have a cookie header");
if (BinSearch((const uint8_t *)bstr_ptr(h->value), bstr_size(h->value), co->data,
if (SpmSearch((uint8_t *)bstr_ptr(h->value), bstr_size(h->value), co->data,
co->data_len) != NULL)
{
SCLogDebug("match has been found in received request and given http_"

@ -21,7 +21,7 @@
#include "util-debug.h"
#include "util-unittest.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "app-layer.h"
@ -99,7 +99,7 @@ int DetectHttpMethodMatch(ThreadVars *t, DetectEngineThreadCtx *det_ctx,
const uint8_t *meth_str = (const uint8_t *)bstr_ptr(tx->request_method);
if ( (meth_str != NULL)
&& BinSearch(meth_str, bstr_size(tx->request_method),
&& SpmSearch((uint8_t*)meth_str, bstr_size(tx->request_method),
data->content, data->content_len) != NULL)
{
SCLogDebug("Matched raw HTTP method values.");

@ -8,7 +8,7 @@
#include "threads.h"
#include "pkt-var.h"
#include "detect-pktvar.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-debug.h"
#define PARSE_REGEX "(.*),(.*)"
@ -64,7 +64,7 @@ int DetectPktvarMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, Packet *p,
PktVar *pv = PktVarGet(p, pd->name);
if (pv != NULL) {
uint8_t *ptr = BinSearch(pv->value, pv->value_len, pd->content, pd->content_len);
uint8_t *ptr = SpmSearch(pv->value, pv->value_len, pd->content, pd->content_len);
if (ptr != NULL)
ret = 1;
}

@ -13,7 +13,7 @@
#include "threads.h"
#include "threadvars.h"
#include "util-binsearch.h"
#include "util-spm.h"
#include "util-hash.h"
#include "util-hashlist.h"
#include "util-bloomfilter.h"
@ -500,7 +500,6 @@ int main(int argc, char **argv)
SigTableSetup(); /* load the rule keywords */
TmqhSetup();
BinSearchInit();
CIDRInit();
SigParsePrepare();
//PatternMatchPrepare(mpm_ctx, MPM_B2G);
@ -591,6 +590,7 @@ int main(int argc, char **argv)
SCRuleVarsRegisterTests();
AppLayerParserRegisterTests();
ThreadMacrosRegisterTests();
UtilSpmSearchRegistertests();
SCClassConfRegisterTests();
if (list_unittests) {
UtListTests(regex_arg);

@ -0,0 +1,17 @@
#ifndef __UTIL_CLOCK_H__
#define __UTIL_CLOCK_H__

#include <stdio.h>
#include <time.h>

/*
 * Tiny stopwatch helpers built on clock().
 *
 * Usage: CLOCK_INIT once in the enclosing scope, then CLOCK_START, the code
 * to be timed, CLOCK_END, and finally CLOCK_PRINT_SEC or GET_CLOCK_END_SECS.
 * All macros operate on the two locals "clo1" and "clo2" that CLOCK_INIT
 * declares, so they must be used in the same scope.
 */
/* Feel free to add more macros */

/* Declares and zeroes the start (clo1) and end (clo2) time stamps */
#define CLOCK_INIT clock_t clo1, clo2; clo1 = clo2 = 0;
/* Records the start time stamp */
#define CLOCK_START clo1 = clock()
/* Records the end time stamp */
#define CLOCK_END clo2 = clock()
/* Prints the time between CLOCK_START and CLOCK_END, in seconds */
#define CLOCK_PRINT_SEC printf("Seconds spent: %.4fs\n", ((clo2 - clo1)/(double)CLOCKS_PER_SEC))
/* Evaluates to the time between CLOCK_START and CLOCK_END, in seconds (double) */
#define GET_CLOCK_END_SECS ((clo2 - clo1)/(double)CLOCKS_PER_SEC)

#endif /*__UTIL_CLOCK_H__ */

@ -0,0 +1,255 @@
/**
* Copyright (c) 2009 Open Information Security Foundation
*
* \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
*
* The Boyer-Moore algorithm performs really well. It needs two context
* arrays for each pattern; they hold the shifts that can be applied to the
* text being searched, based on characters that are not present in the
* pattern and on combinations of characters that start a suffix of the pattern.
* If possible, we should store the context of patterns that we are going
* to search for multiple times, so we don't spend time rebuilding it.
*/
#include "suricata-common.h"
#include "suricata.h"
#include "util-spm-bm.h"
#include <time.h>
#include <limits.h>
#include <string.h>
/**
 * \brief Build the Boyer-Moore bad-character shift table for a pattern.
 *
 * Every entry starts out as the pattern length. Each byte that occurs in the
 * pattern before its last position is then given the distance from its
 * rightmost such occurrence to the end of the pattern.
 *
 * Defined without "inline" so that an external definition is always emitted;
 * a plain "inline" definition provides none under C99/C11 rules.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param bmBc output table; must have room for 256 entries (ALPHABET_SIZE)
 */
void PreBmBc(const uint8_t *x, int32_t m, int32_t *bmBc) {
    int32_t i;

    for (i = 0; i < 256; ++i) {
        bmBc[i] = m;
    }
    /* the last pattern byte is left out: its distance to the end would be 0 */
    for (i = 0; i < m - 1; ++i) {
        bmBc[x[i]] = m - i - 1;
    }
}
/**
 * \brief Compute the suffix-length table needed to build the good-suffix
 *        shifts of the Boyer-Moore context.
 *
 * On return, suff[i] is the length of the longest run of bytes that ends at
 * pattern index i and is also a suffix of the whole pattern; suff[m - 1] is
 * always m. Nothing is written when m <= 0.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param suff output table with room for at least m entries
 */
void BoyerMooreSuffixes(const uint8_t *x, int32_t m, int32_t *suff) {
    int32_t f = 0, g, i;

    /* an empty pattern would make the first store below hit suff[-1] */
    if (m <= 0)
        return;

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* still inside the last matched window: reuse the known value */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g)
                g = i;
            f = i;
            while (g >= 0 && x[g] == x[g + m - 1 - f])
                --g;
            suff[i] = f - g;
        }
    }
}

/**
 * \brief Build the Boyer-Moore good-suffix shift table for a pattern.
 *
 * A temporary suffix table is allocated for the duration of the call. If
 * that allocation fails the table is filled with shifts of 1, which keeps
 * BoyerMoore() correct (it takes the larger of this shift and the
 * bad-character shift) at the cost of speed. Nothing is written when m <= 0.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param bmGs output table with room for at least m entries
 */
void PreBmGs(const uint8_t *x, int32_t m, int32_t *bmGs) {
    int32_t i, j;
    int32_t *suff;

    if (m <= 0)
        return;

    suff = malloc(sizeof(*suff) * ((size_t)m + 1));
    if (suff == NULL) {
        for (i = 0; i < m; ++i)
            bmGs[i] = 1;
        return;
    }

    BoyerMooreSuffixes(x, m, suff);

    for (i = 0; i < m; ++i)
        bmGs[i] = m;

    j = 0;
    /* stop at index 0: suff[] has no element -1 to inspect */
    for (i = m - 1; i >= 0; --i) {
        if (suff[i] == i + 1) {
            for (; j < m - 1 - i; ++j) {
                if (bmGs[j] == m)
                    bmGs[j] = m - 1 - i;
            }
        }
    }
    for (i = 0; i <= m - 2; ++i)
        bmGs[m - 1 - suff[i]] = m - 1 - i;

    free(suff);
}
/**
 * \brief Build the bad-character shift table for a case-insensitive
 *        Boyer-Moore search.
 *
 * Same as PreBmBc(), except that each pattern byte is passed through
 * u8_tolower() before it is used as a table index.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param bmBc output table; must have room for 256 entries (ALPHABET_SIZE)
 */
void PreBmBcNocase(const uint8_t *x, int32_t m, int32_t *bmBc) {
    int32_t i;

    for (i = 0; i < 256; ++i) {
        bmBc[i] = m;
    }
    /* the last pattern byte is left out: its distance to the end would be 0 */
    for (i = 0; i < m - 1; ++i) {
        bmBc[u8_tolower((unsigned char)x[i])] = m - 1 - i;
    }
}
/**
 * \brief Compute the suffix-length table for a case-insensitive Boyer-Moore
 *        context.
 *
 * Same procedure as BoyerMooreSuffixes(), but pattern bytes are compared
 * after being passed through u8_tolower(). suff[m - 1] is always m.
 * Nothing is written when m <= 0.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param suff output table with room for at least m entries
 */
void BoyerMooreSuffixesNocase(const uint8_t *x, int32_t m, int32_t *suff) {
    int32_t f = 0, g, i;

    /* an empty pattern would make the first store below hit suff[-1] */
    if (m <= 0)
        return;

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* still inside the last matched window: reuse the known value */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
                --g;
            }
            suff[i] = f - g;
        }
    }
}
/**
 * \brief Build the good-suffix shift table for a case-insensitive
 *        Boyer-Moore search.
 *
 * A temporary suffix table is allocated for the duration of the call. If
 * that allocation fails the table is filled with shifts of 1, which keeps
 * BoyerMooreNocase() correct (it takes the larger of this shift and the
 * bad-character shift) at the cost of speed. Nothing is written when m <= 0.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern
 * \param bmGs output table with room for at least m entries
 */
void PreBmGsNocase(const uint8_t *x, int32_t m, int32_t *bmGs) {
    int32_t i, j;
    int32_t *suff;

    if (m <= 0)
        return;

    suff = malloc(sizeof(*suff) * ((size_t)m + 1));
    if (suff == NULL) {
        for (i = 0; i < m; ++i) {
            bmGs[i] = 1;
        }
        return;
    }

    BoyerMooreSuffixesNocase(x, m, suff);

    for (i = 0; i < m; ++i) {
        bmGs[i] = m;
    }

    j = 0;
    for (i = m - 1; i >= 0; --i) {
        if (suff[i] != i + 1) {
            continue;
        }
        for (; j < m - 1 - i; ++j) {
            if (bmGs[j] == m) {
                bmGs[j] = m - 1 - i;
            }
        }
    }
    for (i = 0; i <= m - 2; ++i) {
        bmGs[m - 1 - suff[i]] = m - 1 - i;
    }

    free(suff);
}
/**
 * \brief Boyer-Moore search (case sensitive).
 *
 * Works better as the pattern gets longer and for big buffers to search in.
 * The two shift tables must have been prepared for this exact pattern
 * beforehand.
 *
 * \param x    pointer to the pattern we are searching for
 * \param m    length of the pattern
 * \param y    pointer to the buffer to search in
 * \param n    length of the buffer
 * \param bmGs good-suffix shift table (m entries) prepared by PreBmGs()
 * \param bmBc bad-character shift table (256 entries) prepared by PreBmBc()
 *
 * \retval ptr to start of the first match; NULL if no match
 */
uint8_t *BoyerMoore(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc) {
    int32_t i;
    int32_t j = 0;
    int32_t gs_shift, bc_shift;

    while (j <= n - m) {
        /* compare the window right to left; i ends on the mismatch, or -1 */
        for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i)
            ;
        if (i < 0)
            return y + j;

        /* advance by the larger of the two shifts */
        gs_shift = bmGs[i];
        bc_shift = bmBc[y[i + j]] - m + 1 + i;
        j += (gs_shift > bc_shift) ? gs_shift : bc_shift;
    }
    return NULL;
}
/**
 * \brief Boyer-Moore search (case insensitive).
 *
 * Works better as the pattern gets longer and for big buffers to search in.
 * Pattern and buffer bytes are compared after u8_tolower(). The two shift
 * tables must have been prepared for this exact pattern beforehand.
 *
 * \param x    pointer to the pattern we are searching for
 * \param m    length of the pattern
 * \param y    pointer to the buffer to search in
 * \param n    length of the buffer
 * \param bmGs good-suffix shift table (m entries) prepared by PreBmGsNocase()
 * \param bmBc bad-character shift table (256 entries) prepared by
 *             PreBmBcNocase()
 *
 * \retval ptr to start of the first match; NULL if no match
 */
uint8_t *BoyerMooreNocase(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc) {
    int32_t i;
    int32_t j = 0;
    int32_t gs_shift, bc_shift;

    while (j <= n - m) {
        /* compare the window right to left; i ends on the mismatch, or -1 */
        for (i = m - 1; i >= 0 && u8_tolower(x[i]) == u8_tolower(y[i + j]); --i)
            ;
        if (i < 0)
            return y + j;

        /* advance by the larger of the two shifts */
        gs_shift = bmGs[i];
        bc_shift = bmBc[u8_tolower(y[i + j])] - m + 1 + i;
        j += (gs_shift > bc_shift) ? gs_shift : bc_shift;
    }
    return NULL;
}

@ -0,0 +1,19 @@
#ifndef __UTIL_SPM_BM__
#define __UTIL_SPM_BM__

#include "suricata-common.h"
#include "suricata.h"

/* Number of entries a bad-character table must provide (one per byte value) */
#define ALPHABET_SIZE 256

/*
 * The prototypes are ordinary external declarations on purpose: an "inline"
 * declaration without a definition in the including file is not valid under
 * C99/C11 rules and leaves callers without a symbol to link against.
 */

/* Context set-up and search, case sensitive */
void PreBmBc(const uint8_t *x, int32_t m, int32_t *bmBc);
void BoyerMooreSuffixes(const uint8_t *x, int32_t m, int32_t *suff);
void PreBmGs(const uint8_t *x, int32_t m, int32_t *bmGs);
uint8_t *BoyerMoore(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc);

/* Context set-up and search, case insensitive */
void PreBmBcNocase(const uint8_t *x, int32_t m, int32_t *bmBc);
void BoyerMooreSuffixesNocase(const uint8_t *x, int32_t m, int32_t *suff);
void PreBmGsNocase(const uint8_t *x, int32_t m, int32_t *bmGs);
uint8_t *BoyerMooreNocase(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc);

#endif /* __UTIL_SPM_BM__ */

@ -0,0 +1,108 @@
/**
* Copyright (c) 2009 Open Information Security Foundation
*
* \author Victor Julien <victor@inliniac.net>
* \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
*
* bs is a brute-force search. It tries to match the pattern starting
* at every character, until the remaining text is shorter
* than the pattern. It needs no context, but its
* time cost is not good.
*/
#include "suricata-common.h"
#include "suricata.h"
#include "util-spm-bs.h"
#include <time.h>
#include <limits.h>
#include <string.h>
/**
 * \brief Basic (brute-force) search, case sensitive. It never starts a
 *        comparison that would not fit in the remaining buffer.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 *
 * \retval ptr to start of the first match; NULL if no match, if the needle
 *         is empty or if it is longer than the haystack
 */
uint8_t *BasicSearch(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len) {
    const uint8_t *h;
    const uint8_t *last;

    if (needle_len == 0 || needle_len > haystack_len)
        return NULL;

    /* last start position at which the whole needle still fits */
    last = haystack + (haystack_len - needle_len);
    for (h = haystack; h <= last; h++) {
        /* cheap first-byte check before comparing the remainder */
        if (*h == *needle && memcmp(h + 1, needle + 1, needle_len - 1) == 0)
            return (uint8_t *)h;
    }
    return NULL;
}
/**
 * \brief Basic (brute-force) search, case insensitive. Bytes of both buffers
 *        are compared after being passed through u8_tolower().
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 *
 * \retval ptr to start of the first match; NULL if no match, if the needle
 *         is empty or if it is longer than the haystack
 */
uint8_t *BasicSearchNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len) {
    const uint8_t *h, *n;
    const uint8_t *hmax = haystack + haystack_len;
    const uint8_t *nmax = needle + needle_len;

    if (needle_len == 0 || needle_len > haystack_len)
        return NULL;

    /* only try start positions where the whole needle still fits */
    for (; nmax - needle <= hmax - haystack; haystack++) {
        for (h = haystack, n = needle; n < nmax; h++, n++) {
            if (u8_tolower(*h) != u8_tolower(*n)) {
                break;
            }
        }
        /* if we ran out of needle we fully matched */
        if (n == nmax) {
            return (uint8_t *)haystack;
        }
    }
    return NULL;
}
/**
 * \brief Does nothing: the basic search functions in this file take no
 *        prepared context.
 *
 * Defined without "inline" so that an external definition is always emitted.
 */
void BasicSearchInit (void) {
    /* nothing no more */
}

@ -0,0 +1,12 @@
#ifndef __UTIL_SPM_BS__
#define __UTIL_SPM_BS__

#include "suricata-common.h"
#include "suricata.h"

/*
 * Brute-force single pattern search. Arguments are, in order: buffer to
 * search in, its length, pattern to search for, its length. Both search
 * functions return a pointer to the start of the match or NULL.
 *
 * The prototypes are ordinary external declarations on purpose: an "inline"
 * declaration without a definition in the including file is not valid under
 * C99/C11 rules and leaves callers without a symbol to link against.
 */
uint8_t *BasicSearch(const uint8_t *, uint32_t, const uint8_t *, uint32_t);
uint8_t *BasicSearchNocase(const uint8_t *, uint32_t, const uint8_t *, uint32_t);
void BasicSearchInit (void);

#endif /* __UTIL_SPM_BS__ */

@ -0,0 +1,159 @@
/**
* Copyright (c) 2009 Open Information Security Foundation
*
* \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
*
* Bs2Bm uses a simple context array to determine the characters
* that are not present in the pattern. This way, when a partial match
* is broken by a character that is not present, we can skip past that
* character and make fewer checks.
*/
#include "util-spm-bs2bm.h"
#include "suricata-common.h"
#include "suricata.h"
#include <time.h>
#include <limits.h>
#include <string.h>
/**
 * \brief Set up the bad-characters array used by Bs2Bm().
 *
 * After the call, badchars[c] is 0 for every byte value c that occurs in the
 * needle and 1 for every other byte value, so the search can skip faster.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 * \param badchars output array; must have room for ALPHABET_SIZE entries
 */
void Bs2BmBadchars(const uint8_t *needle, uint32_t needle_len, uint8_t *badchars) {
    uint32_t i;

    /* start with every byte value flagged as bad ... */
    memset(badchars, 1, ALPHABET_SIZE);

    /* ... then clear the flag of each byte value present in the needle */
    for (i = 0; i < needle_len; i++)
        badchars[needle[i]] = 0;
}
/**
 * \brief Set up the bad-characters array used by Bs2BmNocase().
 *
 * After the call, badchars[c] is 0 for every value c that some needle byte
 * maps to through u8_tolower(), and 1 for every other byte value.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 * \param badchars output array; must have room for ALPHABET_SIZE entries
 */
void Bs2BmBadcharsNocase(const uint8_t *needle, uint32_t needle_len, uint8_t *badchars) {
    uint32_t i;

    /* start with every byte value flagged as bad ... */
    memset(badchars, 1, ALPHABET_SIZE);

    /* ... then clear the flag of each lowered byte value of the needle */
    for (i = 0; i < needle_len; i++) {
        badchars[u8_tolower(needle[i])] = 0;
    }
}
/**
 * \brief Basic search with a bad-characters array, case sensitive.
 *
 * badchars[c] == 1 marks a byte value that cannot occur inside the needle.
 * When a comparison is broken by such a byte, no match can overlap it, so
 * the search resumes right after it.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 * \param badchars array of bad characters prepared by Bs2BmBadchars()
 *
 * \retval ptr to start of the first match; NULL if no match, if the needle
 *         is empty or if it is longer than the haystack
 */
uint8_t *Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len, uint8_t badchars[])
{
    const uint8_t *h, *n;
    const uint8_t *hmax = haystack + haystack_len;
    const uint8_t *nmax = needle + needle_len;

    if (needle_len == 0 || needle_len > haystack_len)
        return NULL;

    /* only try start positions where the whole needle still fits */
    for (; nmax - needle <= hmax - haystack; haystack++) {
        for (h = haystack, n = needle; n < nmax && *h == *n; h++, n++)
            ;
        /* if we ran out of needle we fully matched */
        if (n == nmax)
            return (uint8_t *)haystack;

        /* h is the byte that broke the match: skip it if it is a bad char */
        if (badchars[*h] == 1)
            haystack = h;
    }
    return NULL;
}
/**
 * \brief Basic search with a bad-characters array, case insensitive.
 *
 * Bytes are compared after being passed through u8_tolower().
 * badchars[c] == 1 marks a lowered byte value that cannot occur inside the
 * needle. When a comparison is broken by such a byte, no match can overlap
 * it, so the search resumes right after it.
 *
 * Defined without "inline" so that an external definition is always emitted.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length of the pattern
 * \param badchars array of bad characters prepared by Bs2BmBadcharsNocase()
 *
 * \retval ptr to start of the first match; NULL if no match, if the needle
 *         is empty or if it is longer than the haystack
 */
uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len, uint8_t badchars[])
{
    const uint8_t *h, *n;
    const uint8_t *hmax = haystack + haystack_len;
    const uint8_t *nmax = needle + needle_len;

    if (needle_len == 0 || needle_len > haystack_len)
        return NULL;

    /* only try start positions where the whole needle still fits */
    for (; nmax - needle <= hmax - haystack; haystack++) {
        for (h = haystack, n = needle; n < nmax; h++, n++) {
            if (u8_tolower(*h) != u8_tolower(*n)) {
                break;
            }
        }
        /* if we ran out of needle we fully matched */
        if (n == nmax) {
            return (uint8_t *)haystack;
        }
        /* h is the byte that broke the match: skip it if it is a bad char */
        if (badchars[u8_tolower(*h)] == 1) {
            haystack = h;
        }
    }
    return NULL;
}

@ -0,0 +1,15 @@
#ifndef __UTIL_SPM_BS2BM__
#define __UTIL_SPM_BS2BM__

#include "suricata-common.h"
#include "suricata.h"

/* Number of entries a bad-characters array must provide (one per byte value) */
#define ALPHABET_SIZE 256

/*
 * The prototypes are ordinary external declarations on purpose: an "inline"
 * declaration without a definition in the including file is not valid under
 * C99/C11 rules and leaves callers without a symbol to link against.
 */

/* Context set-up: pattern, pattern length, output bad-characters array */
void Bs2BmBadchars(const uint8_t *, uint32_t, uint8_t *);
void Bs2BmBadcharsNocase(const uint8_t *, uint32_t, uint8_t *);

/* Search: buffer, buffer length, pattern, pattern length, bad-characters array */
uint8_t * Bs2Bm(const uint8_t *, uint32_t, const uint8_t *, uint32_t, uint8_t []);
uint8_t *Bs2BmNocase(const uint8_t *, uint32_t, const uint8_t *, uint32_t, uint8_t []);

#endif /* __UTIL_SPM_BS2BM__ */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,40 @@
/** Copyright (c) 2009 Open Information Security Foundation */
#ifndef __UTIL_SPM_H__
#define __UTIL_SPM_H__

#include "util-spm-bs.h"
#include "util-spm-bs2bm.h"
#include "util-spm-bm.h"

/*
 * Search entry points that take only the text and the needle.
 * NOTE(review): their definitions are not part of this header; presumably
 * they build the pattern context on every call -- confirm in util-spm.c.
 *
 * Declared without "inline": an inline declaration with no definition in the
 * including file is not valid under C99/C11 rules.
 */
/** Default algorithm to use: Boyer Moore */
uint8_t *Bs2bmSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
uint8_t *Bs2bmNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);

/*
 * Macros for automatic algorithm selection (use them only when you can't
 * store the context).
 *
 * Needles shorter than 4 bytes go to the basic search, everything else to
 * Boyer-Moore. The text length does not influence the choice at the moment.
 * Each macro evaluates to a uint8_t pointer to the match, or NULL.
 * The arguments are evaluated more than once, so they must not have side
 * effects. The macros rely on the GCC statement-expression extension.
 */
#define SpmSearch(text, textlen, needle, needlelen) ({\
    uint8_t *mfound; \
    if ((needlelen) < 4) \
        mfound = BasicSearch((text), (textlen), (needle), (needlelen)); \
    else \
        mfound = BoyerMooreSearch((text), (textlen), (needle), (needlelen)); \
    mfound; \
})

#define SpmNocaseSearch(text, textlen, needle, needlelen) ({\
    uint8_t *mfound; \
    if ((needlelen) < 4) \
        mfound = BasicSearchNocase((text), (textlen), (needle), (needlelen)); \
    else \
        mfound = BoyerMooreNocaseSearch((text), (textlen), (needle), (needlelen)); \
    mfound; \
})

void UtilSpmSearchRegistertests(void);

#endif /* __UTIL_SPM_H__ */
Loading…
Cancel
Save