Adding single pattern matcher algorithms. If you cannot store a context for the patterns, use SpmSearch() macro. Adding unittests and stats

16 years ago · 705471e4ee
parent cae8e06cb9
commit 705471e4ee
25 changed files with 2946 additions and 25 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -101,7 +101,10 @@ detect-dce-opnum.c detect-dce-opnum.h \
 detect-dce-stub-data.c detect-dce-stub-data.h \
 util-print.c util-print.h \
 util-mpm.c util-mpm.h \
-util-binsearch.c util-binsearch.h \
+util-spm.c util-spm.h util-clock.h \
+util-spm-bs.c util-spm-bs.h \
+util-spm-bs2bm.c util-spm-bs2bm.h \
+util-spm-bm.c util-spm-bm.h \
 util-mpm-wumanber.c util-mpm-wumanber.h \
 util-mpm-b2g.c util-mpm-b2g.h \
 util-mpm-b3g.c util-mpm-b3g.h \
--- a/src/app-layer-dcerpc.c
+++ b/src/app-layer-dcerpc.c
@ -21,7 +21,7 @@
 #include "app-layer-protos.h"
 #include "app-layer-parser.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"

 #include "app-layer-dcerpc.h"
--- a/src/app-layer-ftp.c
+++ b/src/app-layer-ftp.c
@ -23,7 +23,7 @@
 #include "app-layer-parser.h"
 #include "app-layer-ftp.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"
 #include "util-debug.h"

--- a/src/app-layer-htp.c
+++ b/src/app-layer-htp.c
@ -24,7 +24,7 @@
 #include "app-layer-parser.h"
 #include "app-layer-htp.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"
 #include "util-debug.h"
 #include "app-layer-htp.h"
--- a/src/app-layer-http.c
+++ b/src/app-layer-http.c
@ -15,7 +15,7 @@
 #include "app-layer-protos.h"
 #include "app-layer-parser.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"
 #include "util-debug.h"

--- a/src/app-layer-parser.c
+++ b/src/app-layer-parser.c
@ -16,7 +16,7 @@
 #include "app-layer-protos.h"
 #include "app-layer-parser.h"

-#include "util-binsearch.h"
+#include "util-spm.h"

 #include "util-debug.h"

@ -302,7 +302,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
                pstate->store_len, delim_len);

    if (pstate->store_len == 0) {
-        uint8_t *ptr = BinSearch(input, input_len, delim, delim_len);
+        uint8_t *ptr = SpmSearch(input, input_len, (uint8_t*)delim, delim_len);
        if (ptr != NULL) {
            uint32_t len = ptr - input;
            SCLogDebug(" len %" PRIu32 "", len);
@ -333,7 +333,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
            pstate->store_len = input_len;
        }
    } else {
-        uint8_t *ptr = BinSearch(input, input_len, delim, delim_len);
+        uint8_t *ptr = SpmSearch(input, input_len, (uint8_t*)delim, delim_len);
        if (ptr != NULL) {
            uint32_t len = ptr - input;
            SCLogDebug("len %" PRIu32 " + %" PRIu32 " = %" PRIu32 "", len,
@ -378,7 +378,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
                    SCLogDebug("input_len < delim_len, checking pstate->store");

                    if (pstate->store_len >= delim_len) {
-                        ptr = BinSearch(pstate->store, pstate->store_len, delim,
+                        ptr = SpmSearch(pstate->store, pstate->store_len, (uint8_t*)delim,
                                        delim_len);
                        if (ptr != NULL) {
                            SCLogDebug("now we found the delim");
@ -427,7 +427,7 @@ int AlpParseFieldByDelimiter(AppLayerParserResult *output, AppLayerParserState *
            if (delim_len > input_len && delim_len <= pstate->store_len) {
                SCLogDebug("input_len < delim_len, checking pstate->store");

-                ptr = BinSearch(pstate->store, pstate->store_len, delim, delim_len);
+                ptr = SpmSearch(pstate->store, pstate->store_len, (uint8_t*)delim, delim_len);
                if (ptr != NULL) {
                    SCLogDebug("now we found the delim");

--- a/src/app-layer-smb.c
+++ b/src/app-layer-smb.c
@ -21,7 +21,7 @@
 #include "app-layer-protos.h"
 #include "app-layer-parser.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"

 #include "app-layer-smb.h"
--- a/src/app-layer-smb2.c
+++ b/src/app-layer-smb2.c
@ -20,7 +20,7 @@
 #include "app-layer-protos.h"
 #include "app-layer-parser.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"
 #include "util-debug.h"

--- a/src/app-layer-tls.c
+++ b/src/app-layer-tls.c
@ -26,7 +26,7 @@

 #include "app-layer-tls.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-unittest.h"
 #include "util-debug.h"
 #include "flow-private.h"
--- a/src/detect-flowbits.c
+++ b/src/detect-flowbits.c
@ -12,7 +12,7 @@
 #include "flow.h"
 #include "flow-bit.h"
 #include "detect-flowbits.h"
-#include "util-binsearch.h"
+#include "util-spm.h"

 #include "detect-parse.h"
 #include "detect-engine.h"
--- a/src/detect-flowint.c
+++ b/src/detect-flowint.c
@ -12,7 +12,7 @@
 #include "flow.h"
 #include "flow-var.h"
 #include "detect-flowint.h"
-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-var-name.h"
 #include "util-debug.h"
 #include "util-unittest.h"
--- a/src/detect-flowvar.c
+++ b/src/detect-flowvar.c
@ -10,7 +10,7 @@
 #include "flow.h"
 #include "flow-var.h"
 #include "detect-flowvar.h"
-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-var-name.h"
 #include "util-debug.h"

@ -68,7 +68,7 @@ int DetectFlowvarMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, Packet *p

    FlowVar *fv = FlowVarGet(p->flow, fd->idx);
    if (fv != NULL) {
-        uint8_t *ptr = BinSearch(fv->data.fv_str.value,
+        uint8_t *ptr = SpmSearch(fv->data.fv_str.value,
                                 fv->data.fv_str.value_len,
                                 fd->content, fd->content_len);
        if (ptr != NULL)
--- a/src/detect-http-cookie.c
+++ b/src/detect-http-cookie.c
@ -20,7 +20,7 @@

 #include "util-debug.h"
 #include "util-unittest.h"
-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-print.h"

 #include "app-layer.h"
@ -114,7 +114,7 @@ int DetectHttpCookieMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,

    SCLogDebug("we have a cookie header");

-    if (BinSearch((const uint8_t *)bstr_ptr(h->value), bstr_size(h->value), co->data,
+    if (SpmSearch((uint8_t *)bstr_ptr(h->value), bstr_size(h->value), co->data,
            co->data_len) != NULL)
    {
        SCLogDebug("match has been found in received request and given http_"
--- a/src/detect-http-method.c
+++ b/src/detect-http-method.c
@ -21,7 +21,7 @@

 #include "util-debug.h"
 #include "util-unittest.h"
-#include "util-binsearch.h"
+#include "util-spm.h"

 #include "app-layer.h"

@ -99,7 +99,7 @@ int DetectHttpMethodMatch(ThreadVars *t, DetectEngineThreadCtx *det_ctx,
        const uint8_t *meth_str = (const uint8_t *)bstr_ptr(tx->request_method);

        if (   (meth_str != NULL)
-            && BinSearch(meth_str, bstr_size(tx->request_method),
+            && SpmSearch((uint8_t*)meth_str, bstr_size(tx->request_method),
                         data->content, data->content_len) != NULL)
        {
            SCLogDebug("Matched raw HTTP method values.");
--- a/src/detect-pktvar.c
+++ b/src/detect-pktvar.c
@ -8,7 +8,7 @@
 #include "threads.h"
 #include "pkt-var.h"
 #include "detect-pktvar.h"
-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-debug.h"

 #define PARSE_REGEX         "(.*),(.*)"
@ -64,7 +64,7 @@ int DetectPktvarMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, Packet *p,

    PktVar *pv = PktVarGet(p, pd->name);
    if (pv != NULL) {
-        uint8_t *ptr = BinSearch(pv->value, pv->value_len, pd->content, pd->content_len);
+        uint8_t *ptr = SpmSearch(pv->value, pv->value_len, pd->content, pd->content_len);
        if (ptr != NULL)
            ret = 1;
    }
--- a/src/suricata.c
+++ b/src/suricata.c
@ -13,7 +13,7 @@
 #include "threads.h"
 #include "threadvars.h"

-#include "util-binsearch.h"
+#include "util-spm.h"
 #include "util-hash.h"
 #include "util-hashlist.h"
 #include "util-bloomfilter.h"
@ -500,7 +500,6 @@ int main(int argc, char **argv)
    SigTableSetup(); /* load the rule keywords */
    TmqhSetup();

-    BinSearchInit();
    CIDRInit();
    SigParsePrepare();
    //PatternMatchPrepare(mpm_ctx, MPM_B2G);
@ -591,6 +590,7 @@ int main(int argc, char **argv)
        SCRuleVarsRegisterTests();
        AppLayerParserRegisterTests();
        ThreadMacrosRegisterTests();
+        UtilSpmSearchRegistertests();
        SCClassConfRegisterTests();
        if (list_unittests) {
            UtListTests(regex_arg);
--- a/src/util-clock.h
+++ b/src/util-clock.h
@ -0,0 +1,17 @@
+#ifndef __UTIL_CLOCK_H__
+#define __UTIL_CLOCK_H__
+
+#include <time.h>
+
+/* Feel free to add more macros */
+
+/* Simple CPU-time stopwatch built on clock().
+ * CLOCK_INIT declares the two clock_t variables (clo1, clo2) that every
+ * other macro here relies on, so it must appear first in the same scope. */
+#define CLOCK_INIT          clock_t clo1, clo2; clo1 = clo2 = 0;
+/* store the start mark in clo1 */
+#define CLOCK_START         clo1 = clock()
+
+/* store the end mark in clo2 */
+#define CLOCK_END           clo2 = clock()
+
+/* print the time between CLOCK_START and CLOCK_END, in seconds */
+#define CLOCK_PRINT_SEC     printf("Seconds spent: %.4fs\n", ((clo2 - clo1)/(double)CLOCKS_PER_SEC))
+
+/* time between CLOCK_START and CLOCK_END, in seconds, as a double */
+#define GET_CLOCK_END_SECS  ((clo2 - clo1)/(double)CLOCKS_PER_SEC)
+
+#endif /*__UTIL_CLOCK_H__ */
--- a/src/util-spm-bm.c
+++ b/src/util-spm-bm.c
@ -0,0 +1,255 @@
+/**
+ * Copyright (c) 2009 Open Information Security Foundation
+ *
+ * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
+ *
+ * The Boyer Moore algorithm has really good performance. It needs two arrays
+ * of context for each pattern that hold the applicable shifts on the text
+ * to search in, based on characters not available in the pattern
+ * and combinations of characters that start a suffix of the pattern.
+ * If possible, we should store the context of patterns that we are going
+ * to search for multiple times, so we don't spend time rebuilding them.
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "util-spm-bm.h"
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+
+/**
+ * \brief Fill the Boyer Moore bad-character shift table for a pattern.
+ *        All 256 entries start at m; every byte that occurs in the pattern
+ *        before its last position is then set to its distance from the end
+ *        of the pattern (the rightmost occurrence wins).
+ *
+ * \param x pointer to the pattern
+ * \param m length of the pattern
+ * \param bmBc table to fill; it is indexed by byte value, so it needs 256 entries
+ */
+inline void PreBmBc(const uint8_t *x, int32_t m, int32_t *bmBc) {
+    int32_t c = 0;
+    int32_t pos = 0;
+
+    /* default: a byte that is not in the pattern shifts by the full length */
+    while (c < 256) {
+        bmBc[c] = m;
+        c++;
+    }
+
+    /* the last pattern byte is left out on purpose */
+    while (pos < m - 1) {
+        bmBc[x[pos]] = m - 1 - pos;
+        pos++;
+    }
+}
+
+/**
+ * \brief Compute the suffix-length table the good-suffix shifts are derived
+ *        from: suff[i] is the length of the longest substring of the pattern
+ *        that ends at position i and is also a suffix of the whole pattern
+ *        (so suff[m - 1] == m).
+ *
+ * \param x pointer to the pattern string
+ * \param m length of the pattern; nothing is written when m <= 0
+ * \param suff pointer to an array with room for at least m entries
+ */
+inline void BoyerMooreSuffixes(const uint8_t *x, int32_t m, int32_t *suff) {
+    int32_t f = 0, g, i;
+
+    /* an empty pattern has no suffixes; suff[m - 1] would be suff[-1] */
+    if (m <= 0)
+        return;
+
+    suff[m - 1] = m;
+    g = m - 1;
+    for (i = m - 2; i >= 0; --i) {
+        /* position i lies inside the last matched window [g+1, f]: reuse the
+         * value already computed for the mirrored position when it fits */
+        if (i > g && suff[i + m - 1 - f] < i - g)
+            suff[i] = suff[i + m - 1 - f];
+        else {
+            if (i < g)
+                g = i;
+            f = i;
+            /* extend the match leftwards, comparing against the pattern tail */
+            while (g >= 0 && x[g] == x[g + m - 1 - f])
+                --g;
+            suff[i] = f - g;
+        }
+    }
+}
+
+/**
+ * \brief Array setup function for the good-suffix shifts of the boyermoore context
+ *
+ * \param x pointer to the pattern string
+ * \param m length of the string; nothing is written when m <= 0
+ * \param bmGs pointer to an array with room for m entries that will hold
+ *             the good-suffix shifts
+ */
+inline void PreBmGs(const uint8_t *x, int32_t m, int32_t *bmGs) {
+    int32_t i, j;
+    int32_t *suff;
+
+    /* an empty pattern has no shifts to prepare */
+    if (m <= 0)
+        return;
+
+    suff = malloc(sizeof(*suff) * ((size_t)m + 1));
+    if (suff == NULL) {
+        /* out of memory: a shift of 1 is always valid, the search just
+         * loses the good-suffix speedup */
+        for (i = 0; i < m; ++i)
+            bmGs[i] = 1;
+        return;
+    }
+
+    BoyerMooreSuffixes(x, m, suff);
+
+    for (i = 0; i < m; ++i)
+        bmGs[i] = m;
+
+    j = 0;
+
+    /* case 1: a prefix of the pattern is also a suffix of it.
+     * The loop stops at 0; going down to -1 would read suff[-1]. */
+    for (i = m - 1; i >= 0; --i)
+        if (suff[i] == i + 1)
+            for (; j < m - 1 - i; ++j)
+                if (bmGs[j] == m)
+                    bmGs[j] = m - 1 - i;
+
+    /* case 2: the matched suffix occurs again inside the pattern */
+    for (i = 0; i <= m - 2; ++i)
+        bmGs[m - 1 - suff[i]] = m - 1 - i;
+    free(suff);
+}
+
+/**
+ * \brief Fill the case-insensitive Boyer Moore bad-character shift table.
+ *        All 256 entries start at m; for every pattern byte before the last
+ *        position, the entry of its u8_tolower() value is set to the
+ *        distance from the end of the pattern (the rightmost occurrence wins).
+ *
+ * \param x pointer to the pattern
+ * \param m length of the pattern
+ * \param bmBc table to fill; it is indexed by byte value, so it needs 256 entries
+ */
+inline void PreBmBcNocase(const uint8_t *x, int32_t m, int32_t *bmBc) {
+    int32_t c = 0;
+    int32_t pos = 0;
+
+    /* default: a byte that is not in the pattern shifts by the full length */
+    while (c < 256) {
+        bmBc[c] = m;
+        c++;
+    }
+
+    /* the last pattern byte is left out on purpose */
+    while (pos < m - 1) {
+        bmBc[u8_tolower((unsigned char)x[pos])] = m - 1 - pos;
+        pos++;
+    }
+}
+
+/**
+ * \brief Case-insensitive variant of the suffix-length table: suff[i] is the
+ *        length of the longest substring of the pattern that ends at
+ *        position i and equals a suffix of the whole pattern when both are
+ *        compared through u8_tolower() (so suff[m - 1] == m).
+ *
+ * \param x pointer to the pattern string
+ * \param m length of the pattern; nothing is written when m <= 0
+ * \param suff pointer to an array with room for at least m entries
+ */
+inline void BoyerMooreSuffixesNocase(const uint8_t *x, int32_t m, int32_t *suff) {
+    int32_t f = 0, g, i;
+
+    /* an empty pattern has no suffixes; suff[m - 1] would be suff[-1] */
+    if (m <= 0)
+        return;
+
+    suff[m - 1] = m;
+    g = m - 1;
+    for (i = m - 2; i >= 0; --i) {
+        /* position i lies inside the last matched window [g+1, f]: reuse the
+         * value already computed for the mirrored position when it fits */
+        if (i > g && suff[i + m - 1 - f] < i - g) {
+            suff[i] = suff[i + m - 1 - f];
+        } else {
+            if (i < g) {
+                g = i;
+            }
+            f = i;
+            /* extend the match leftwards, comparing against the pattern tail */
+            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
+                --g;
+            }
+            suff[i] = f - g;
+        }
+    }
+}
+
+/**
+ * \brief Array setup function for the good-suffix shifts of the
+ *        boyermoore context, case less
+ *
+ * \param x pointer to the pattern string
+ * \param m length of the string; nothing is written when m <= 0
+ * \param bmGs pointer to an array with room for m entries that will hold
+ *             the good-suffix shifts
+ */
+inline void PreBmGsNocase(const uint8_t *x, int32_t m, int32_t *bmGs) {
+    int32_t i, j;
+    int32_t* suff;
+
+    /* an empty pattern has no shifts to prepare */
+    if (m <= 0) {
+        return;
+    }
+
+    suff = malloc(sizeof(*suff) * ((size_t)m + 1));
+    if (suff == NULL) {
+        /* out of memory: a shift of 1 is always valid, the search just
+         * loses the good-suffix speedup */
+        for (i = 0; i < m; ++i) {
+            bmGs[i] = 1;
+        }
+        return;
+    }
+
+    BoyerMooreSuffixesNocase(x, m, suff);
+
+    for (i = 0; i < m; ++i) {
+        bmGs[i] = m;
+    }
+    j = 0;
+    /* case 1: a prefix of the pattern is also a suffix of it */
+    for (i = m - 1; i >= 0; --i) {
+        if (suff[i] == i + 1) {
+            for (; j < m - 1 - i; ++j) {
+                if (bmGs[j] == m) {
+                    bmGs[j] = m - 1 - i;
+                }
+            }
+        }
+    }
+    /* case 2: the matched suffix occurs again inside the pattern */
+    for (i = 0; i <= m - 2; ++i) {
+        bmGs[m - 1 - suff[i]] = m - 1 - i;
+    }
+
+    free(suff);
+}
+
+/**
+ * \brief Boyer Moore search algorithm
+ *        Is better as the pattern length increases and for big buffers to search in.
+ *        The algorithm needs a context of two arrays already prepared
+ *        by PreBmGs() and PreBmBc()
+ *
+ * \param x pointer to the pattern we are searching for
+ * \param m length of the pattern
+ * \param y pointer to the buffer to search in
+ * \param n length of the buffer
+ * \param bmGs pointer to the good-suffix shift array prepared by PreBmGs()
+ * \param bmBc pointer to the bad-character shift array prepared by PreBmBc()
+ *
+ * \retval ptr to start of the first match; NULL if no match
+ */
+inline uint8_t *BoyerMoore(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc) {
+    int32_t j = 0;
+
+    while (j <= n - m) {
+        int32_t i = m - 1;
+        int32_t gs_shift, bc_shift;
+
+        /* compare the current window right to left */
+        while (i >= 0 && x[i] == y[i + j])
+            --i;
+
+        /* every pattern byte matched: this is the first occurrence */
+        if (i < 0)
+            return y + j;
+
+        /* advance by the larger of the good-suffix and bad-character shifts */
+        gs_shift = bmGs[i];
+        bc_shift = bmBc[y[i + j]] - m + 1 + i;
+        j += (gs_shift > bc_shift) ? gs_shift : bc_shift;
+    }
+    return NULL;
+}
+
+
+/**
+ * \brief Boyer Moore search algorithm, case less
+ *        Pattern and buffer bytes are compared through u8_tolower().
+ *        The algorithm needs a context of two arrays already prepared
+ *        by PreBmGsNocase() and PreBmBcNocase()
+ *
+ * \param x pointer to the pattern we are searching for
+ * \param m length of the pattern
+ * \param y pointer to the buffer to search in
+ * \param n length of the buffer
+ * \param bmGs pointer to the good-suffix shift array prepared by PreBmGsNocase()
+ * \param bmBc pointer to the bad-character shift array prepared by PreBmBcNocase()
+ *
+ * \retval ptr to start of the first match; NULL if no match
+ */
+inline uint8_t *BoyerMooreNocase(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc) {
+    int pos = 0;
+
+    while (pos <= n - m) {
+        int k = m - 1;
+        int gs_shift, bc_shift;
+
+        /* compare the current window right to left, ignoring case */
+        while (k >= 0 && u8_tolower(x[k]) == u8_tolower(y[k + pos])) {
+            --k;
+        }
+
+        /* every pattern byte matched: this is the first occurrence */
+        if (k < 0) {
+            return y + pos;
+        }
+
+        /* advance by the larger of the good-suffix and bad-character shifts */
+        gs_shift = bmGs[k];
+        bc_shift = bmBc[u8_tolower(y[k + pos])] - m + 1 + k;
+        if (gs_shift > bc_shift) {
+            pos += gs_shift;
+        } else {
+            pos += bc_shift;
+        }
+    }
+    return NULL;
+}
+
--- a/src/util-spm-bm.h
+++ b/src/util-spm-bm.h
@ -0,0 +1,19 @@
+#ifndef __UTIL_SPM_BM__
+#define __UTIL_SPM_BM__
+
+#include "suricata-common.h"
+#include "suricata.h"
+
+#define ALPHABET_SIZE 256
+
+/* Case sensitive Boyer Moore.
+ * PreBmBc() fills the bad-character table (indexed by byte value),
+ * BoyerMooreSuffixes() fills the suffix-length table that PreBmGs() uses to
+ * fill the good-suffix table (m entries).
+ * BoyerMoore() takes the pattern (x, m) first and the text (y, n) second and
+ * returns a pointer to the first match or NULL. */
+inline void PreBmBc(const uint8_t *x, int32_t m, int32_t *bmBc);
+inline void BoyerMooreSuffixes(const uint8_t *x, int32_t m, int32_t *suff);
+inline void PreBmGs(const uint8_t *x, int32_t m, int32_t *bmGs);
+inline uint8_t *BoyerMoore(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc);
+/* Case less counterparts: same roles and argument order, bytes are compared
+ * and indexed through u8_tolower(). */
+inline void PreBmBcNocase(const uint8_t *x, int32_t m, int32_t *bmBc);
+inline void BoyerMooreSuffixesNocase(const uint8_t *x, int32_t m, int32_t *suff);
+inline void PreBmGsNocase(const uint8_t *x, int32_t m, int32_t *bmGs);
+inline uint8_t *BoyerMooreNocase(uint8_t *x, int32_t m, uint8_t *y, int32_t n, int32_t *bmGs, int32_t *bmBc);
+
+#endif /* __UTIL_SPM_BM__ */
+
--- a/src/util-spm-bs.c
+++ b/src/util-spm-bs.c
@ -0,0 +1,108 @@
+/**
+ * Copyright (c) 2009 Open Information Security Foundation
+ *
+ * \author Victor Julien <victor@inliniac.net>
+ * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
+ *
+ * bs is a brute force search. It tries to match the pattern starting
+ * at every character until the remaining text length is less
+ * than the length of the pattern. It needs no context, but its
+ * time cost is not good.
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "util-spm-bs.h"
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+
+/**
+ * \brief Brute force search for the first occurrence of needle in haystack.
+ *        Start positions where the needle no longer fits in the remaining
+ *        buffer are never tried.
+ *
+ * \param haystack pointer to the buffer to search in
+ * \param haystack_len length of the buffer
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ *
+ * \retval ptr to start of the match; NULL if no match, if the needle is
+ *         empty or if it is longer than the haystack
+ */
+inline uint8_t *BasicSearch(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len) {
+    const uint8_t *cur;
+    const uint8_t *last;
+    uint32_t k;
+
+    if (needle_len == 0 || needle_len > haystack_len)
+        return NULL;
+
+    /* last start position at which the needle still fits */
+    last = haystack + (haystack_len - needle_len);
+
+    for (cur = haystack; cur <= last; cur++) {
+        /* cheap reject on the first byte */
+        if (*cur != needle[0])
+            continue;
+
+        /* compare the rest; k ends up at needle_len only on a full match
+         * (which includes the one byte needle case) */
+        k = 1;
+        while (k < needle_len && cur[k] == needle[k])
+            k++;
+
+        if (k == needle_len)
+            return (uint8_t *)cur;
+    }
+
+    return NULL;
+}
+
+/**
+ * \brief Brute force search, case less: bytes of both buffers are compared
+ *        through u8_tolower(). Start positions where the needle no longer
+ *        fits in the remaining buffer are never tried.
+ *
+ * \param haystack pointer to the buffer to search in
+ * \param haystack_len length of the buffer
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ *
+ * \retval ptr to start of the match; NULL if no match, if the needle is
+ *         empty or if it is longer than the haystack
+ */
+inline uint8_t *BasicSearchNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len) {
+    const uint8_t *cur;
+    const uint8_t *last;
+    uint32_t k;
+
+    if (needle_len == 0 || needle_len > haystack_len)
+        return NULL;
+
+    /* last start position at which the needle still fits */
+    last = haystack + (haystack_len - needle_len);
+
+    for (cur = haystack; cur <= last; cur++) {
+        /* cheap reject on the first byte */
+        if (u8_tolower(*cur) != u8_tolower(needle[0])) {
+            continue;
+        }
+
+        /* compare the rest; k ends up at needle_len only on a full match
+         * (which includes the one byte needle case) */
+        k = 1;
+        while (k < needle_len && u8_tolower(cur[k]) == u8_tolower(needle[k])) {
+            k++;
+        }
+
+        if (k == needle_len) {
+            return (uint8_t *)cur;
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * \brief Initialization hook for the basic search. The body is empty: the
+ *        basic search needs no setup.
+ */
+inline void BasicSearchInit (void) {
+    /* nothing no more */
+}
+
--- a/src/util-spm-bs.h
+++ b/src/util-spm-bs.h
@ -0,0 +1,12 @@
+#ifndef __UTIL_SPM_BS__
+#define __UTIL_SPM_BS__
+
+#include "suricata-common.h"
+#include "suricata.h"
+
+/* Brute force searches. Arguments, in order: haystack, haystack_len,
+ * needle, needle_len. Both return a pointer to the start of the first match
+ * or NULL; the Nocase variant compares bytes through u8_tolower(). */
+inline uint8_t *BasicSearch(const uint8_t *, uint32_t, const uint8_t *, uint32_t);
+inline uint8_t *BasicSearchNocase(const uint8_t *, uint32_t, const uint8_t *, uint32_t);
+/* currently a no-op */
+inline void BasicSearchInit (void);
+
+#endif /* __UTIL_SPM_BS__ */
+
--- a/src/util-spm-bs2bm.c
+++ b/src/util-spm-bs2bm.c
@ -0,0 +1,159 @@
+/**
+ * Copyright (c) 2009 Open Information Security Foundation
+ *
+ * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
+ *
+ * Bs2Bm uses a simple context array to determine the characters
+ * that are not present in the pattern. This way, on partial matches
+ * broken by a char that is not present, we can skip to the next character,
+ * making fewer checks
+ */
+
+#include "util-spm-bs2bm.h"
+#include "suricata-common.h"
+#include "suricata.h"
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+
+/**
+ * \brief Array setup function for Bs2Bm: flags every byte value that does
+ *        not occur in the needle
+ *
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ * \param badchars array of ALPHABET_SIZE entries to fill: 1 for byte values
+ *                 that are absent from the needle, 0 for those present
+ */
+inline void Bs2BmBadchars(const uint8_t *needle, uint32_t needle_len, uint8_t *badchars) {
+    uint32_t idx = 0;
+
+    /* start by treating every byte value as absent from the needle */
+    while (idx < ALPHABET_SIZE) {
+        badchars[idx] = 1;
+        idx++;
+    }
+
+    /* then clear the flag of each byte that does occur in it */
+    idx = 0;
+    while (idx < needle_len) {
+        badchars[needle[idx]] = 0;
+        idx++;
+    }
+}
+
+/**
+ * \brief Array setup function for Bs2BmNocase: flags every byte value that
+ *        does not occur in the needle, after passing the needle bytes
+ *        through u8_tolower()
+ *
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ * \param badchars array of ALPHABET_SIZE entries to fill: 0 at the
+ *                 u8_tolower() value of each needle byte, 1 everywhere else
+ */
+inline void Bs2BmBadcharsNocase(const uint8_t *needle, uint32_t needle_len, uint8_t *badchars) {
+    uint32_t idx = 0;
+
+    /* start by treating every byte value as absent from the needle */
+    while (idx < ALPHABET_SIZE) {
+        badchars[idx] = 1;
+        idx++;
+    }
+
+    /* then clear the flag of the lowered form of each needle byte */
+    idx = 0;
+    while (idx < needle_len) {
+        badchars[u8_tolower(needle[idx])] = 0;
+        idx++;
+    }
+}
+
+
+/**
+ * \brief Brute force search helped by a bad characters array. When a partial
+ *        match is broken by a haystack byte flagged in badchars (a byte that
+ *        cannot be part of the needle), the search resumes right after that
+ *        byte instead of at the next start position.
+ *
+ * \param haystack pointer to the buffer to search in
+ * \param haystack_len length of the buffer
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ * \param badchars pointer to an array of badchars prepared by Bs2BmBadchars()
+ *
+ * \retval ptr to start of the match; NULL if no match, if the needle is
+ *         empty or if it is longer than the haystack
+ */
+inline uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len, uint8_t badchars[])
+{
+    const uint8_t *cur;
+    const uint8_t *last;
+    uint32_t k;
+
+    if (needle_len == 0 || needle_len > haystack_len)
+        return NULL;
+
+    /* last start position at which the needle still fits */
+    last = haystack + (haystack_len - needle_len);
+
+    for (cur = haystack; cur <= last; cur++) {
+        /* cheap reject on the first byte */
+        if (*cur != needle[0])
+            continue;
+
+        /* one byte needles */
+        if (needle_len == 1)
+            return (uint8_t *)cur;
+
+        for (k = 1; ; k++) {
+            if (cur[k] != needle[k]) {
+                /* no match can contain this byte: jump onto it, the loop
+                 * increment then moves past it */
+                if (badchars[cur[k]] == 1)
+                    cur += k;
+                break;
+            }
+            /* last needle byte matched */
+            if (k == needle_len - 1)
+                return (uint8_t *)cur;
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * \brief Brute force search, case less, helped by a bad characters array.
+ *        Bytes are compared through u8_tolower(). When a partial match is
+ *        broken by a haystack byte whose lowered value is flagged in
+ *        badchars, the search resumes right after that byte instead of at
+ *        the next start position.
+ *
+ * \param haystack pointer to the buffer to search in
+ * \param haystack_len length of the buffer
+ * \param needle pointer to the pattern we are searching for
+ * \param needle_len length of the pattern
+ * \param badchars pointer to an array of badchars prepared by Bs2BmBadcharsNocase()
+ *
+ * \retval ptr to start of the match; NULL if no match, if the needle is
+ *         empty or if it is longer than the haystack
+ */
+inline uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint32_t needle_len, uint8_t badchars[])
+{
+    const uint8_t *cur;
+    const uint8_t *last;
+    uint32_t k;
+
+    if (needle_len == 0 || needle_len > haystack_len)
+        return NULL;
+
+    /* last start position at which the needle still fits */
+    last = haystack + (haystack_len - needle_len);
+
+    for (cur = haystack; cur <= last; cur++) {
+        /* cheap reject on the first byte */
+        if (u8_tolower(*cur) != u8_tolower(needle[0]))
+            continue;
+
+        /* one byte needles */
+        if (needle_len == 1)
+            return (uint8_t *)cur;
+
+        for (k = 1; ; k++) {
+            if (u8_tolower(cur[k]) != u8_tolower(needle[k])) {
+                /* no match can contain this byte: jump onto it, the loop
+                 * increment then moves past it */
+                if (badchars[u8_tolower(cur[k])] == 1)
+                    cur += k;
+                break;
+            }
+            /* last needle byte matched */
+            if (k == needle_len - 1)
+                return (uint8_t *)cur;
+        }
+    }
+
+    return NULL;
+}
--- a/src/util-spm-bs2bm.h
+++ b/src/util-spm-bs2bm.h
@ -0,0 +1,15 @@
+#ifndef __UTIL_SPM_BS2BM__
+#define __UTIL_SPM_BS2BM__
+
+#include "suricata-common.h"
+#include "suricata.h"
+
+#define ALPHABET_SIZE 256
+
+/* Context builders. Arguments, in order: needle, needle_len, badchars.
+ * They fill ALPHABET_SIZE entries of badchars: 1 for byte values absent from
+ * the needle, 0 for those present (the Nocase variant indexes by the
+ * u8_tolower() value of each needle byte). */
+inline void Bs2BmBadchars(const uint8_t *, uint32_t, uint8_t *);
+inline void Bs2BmBadcharsNocase(const uint8_t *, uint32_t, uint8_t *);
+/* Searches. Arguments, in order: haystack, haystack_len, needle, needle_len,
+ * badchars. Both return a pointer to the start of the first match or NULL. */
+inline uint8_t * Bs2Bm(const uint8_t *, uint32_t, const uint8_t *, uint32_t, uint8_t []);
+inline uint8_t *Bs2BmNocase(const uint8_t *, uint32_t, const uint8_t *, uint32_t, uint8_t []);
+
+#endif /* __UTIL_SPM_BS2BM__ */
+
--- a/src/util-spm.c
+++ b/src/util-spm.c
--- a/src/util-spm.h
+++ b/src/util-spm.h
@ -0,0 +1,40 @@
+/** Copyright (c) 2009 Open Information Security Foundation */
+
+#ifndef __UTIL_SPM_H__
+#define __UTIL_SPM_H__
+
+#include "util-spm-bs.h"
+#include "util-spm-bs2bm.h"
+#include "util-spm-bm.h"
+
+/** Default algorithm to use: Boyer Moore */
+/* Search wrappers that take only the text and the needle, without a
+ * prepared context. Their definitions live in util-spm.c and are not visible
+ * from this header; presumably each call builds the per-pattern context
+ * itself -- TODO confirm against util-spm.c. */
+inline uint8_t *Bs2bmSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
+inline uint8_t *Bs2bmNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
+inline uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
+inline uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint32_t needlelen);
+
+/* Macros for automatic algorithm selection (use them only when you can't store the context)
+ *
+ * Needles shorter than 4 bytes are handed to the brute force search, longer
+ * ones to the Boyer Moore wrapper. The text length does not influence the
+ * choice: the former "textlen < 512" branch called the same function as the
+ * branch after it, so the two have been merged.
+ *
+ * Both macros are GNU statement expressions that yield a uint8_t * (start of
+ * the match or NULL). Arguments may be evaluated more than once, so pass
+ * expressions without side effects.
+ */
+#define SpmSearch(text, textlen, needle, needlelen) ({\
+    uint8_t *mfound; \
+    if ((needlelen) < 4) \
+          mfound = BasicSearch((text), (textlen), (needle), (needlelen)); \
+    else \
+          mfound = BoyerMooreSearch((text), (textlen), (needle), (needlelen)); \
+    mfound; \
+    })
+
+/* case less variant; the brute force function is BasicSearchNocase() */
+#define SpmNocaseSearch(text, textlen, needle, needlelen) ({\
+    uint8_t *mfound; \
+    if ((needlelen) < 4) \
+          mfound = BasicSearchNocase((text), (textlen), (needle), (needlelen)); \
+    else \
+          mfound = BoyerMooreNocaseSearch((text), (textlen), (needle), (needlelen)); \
+    mfound; \
+    })
+
+void UtilSpmSearchRegistertests(void);
+#endif /* __UTIL_SPM_H__ */