Updates to the ac-gfbs search: disable the separate handling of state counts below 65k (65536). Any state count is now given the same treatment.

Anoop Saldanha 14 years ago committed by Victor Julien
parent efb4c27b1f
commit b69ac9514f

@ -749,63 +749,63 @@ static inline void SCACGfbsCreateModGotoTable(MpmCtx *mpm_ctx)
SCACGfbsCtx *ctx = (SCACGfbsCtx *)mpm_ctx->ctx;
if (ctx->state_count < 65536) {
/* Let us use uint16_t for all. That way we don't have to worry about
* alignment. Technically 8 bits is all we need to store ascii codes,
* but by avoiding it, we save a lot of time on handling alignment */
int size = (ctx->state_count * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 3 +
256 * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 2);
ctx->goto_table_mod = SCMalloc(size);
if (ctx->goto_table_mod == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
memset(ctx->goto_table_mod, 0, size);
mpm_ctx->memory_size += size;
/* buffer to hold pointers in the buffer, so that a state can use it
* directly to access its state data */
ctx->goto_table_mod_pointers = SCMalloc(ctx->state_count * sizeof(uint8_t *));
if (ctx->goto_table_mod_pointers == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
memset(ctx->goto_table_mod_pointers, 0,
ctx->state_count * sizeof(uint8_t *));
SC_AC_GFBS_STATE_TYPE_U16 temp_states[256];
uint16_t *curr_loc = (uint16_t *)ctx->goto_table_mod;
uint16_t *no_of_entries = NULL;
uint16_t *ascii_codes = NULL;
int32_t state = 0;
uint16_t ascii_code = 0;
uint16_t k = 0;
for (state = 0; state < ctx->state_count; state++) {
/* store the starting location in the buffer for this state */
ctx->goto_table_mod_pointers[state] = (uint8_t *)curr_loc;
no_of_entries = curr_loc++;
ascii_codes = curr_loc;
k = 0;
/* store all states that have non fail transitions in the temp buffer */
for (ascii_code = 0; ascii_code < 256; ascii_code++) {
if (ctx->goto_table[state][ascii_code] == SC_AC_GFBS_FAIL)
ascii_codes[k] = ascii_code;
temp_states[k] = ctx->goto_table[state][ascii_code];
/* if we have any non fail transitions from our previous for search,
* store the acii codes as well the corresponding states */
if (k > 0) {
no_of_entries[0] = k;
curr_loc += k;
memcpy(curr_loc, temp_states, k * sizeof(SC_AC_GFBS_STATE_TYPE_U16));
curr_loc += k;
} else {
//if (ctx->state_count < 65536) {
// /* Let us use uint16_t for all. That way we don't have to worry about
// * alignment. Technically 8 bits is all we need to store ascii codes,
// * but by avoiding it, we save a lot of time on handling alignment */
// int size = (ctx->state_count * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 3 +
// 256 * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 2);
// ctx->goto_table_mod = SCMalloc(size);
// if (ctx->goto_table_mod == NULL) {
// SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
// exit(EXIT_FAILURE);
// }
// memset(ctx->goto_table_mod, 0, size);
// mpm_ctx->memory_cnt++;
// mpm_ctx->memory_size += size;
// /* buffer to hold pointers in the buffer, so that a state can use it
// * directly to access its state data */
// ctx->goto_table_mod_pointers = SCMalloc(ctx->state_count * sizeof(uint8_t *));
// if (ctx->goto_table_mod_pointers == NULL) {
// SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
// exit(EXIT_FAILURE);
// }
// memset(ctx->goto_table_mod_pointers, 0,
// ctx->state_count * sizeof(uint8_t *));
// SC_AC_GFBS_STATE_TYPE_U16 temp_states[256];
// uint16_t *curr_loc = (uint16_t *)ctx->goto_table_mod;
// uint16_t *no_of_entries = NULL;
// uint16_t *ascii_codes = NULL;
// int32_t state = 0;
// uint16_t ascii_code = 0;
// uint16_t k = 0;
// for (state = 0; state < ctx->state_count; state++) {
// /* store the starting location in the buffer for this state */
// ctx->goto_table_mod_pointers[state] = (uint8_t *)curr_loc;
// no_of_entries = curr_loc++;
// ascii_codes = curr_loc;
// k = 0;
// /* store all states that have non fail transitions in the temp buffer */
// for (ascii_code = 0; ascii_code < 256; ascii_code++) {
// if (ctx->goto_table[state][ascii_code] == SC_AC_GFBS_FAIL)
// continue;
// ascii_codes[k] = ascii_code;
// temp_states[k] = ctx->goto_table[state][ascii_code];
// k++;
// }
// /* if we have any non fail transitions from our previous for search,
// * store the acii codes as well the corresponding states */
// if (k > 0) {
// no_of_entries[0] = k;
// curr_loc += k;
// memcpy(curr_loc, temp_states, k * sizeof(SC_AC_GFBS_STATE_TYPE_U16));
// curr_loc += k;
// }
// }
//} else {
/* Let us use uint32_t for all. That way we don't have to worry about
* alignment. Technically 8 bits is all we need to store ascii codes,
* but by avoiding it, we save a lot of time on handling alignment */
@ -861,7 +861,7 @@ static inline void SCACGfbsCreateModGotoTable(MpmCtx *mpm_ctx)
curr_loc += k;
@ -1131,13 +1131,13 @@ void SCACGfbsDestroyCtx(MpmCtx *mpm_ctx)
ctx->goto_table_mod = NULL;
if (ctx->state_count < 65536) {
mpm_ctx->memory_size -= (ctx->state_count * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 3 +
256 * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 2);
} else {
//if (ctx->state_count < 65536) {
// mpm_ctx->memory_size -= (ctx->state_count * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 3 +
// 256 * sizeof(SC_AC_GFBS_STATE_TYPE_U16) * 2);
//} else {
mpm_ctx->memory_size -= (ctx->state_count * sizeof(SC_AC_GFBS_STATE_TYPE_U32) * 3 +
256 * sizeof(SC_AC_GFBS_STATE_TYPE_U32) * 2);
if (ctx->goto_table_mod_pointers != NULL) {
@ -1177,116 +1177,116 @@ uint32_t SCACGfbsSearch(MpmCtx *mpm_ctx, MpmThreadCtx *mpm_thread_ctx,
SCACGfbsPatternList *pid_pat_list = ctx->pid_pat_list;
/* really hate the extra cmp here, but can't help it */
if (ctx->state_count < 65536) {
/* \todo Change it for stateful MPM. Supply the state using mpm_thread_ctx */
int32_t temp_state;
uint16_t *no_of_entries;
uint16_t *ascii_codes;
uint8_t **goto_table_mod_pointers = ctx->goto_table_mod_pointers;
int32_t *failure_table = ctx->failure_table;
int i;
/* \todo tried loop unrolling with register var, with no perf increase. Need
* to dig deeper */
/* with so many var declarations the register declaration here is useless */
register int32_t state = 0;
for (i = 0; i < buflen; i++) {
/* get the goto state transition */
no_of_entries = (uint16_t *)goto_table_mod_pointers[state];
if (no_of_entries[0] == 0) {
temp_state = SC_AC_GFBS_FAIL;
} else {
ascii_codes = no_of_entries + 1;
buf_local = u8_tolower(buf[i]);
if (state == 0) {
temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[buf_local];
} else {
int low = 0;
int high = no_of_entries[0];
int mid;
temp_state = SC_AC_GFBS_FAIL;
while (low <= high) {
mid = (low + high) / 2;
if (ascii_codes[mid] == buf_local) {
temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[mid];
} else if (ascii_codes[mid] < buf_local) {
low = mid + 1;
} else {
high = mid - 1;
while (temp_state == SC_AC_GFBS_FAIL) {
state = failure_table[state];
/* get the goto state transition */
no_of_entries = (uint16_t *)goto_table_mod_pointers[state];
if (no_of_entries[0] == 0) {
temp_state = SC_AC_GFBS_FAIL;
} else {
ascii_codes = no_of_entries + 1;
buf_local = u8_tolower(buf[i]);
if (state == 0) {
temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[buf_local];
} else {
int low = 0;
int high = no_of_entries[0];
int mid;
temp_state = SC_AC_GFBS_FAIL;
while (low <= high) {
mid = (low + high) / 2;
if (ascii_codes[mid] == buf_local) {
temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[mid];
} else if (ascii_codes[mid] < buf_local) {
low = mid + 1;
} else {
high = mid - 1;
} /* else - if (no_of_entries[0] == 0) */
} /* while (temp_state == SC_AC_GFBS_FAIL) */
state = temp_state;
if (ctx->output_table[state].no_of_entries != 0) {
uint32_t no_of_pid_entries = ctx->output_table[state].no_of_entries;
uint32_t *pids = ctx->output_table[state].pids;
uint32_t k = 0;
for (k = 0; k < no_of_pid_entries; k++) {
if (pids[k] & 0xFFFF0000) {
if (SCMemcmp(pid_pat_list[pids[k] & 0x0000FFFF].cs,
buf + i - pid_pat_list[pids[k] & 0x0000FFFF].patlen + 1,
pid_pat_list[pids[k] & 0x0000FFFF].patlen) != 0) {
/* inside loop */
if (pid_pat_list[pids[k] & 0x0000FFFF].case_state != 3) {
if (pmq->pattern_id_bitarray[(pids[k] & 0x0000FFFF) / 8] & (1 << ((pids[k] & 0x0000FFFF) % 8))) {
} else {
pmq->pattern_id_bitarray[(pids[k] & 0x0000FFFF) / 8] |= (1 << ((pids[k] & 0x0000FFFF) % 8));
pmq->pattern_id_array[pmq->pattern_id_array_cnt++] = (pids[k] & 0x0000FFFF);
} else {
if (pmq->pattern_id_bitarray[pids[k] / 8] & (1 << (pids[k] % 8))) {
} else {
pmq->pattern_id_bitarray[pids[k] / 8] |= (1 << (pids[k] % 8));
pmq->pattern_id_array[pmq->pattern_id_array_cnt++] = pids[k];
} /* if (ctx->output_table[state].no_of_entries != 0) */
} /* for (i = 0; i < buflen; i++) */
} else {
//if (ctx->state_count < 65536) {
// /* \todo Change it for stateful MPM. Supply the state using mpm_thread_ctx */
// int32_t temp_state;
// uint16_t *no_of_entries;
// uint16_t *ascii_codes;
// uint8_t **goto_table_mod_pointers = ctx->goto_table_mod_pointers;
// int32_t *failure_table = ctx->failure_table;
// int i;
// /* \todo tried loop unrolling with register var, with no perf increase. Need
// * to dig deeper */
// /* with so many var declarations the register declaration here is useless */
// register int32_t state = 0;
// for (i = 0; i < buflen; i++) {
// /* get the goto state transition */
// no_of_entries = (uint16_t *)goto_table_mod_pointers[state];
// if (no_of_entries[0] == 0) {
// temp_state = SC_AC_GFBS_FAIL;
// } else {
// ascii_codes = no_of_entries + 1;
// buf_local = u8_tolower(buf[i]);
// if (state == 0) {
// temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[buf_local];
// } else {
// int low = 0;
// int high = no_of_entries[0];
// int mid;
// temp_state = SC_AC_GFBS_FAIL;
// while (low <= high) {
// mid = (low + high) / 2;
// if (ascii_codes[mid] == buf_local) {
// temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[mid];
// break;
// } else if (ascii_codes[mid] < buf_local) {
// low = mid + 1;
// } else {
// high = mid - 1;
// }
// }
// }
// }
// while (temp_state == SC_AC_GFBS_FAIL) {
// state = failure_table[state];
// /* get the goto state transition */
// no_of_entries = (uint16_t *)goto_table_mod_pointers[state];
// if (no_of_entries[0] == 0) {
// temp_state = SC_AC_GFBS_FAIL;
// } else {
// ascii_codes = no_of_entries + 1;
// buf_local = u8_tolower(buf[i]);
// if (state == 0) {
// temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[buf_local];
// } else {
// int low = 0;
// int high = no_of_entries[0];
// int mid;
// temp_state = SC_AC_GFBS_FAIL;
// while (low <= high) {
// mid = (low + high) / 2;
// if (ascii_codes[mid] == buf_local) {
// temp_state = ((SC_AC_GFBS_STATE_TYPE_U16 *)(ascii_codes + no_of_entries[0]))[mid];
// break;
// } else if (ascii_codes[mid] < buf_local) {
// low = mid + 1;
// } else {
// high = mid - 1;
// }
// }
// }
// } /* else - if (no_of_entries[0] == 0) */
// } /* while (temp_state == SC_AC_GFBS_FAIL) */
// state = temp_state;
// if (ctx->output_table[state].no_of_entries != 0) {
// uint32_t no_of_pid_entries = ctx->output_table[state].no_of_entries;
// uint32_t *pids = ctx->output_table[state].pids;
// uint32_t k = 0;
// for (k = 0; k < no_of_pid_entries; k++) {
// if (pids[k] & 0xFFFF0000) {
// if (SCMemcmp(pid_pat_list[pids[k] & 0x0000FFFF].cs,
// buf + i - pid_pat_list[pids[k] & 0x0000FFFF].patlen + 1,
// pid_pat_list[pids[k] & 0x0000FFFF].patlen) != 0) {
// /* inside loop */
// if (pid_pat_list[pids[k] & 0x0000FFFF].case_state != 3) {
// continue;
// }
// }
// if (pmq->pattern_id_bitarray[(pids[k] & 0x0000FFFF) / 8] & (1 << ((pids[k] & 0x0000FFFF) % 8))) {
// ;
// } else {
// pmq->pattern_id_bitarray[(pids[k] & 0x0000FFFF) / 8] |= (1 << ((pids[k] & 0x0000FFFF) % 8));
// pmq->pattern_id_array[pmq->pattern_id_array_cnt++] = (pids[k] & 0x0000FFFF);
// }
// matches++;
// } else {
// if (pmq->pattern_id_bitarray[pids[k] / 8] & (1 << (pids[k] % 8))) {
// ;
// } else {
// pmq->pattern_id_bitarray[pids[k] / 8] |= (1 << (pids[k] % 8));
// pmq->pattern_id_array[pmq->pattern_id_array_cnt++] = pids[k];
// }
// matches++;
// }
// loop:
// ;
// }
// } /* if (ctx->output_table[state].no_of_entries != 0) */
// } /* for (i = 0; i < buflen; i++) */
//} else {
/* \todo Change it for stateful MPM. Supply the state using mpm_thread_ctx */
int32_t temp_state = 0;
uint32_t no_of_entries;
@ -1412,7 +1412,7 @@ uint32_t SCACGfbsSearch(MpmCtx *mpm_ctx, MpmThreadCtx *mpm_thread_ctx,
} /* if (ctx->output_table[state].no_of_entries != 0) */
} /* for (i = 0; i < buflen; i++) */
return matches;
