diff --git a/src/cuda-packet-batcher.c b/src/cuda-packet-batcher.c index 0e8c7fa8b4..4bc2fddb2b 100644 --- a/src/cuda-packet-batcher.c +++ b/src/cuda-packet-batcher.c @@ -129,15 +129,9 @@ static void SCCudaPBSetBatcherAlarmTimeHandler() */ static SigGroupHead *SCCudaPBGetSgh(DetectEngineCtx *de_ctx, Packet *p) { - int ds, f; + int f; SigGroupHead *sgh = NULL; - /* select the dsize_gh */ - if (p->payload_len <= 100) - ds = 0; - else - ds = 1; - /* select the flow_gh */ if (p->flowflags & FLOW_PKT_TOCLIENT) f = 0; @@ -145,7 +139,7 @@ static SigGroupHead *SCCudaPBGetSgh(DetectEngineCtx *de_ctx, Packet *p) f = 1; /* find the right mpm instance */ - DetectAddress *ag = DetectAddressLookupInHead(de_ctx->dsize_gh[ds].flow_gh[f].src_gh[p->proto], &p->src); + DetectAddress *ag = DetectAddressLookupInHead(de_ctx->flow_gh[f].src_gh[p->proto], &p->src); if (ag != NULL) { /* source group found, lets try a dst group */ ag = DetectAddressLookupInHead(ag->dst_gh,&p->dst); @@ -222,7 +216,7 @@ static void SCCudaPBQueueBuffer(SCCudaPBThreadCtx *tctx) SCMutexLock(&dq_inq->mutex_q); if (dq_inq->len == 0) { /* if we have no data in queue, wait... */ - SCondWait(&dq_inq->cond_q, &dq_inq->mutex_q); + SCCondWait(&dq_inq->cond_q, &dq_inq->mutex_q); } if (run_batcher == 0) { @@ -279,7 +273,8 @@ void *SCCudaPBTmThreadsSlot1(void *td) pthread_exit((void *) -1); } } - memset(&s->s.slot_pq, 0, sizeof(PacketQueue)); + memset(&s->s.slot_pre_pq, 0, sizeof(PacketQueue)); + memset(&s->s.slot_post_pq, 0, sizeof(PacketQueue)); TmThreadsSetFlag(tv, THV_INIT_DONE); while(run) { @@ -299,12 +294,11 @@ void *SCCudaPBTmThreadsSlot1(void *td) * the Batcher TM(which is waiting on a cond from the previous * feeder TM). 
Please handler the NULL packet case in the * function that you now call */ - r = s->s.SlotFunc(tv, p, s->s.slot_data, &s->s.slot_pq); + r = s->s.SlotFunc(tv, p, s->s.slot_data, NULL, NULL); } else { - r = s->s.SlotFunc(tv, p, s->s.slot_data, &s->s.slot_pq); + r = s->s.SlotFunc(tv, p, s->s.slot_data, NULL, NULL); /* handle error */ if (r == TM_ECODE_FAILED) { - TmqhReleasePacketsToPacketPool(&s->s.slot_pq); TmqhOutputPacketpool(tv, p); TmThreadsSetFlag(tv, THV_FAILED); break; @@ -498,7 +492,7 @@ TmEcode SCCudaPBThreadInit(ThreadVars *tv, void *initdata, void **data) * \retval TM_ECODE_OK On success. * \retval TM_ECODE_FAILED On failure. */ -TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq) +TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *post_pq) { #define ALIGN_UP(offset, alignment) \ (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1) @@ -971,7 +965,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[0]; p.payload_len = strlen(strings[0]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -979,7 +973,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[1]; p.payload_len = strlen(strings[1]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -987,7 +981,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[2]; p.payload_len = strlen(strings[2]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -995,7 +989,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[3]; p.payload_len = strlen(strings[3]); - 
SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1003,7 +997,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[4]; p.payload_len = strlen(strings[4]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1011,7 +1005,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[5]; p.payload_len = strlen(strings[5]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1019,7 +1013,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[6]; p.payload_len = strlen(strings[6]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1027,7 +1021,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[7]; p.payload_len = strlen(strings[7]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1035,7 +1029,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[8]; p.payload_len = strlen(strings[8]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; @@ -1043,7 +1037,7 @@ int SCCudaPBTest01(void) p.payload = (uint8_t *)strings[9]; p.payload_len = strlen(strings[9]); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 1); dq 
= &data_queues[tmq_inq->id]; @@ -1198,7 +1192,7 @@ int SCCudaPBTest02(void) string = "test_one"; p.payload = (uint8_t *)string; p.payload_len = strlen(string); - SCCudaPBBatchPackets(NULL, &p, tctx, NULL); + SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL); dq = &data_queues[tmq_outq->id]; result &= (dq->len == 0); dq = &data_queues[tmq_inq->id]; diff --git a/src/cuda-packet-batcher.h b/src/cuda-packet-batcher.h index 60586fb6a3..eb0579f0fe 100644 --- a/src/cuda-packet-batcher.h +++ b/src/cuda-packet-batcher.h @@ -124,7 +124,7 @@ void SCCudaPBCleanUpQueuesAndBuffers(void); void SCCudaPBSetUpQueuesAndBuffers(void); void SCCudaPBKillBatchingPackets(void); -TmEcode SCCudaPBBatchPackets(ThreadVars *, Packet *, void *, PacketQueue *); +TmEcode SCCudaPBBatchPackets(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *); TmEcode SCCudaPBThreadInit(ThreadVars *, void *, void **); TmEcode SCCudaPBThreadDeInit(ThreadVars *, void *); void SCCudaPBThreadExitStats(ThreadVars *, void *); diff --git a/src/decode.h b/src/decode.h index 40cd8b3005..7c783e5096 100644 --- a/src/decode.h +++ b/src/decode.h @@ -247,10 +247,6 @@ struct PacketQueue_; */ typedef struct Packet_ { - /* double linked list ptrs */ - struct Packet_ *next; - struct Packet_ *prev; - /* Addresses, Ports and protocol * these are on top so we can use * the Packet as a hash key */ @@ -360,6 +356,10 @@ typedef struct Packet_ /* decoder events */ PacketDecoderEvents events; + /* double linked list ptrs */ + struct Packet_ *next; + struct Packet_ *prev; + /* tunnel/encapsulation handling */ struct Packet_ *root; /* in case of tunnel this is a ptr * to the 'real' packet, the one we diff --git a/src/runmodes.c b/src/runmodes.c index 4effce00db..e08bcc4916 100644 --- a/src/runmodes.c +++ b/src/runmodes.c @@ -2312,7 +2312,7 @@ int RunModeFilePcapAuto(DetectEngineCtx *de_ctx, char *file) { #if defined(__SC_CUDA_SUPPORT__) if (PatternMatchDefaultMatcher() == MPM_B2G_CUDA) { ThreadVars *tv_decode1 = 
TmThreadCreatePacketHandler("Decode", - "pickup-queue", "simple", + "pickup-queue", "ringbuffer_srsw", "decode-queue1", "simple", "1slot"); if (tv_decode1 == NULL) { @@ -2363,7 +2363,7 @@ int RunModeFilePcapAuto(DetectEngineCtx *de_ctx, char *file) { ThreadVars *tv_stream1 = TmThreadCreatePacketHandler("Stream1", "cuda-pb-queue1", "simple", - "stream-queue1", "simple", + "stream-queue1", "ringbuffer_mrsw", "1slot"); if (tv_stream1 == NULL) { printf("ERROR: TmThreadsCreate failed for Stream1\n"); @@ -2386,8 +2386,8 @@ int RunModeFilePcapAuto(DetectEngineCtx *de_ctx, char *file) { } } else { ThreadVars *tv_decode1 = TmThreadCreatePacketHandler("Decode & Stream", - "pickup-queue", "simple", - "stream-queue1", "simple", + "pickup-queue", "ringbuffer_srsw", + "stream-queue1", "ringbuffer_mrsw", "varslot"); if (tv_decode1 == NULL) { printf("ERROR: TmThreadsCreate failed for Decode1\n"); diff --git a/src/tm-threads.h b/src/tm-threads.h index 20f0b5b879..91a8b0346b 100644 --- a/src/tm-threads.h +++ b/src/tm-threads.h @@ -34,10 +34,9 @@ enum { TVT_MAX, }; - typedef struct TmSlot_ { /* function pointers */ - TmEcode (*SlotFunc)(ThreadVars *, Packet *, void *, PacketQueue *); + TmEcode (*SlotFunc)(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *); TmEcode (*SlotThreadInit)(ThreadVars *, void *, void **); void (*SlotThreadExitPrintStats)(ThreadVars *, void *); @@ -46,10 +45,22 @@ typedef struct TmSlot_ { /* data storage */ void *slot_initdata; void *slot_data; - PacketQueue slot_pq; + + /** queue filled by the SlotFunc with packets that will + * be processed further _before_ the current packet. + * The locks in the queue are NOT used */ + PacketQueue slot_pre_pq; + + /** queue filled by the SlotFunc with packets that will + * be processed further _after_ the current packet. 
The + * locks in the queue are NOT used */ + PacketQueue slot_post_pq; /* linked list, only used by TmVarSlot */ struct TmSlot_ *slot_next; + + int id; /**< slot id, only used by TmVarSlot to know what the first + * slot is. */ } TmSlot; /* 1 function slot */ @@ -61,7 +72,6 @@ typedef struct Tm1Slot_ { typedef struct TmVarSlot_ { TmSlot *s; } TmVarSlot; - extern ThreadVars *tv_root[TVT_MAX]; extern SCMutex tv_root_lock; diff --git a/src/util-mpm-b2g-cuda.c b/src/util-mpm-b2g-cuda.c index 24b36651c1..87b973f070 100644 --- a/src/util-mpm-b2g-cuda.c +++ b/src/util-mpm-b2g-cuda.c @@ -79,7 +79,7 @@ int B2gCudaAddPatternCI(MpmCtx *, uint8_t *, uint16_t, uint16_t, uint16_t, int B2gCudaAddPatternCS(MpmCtx *, uint8_t *, uint16_t, uint16_t, uint16_t, uint32_t, uint32_t, uint8_t); int B2gCudaPreparePatterns(MpmCtx *mpm_ctx); -inline uint32_t B2gCudaSearchWrap(MpmCtx *, MpmThreadCtx *, +uint32_t B2gCudaSearchWrap(MpmCtx *, MpmThreadCtx *, PatternMatcherQueue *, uint8_t *, uint16_t); uint32_t B2gCudaSearch1(MpmCtx *, MpmThreadCtx *, PatternMatcherQueue *, @@ -441,8 +441,8 @@ void MpmB2gCudaRegister(void) void B2gCudaPrintInfo(MpmCtx *mpm_ctx) { +#ifdef DEBUG B2gCudaCtx *ctx = (B2gCudaCtx *)mpm_ctx->ctx; - SCLogDebug("MPM B2g Cuda Information:"); SCLogDebug("Memory allocs: %" PRIu32, mpm_ctx->memory_cnt); SCLogDebug("Memory alloced: %" PRIu32, mpm_ctx->memory_size); @@ -456,6 +456,7 @@ void B2gCudaPrintInfo(MpmCtx *mpm_ctx) SCLogDebug("Smallest: %" PRIu32, mpm_ctx->minlen); SCLogDebug("Largest: %" PRIu32, mpm_ctx->maxlen); SCLogDebug("Hash size: %" PRIu32, ctx->hash_size); +#endif return; } @@ -1940,7 +1941,7 @@ TmEcode B2gCudaMpmDispThreadDeInit(ThreadVars *tv, void *data) * \retval TM_ECODE_OK Always. 
*/ TmEcode B2gCudaMpmDispatcher(ThreadVars *tv, Packet *incoming_buffer, - void *data, PacketQueue *pq) + void *data, PacketQueue *pq, PacketQueue *post_pq) { SCCudaPBPacketsBuffer *pb = (SCCudaPBPacketsBuffer *)incoming_buffer; B2gCudaMpmThreadCtxData *tctx = data; @@ -2044,7 +2045,7 @@ int B2gCudaResultsPostProcessing(Packet *p, MpmCtx *mpm_ctx, SCMutexUnlock(&p->cuda_mutex); break; } else { - SCondWait(&p->cuda_cond, &p->cuda_mutex); + SCCondWait(&p->cuda_cond, &p->cuda_mutex); SCMutexUnlock(&p->cuda_mutex); } } @@ -2154,7 +2155,8 @@ void *CudaMpmB2gThreadsSlot1(void *td) pthread_exit((void *) -1); } } - memset(&s->s.slot_pq, 0, sizeof(PacketQueue)); + memset(&s->s.slot_pre_pq, 0, sizeof(PacketQueue)); + memset(&s->s.slot_post_pq, 0, sizeof(PacketQueue)); TmThreadsSetFlag(tv, THV_INIT_DONE); while(run) { @@ -2166,7 +2168,7 @@ void *CudaMpmB2gThreadsSlot1(void *td) if (data == NULL) { //printf("%s: TmThreadsSlot1: p == NULL\n", tv->name); } else { - r = s->s.SlotFunc(tv, (Packet *)data, s->s.slot_data, &s->s.slot_pq); + r = s->s.SlotFunc(tv, (Packet *)data, s->s.slot_data, NULL, NULL); /* handle error */ /* output the packet */ @@ -2339,7 +2341,7 @@ static int B2gCudaTest01(void) pb->packets_address_buffer[0] = &p; p.payload_len = strlen(string); - B2gCudaMpmDispatcher(NULL, (Packet *)pb, tctx, NULL); + B2gCudaMpmDispatcher(NULL, (Packet *)pb, tctx, NULL, NULL); result &= (p.mpm_offsets[0] == 4); result &= (p.mpm_offsets[1] == 1); @@ -2533,7 +2535,7 @@ static int B2gCudaTest02(void) for (i = 0; i < no_of_pkts; i++) { p[i]->payload = (uint8_t *)strings[i]; p[i]->payload_len = strlen(strings[i]); - SCCudaPBBatchPackets(NULL, p[i], pb_tctx, NULL); + SCCudaPBBatchPackets(NULL, p[i], pb_tctx, NULL, NULL); } dq = &data_queues[tmq_outq->id]; @@ -2556,7 +2558,7 @@ static int B2gCudaTest02(void) if (b2g_tctx->b2g_cuda_search_kernel == 0) goto end; - B2gCudaMpmDispatcher(NULL, (Packet *)pb, b2g_tctx, NULL); + B2gCudaMpmDispatcher(NULL, (Packet *)pb, b2g_tctx, NULL, NULL); 
for (i = 0; i < no_of_pkts; i++) { for (j = 0; j < p[i]->mpm_offsets[0]; j++) @@ -2832,7 +2834,7 @@ static int B2gCudaTest03(void) for (i = 0; i < no_of_pkts; i++) { p[i]->payload = (uint8_t *)strings[i]; p[i]->payload_len = strlen(strings[i]); - SCCudaPBBatchPackets(NULL, p[i], pb_tctx, NULL); + SCCudaPBBatchPackets(NULL, p[i], pb_tctx, NULL, NULL); } dq = &data_queues[tmq_outq->id]; @@ -2855,7 +2857,7 @@ static int B2gCudaTest03(void) if (b2g_tctx->b2g_cuda_search_kernel == 0) goto end; - B2gCudaMpmDispatcher(NULL, (Packet *)pb, b2g_tctx, NULL); + B2gCudaMpmDispatcher(NULL, (Packet *)pb, b2g_tctx, NULL, NULL); for (i = 0; i < 10; i++) SigMatchSignatures(&de_tv, de_ctx, det_ctx, p[i]);