/**
 * Copyright (c) 2010 Open Information Security Foundation.
 *
 * \author Anoop Saldanha <poonaatsoc@gmail.com>
 *
 * \todo
 * - Make cuda parameters user configurable.
 * - Implement a gpu version of aho-corasick. That should get rid of a
 *   lot of post processing and pattern_chopping, and we don't have to
 *   deal with one or two byte patterns.
 * - Currently a lot of packets (~17k) are getting stuck on the detection
 *   thread, which is a major bottleneck. Introduce bypass detection
 *   threads for these non-buffered packets and check how the alerts
 *   are affected by this (out of sequence handling by detection threads).
 * - Use texture/shared memory. This should be handled along with AC.
 * - Test the use of host-allocated page locked memory.
 * - Test other optimizations like using the sgh held in the flow (if
 *   present in the flow), instead of retrieving the sgh inside the batcher
 *   thread.
 */

/* compile in, only if we have CUDA enabled on this machine */
#ifdef __SC_CUDA_SUPPORT__

#include "suricata-common.h"
#include "suricata.h"

#include "detect.h"
#include "decode.h"
#include "flow.h"
#include "data-queue.h"

#include "threads.h"
#include "threadvars.h"
#include "tm-queuehandlers.h"
#include "tm-modules.h"

#include "cuda-packet-batcher.h"
#include "conf.h"

#include "util-error.h"
#include "util-debug.h"
#include "util-unittest.h"

#include "util-mpm-b2g-cuda.h"
#include "detect-engine-address.h"
#include "detect-engine-port.h"
#include "detect-engine.h"
#include "detect-parse.h"
#include "tm-threads.h"
#include "tmqh-packetpool.h"
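
/* A rough overview of how the pieces below fit together (see the individual
 * function comments for the details): free SCCudaPBPacketsBuffer instances are
 * created in SCCudaPBSetUpQueuesAndBuffers() and parked on the batcher's data
 * inq.  SCCudaPBBatchPackets() copies eligible packet payloads into the
 * current buffer and, once the packet threshold is hit or the SIGALRM based
 * time limit expires, queues the filled buffer on the outq for the cuda mpm
 * b2g dispatcher thread and picks up a fresh buffer from the inq. */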

/* \todo Make this user configurable through our yaml file. Also provide options
 *       where this can be dynamically updated based on the traffic */
#define SC_CUDA_PB_BATCHER_ALARM_TIME 1

/* holds the inq and outq between the cuda-packet-batcher TM and the cuda b2g mpm
 * dispatcher thread */
static Tmq *tmq_inq = NULL;
static Tmq *tmq_outq = NULL;

/* holds the packet inq between the batcher TM and the TM feeding it packets
 * in the runmode sequence. We will need this to implement the alarm. We will
 * have a SIGALRM delivered every SC_CUDA_PB_BATCHER_ALARM_TIME seconds, after
 * which we will set a flag informing the batcher TM to queue the buffer to the
 * GPU and wake the batcher thread, in case it is waiting on a conditional for a
 * packet from the previous TM in the runmode */
static Tmq *tmq_batcher_inq = NULL;

/* used to indicate whether we want to stop buffering packets. We will need
 * this when we want to shut the engine down
 * \todo give a better description */
static int run_batcher = 1;

/* indicates the maximum no of packets we are ready to buffer. Theoretically the
 * maximum value held by this var can't exceed the value held by
 * "max_pending_packets". Either way we should make this user configurable like
 * SC_CUDA_PB_BATCHER_ALARM_TIME. Also allow dynamic updates to this value based
 * on the traffic
 * \todo make this user configurable, as well as allow dynamic updates of this
 *       variable based on the traffic seen */
static uint32_t buffer_packet_threshhold = 2400;

/* flag used by the SIGALRM handler to indicate that the batcher TM should queue
 * the buffer to be processed by the Cuda Mpm B2g dispatcher thread for further
 * processing on the GPU */
static int queue_buffer = 0;

static int unittest_mode = 0;

/**
 * \internal
 * \brief The SIGALRM handler. We will set the "queue_buffer" flag thus
 *        informing the batcher TM that it needs to queue the buffer. We
 *        also signal the cond var for the batcher TM inq (the one it
 *        receives packets from), in case it is waiting on the conditional
 *        for a new packet from the previous TM in the runmodes list.
 *
 * \param signum The signal number that this function just woke up to. In
 *               our case it is SIGALRM.
 */
static void SCCudaPBSetQueueBufferFlag(int signum)
{
    SCLogDebug("Cuda Packet Batcher alarm generated after %d seconds. Set the "
               "queue_buffer flag and signal the cuda TM inq.",
               SC_CUDA_PB_BATCHER_ALARM_TIME);
    queue_buffer = 1;
    SCCondSignal(&((&trans_q[tmq_batcher_inq->id])->cond_q));

    return;
}

/**
 * \internal
 * \brief Set the SIGALRM handler.
 */
static void SCCudaPBSetBatcherAlarmTimeHandler(void)
{
    struct sigaction action;

    SCLogDebug("Setting the SIGALRM handler for the Cuda Batcher TM");
    action.sa_handler = SCCudaPBSetQueueBufferFlag;
    sigemptyset(&(action.sa_mask));
    sigaddset(&(action.sa_mask), SIGALRM);
    action.sa_flags = 0;
    sigaction(SIGALRM, &action, 0);

    return;
}

/**
 * \internal
 * \brief Used to retrieve the Signature Group Head for a packet.
 *
 * \param de_ctx Pointer to the detection engine context to search for the
 *               sgh for an incoming packet.
 * \param p      Pointer to the incoming packet for which we will have to
 *               search for a sgh.
 *
 * \retval sgh Pointer to the relevant matching sgh for the Packet.
 */
static SigGroupHead *SCCudaPBGetSgh(DetectEngineCtx *de_ctx, Packet *p)
{
    int f;
    SigGroupHead *sgh = NULL;

    /* select the flow_gh */
    if (p->flowflags & FLOW_PKT_TOCLIENT)
        f = 0;
    else
        f = 1;

    /* find the right mpm instance */
    DetectAddress *ag = DetectAddressLookupInHead(de_ctx->flow_gh[f].src_gh[p->proto], &p->src);
    if (ag != NULL) {
        /* source group found, let's try a dst group */
        ag = DetectAddressLookupInHead(ag->dst_gh, &p->dst);
        if (ag != NULL) {
            if (ag->port == NULL) {
                SCLogDebug("we don't have ports");
                sgh = ag->sh;
            } else {
                SCLogDebug("we have ports");

                DetectPort *sport = DetectPortLookupGroup(ag->port, p->sp);
                if (sport != NULL) {
                    DetectPort *dport = DetectPortLookupGroup(sport->dst_ph, p->dp);
                    if (dport != NULL) {
                        sgh = dport->sh;
                    } else {
                        SCLogDebug("no dst port group found for the packet with dp %"PRIu16, p->dp);
                    }
                } else {
                    SCLogDebug("no src port group found for the packet with sp %"PRIu16, p->sp);
                }
            }
        } else {
            SCLogDebug("no dst address group found for the packet");
        }
    } else {
        SCLogDebug("no src address group found for the packet");
    }

    return sgh;
}

/**
 * \internal
 * \brief Handles the queuing of the buffer from this batcher TM to the cuda
 *        mpm b2g dispatcher TM.
 *
 * \param tctx The batcher thread context that holds the current operational
 *             buffer which has to be queued by this function.
 */
static void SCCudaPBQueueBuffer(SCCudaPBThreadCtx *tctx)
{
    SCCudaPBPacketsBuffer *pb = (SCCudaPBPacketsBuffer *)tctx->curr_pb;

    /* if we have no packets buffered in so far, get out */
    if (pb->nop_in_buffer == 0) {
        SCLogDebug("No packets buffered in so far in the cuda buffer. Returning");
        return;
    }

    uint32_t nop_in_buffer = pb->nop_in_buffer;
    uint32_t *packets_offset_buffer = pb->packets_offset_buffer;
    uint32_t offset = *(packets_offset_buffer + nop_in_buffer - 1);
    SCCudaPBPacketDataForGPU *last_packet = (SCCudaPBPacketDataForGPU *)(pb->packets_buffer +
                                                                         offset);

    /* calculate the total length of all the packets buffered in */
    pb->packets_buffer_len = pb->packets_offset_buffer[pb->nop_in_buffer - 1] +
                             sizeof(SCCudaPBPacketDataForGPUNonPayload) +
                             last_packet->payload_len;

    pb->packets_total_payload_len = pb->packets_payload_offset_buffer[pb->nop_in_buffer - 1] +
                                    last_packet->payload_len;

    /* enqueue the buffer in the outq to be consumed by the dispatcher TM */
    SCDQDataQueue *dq_outq = &data_queues[tmq_outq->id];
    SCMutexLock(&dq_outq->mutex_q);
    SCDQDataEnqueue(dq_outq, (SCDQGenericQData *)tctx->curr_pb);
    SCCondSignal(&dq_outq->cond_q);
    SCMutexUnlock(&dq_outq->mutex_q);

    while (run_batcher) {
        /* dequeue a new buffer */
        SCDQDataQueue *dq_inq = &data_queues[tmq_inq->id];
        SCMutexLock(&dq_inq->mutex_q);
        if (dq_inq->len == 0) {
            /* if we have no data in queue, wait... */
            SCCondWait(&dq_inq->cond_q, &dq_inq->mutex_q);
        }

        if (run_batcher == 0) {
            SCMutexUnlock(&dq_inq->mutex_q);
            break;
        }

        if (dq_inq->len > 0) {
            tctx->curr_pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq_inq);
            tctx->curr_pb->nop_in_buffer = 0;
            tctx->curr_pb->packets_buffer_len = 0;
            tctx->curr_pb->packets_total_payload_len = 0;
            SCMutexUnlock(&dq_inq->mutex_q);
            SCLogDebug("Dequeued a new packet buffer for the cuda batcher TM");
            break;
        } else {
            /* Should only happen on signals. */
            SCMutexUnlock(&dq_inq->mutex_q);
            SCLogDebug("Woken up with no buffer in the inq. Relooping to "
                       "dequeue a new buffer");
        }
    } /* while (run_batcher) */

    return;
}

/**
 * \brief Custom slot function used by the Batcher TM.
 *
 * \param td Pointer to the ThreadVars instance. In this case the batcher TM's
 *           ThreadVars instance.
 */
void *SCCudaPBTmThreadsSlot1(void *td)
{
    ThreadVars *tv = (ThreadVars *)td;
    Tm1Slot *s = (Tm1Slot *)tv->tm_slots;
    Packet *p = NULL;
    char run = 1;
    TmEcode r = TM_ECODE_OK;

    /* Set the thread name */
    SCSetThreadName(tv->name);

    if (tv->thread_setup_flags != 0) {
        TmThreadSetupOptions(tv);
    }

    SCLogDebug("%s starting", tv->name);

    if (s->s.SlotThreadInit != NULL) {
        r = s->s.SlotThreadInit(tv, s->s.slot_initdata, &s->s.slot_data);
        if (r != TM_ECODE_OK) {
            EngineKill();

            TmThreadsSetFlag(tv, THV_CLOSED);
            pthread_exit((void *) -1);
        }
    }
    memset(&s->s.slot_pre_pq, 0, sizeof(PacketQueue));
    memset(&s->s.slot_post_pq, 0, sizeof(PacketQueue));

    TmThreadsSetFlag(tv, THV_INIT_DONE);
    while (run) {
        TmThreadTestThreadUnPaused(tv);

        /* input a packet */
        p = tv->tmqh_in(tv);

        if (p == NULL) {
            printf("packet is NULL for TM: %s\n", tv->name);
            /* the only difference between the actual Slot1 function in
             * tm-threads.c and this custom Slot1 function is this call
             * here. We need to make the call here, even if we don't
             * receive a packet from the previous stage in the runmodes.
             * This is needed in cases where the SIGALRM handler
             * wants us to queue the buffer to the GPU and ends up waking
             * the Batcher TM (which is waiting on a cond from the previous
             * feeder TM). Please handle the NULL packet case in the
             * function that you now call */
            r = s->s.SlotFunc(tv, p, s->s.slot_data, NULL, NULL);
        } else {
            r = s->s.SlotFunc(tv, p, s->s.slot_data, NULL, NULL);
            /* handle error */
            if (r == TM_ECODE_FAILED) {
                TmqhOutputPacketpool(tv, p);
                TmThreadsSetFlag(tv, THV_FAILED);
                break;
            }

            /* output the packet */
            tv->tmqh_out(tv, p);
        }

        if (TmThreadsCheckFlag(tv, THV_KILL)) {
            SCPerfUpdateCounterArray(tv->sc_perf_pca, &tv->sc_perf_pctx, 0);
            run = 0;
        }
    }

    if (s->s.SlotThreadExitPrintStats != NULL) {
        s->s.SlotThreadExitPrintStats(tv, s->s.slot_data);
    }

    if (s->s.SlotThreadDeinit != NULL) {
        r = s->s.SlotThreadDeinit(tv, s->s.slot_data);
        if (r != TM_ECODE_OK) {
            TmThreadsSetFlag(tv, THV_CLOSED);
            pthread_exit((void *) -1);
        }
    }

    SCLogDebug("%s ending", tv->name);
    TmThreadsSetFlag(tv, THV_CLOSED);
    pthread_exit((void *) 0);
}

/**
 * \brief Used to de-allocate an instance of SCCudaPBPacketsBuffer.
 *
 * \param pb Pointer to the SCCudaPBPacketsBuffer instance to be de-allocated.
 */
void SCCudaPBDeAllocSCCudaPBPacketsBuffer(SCCudaPBPacketsBuffer *pb)
{
    if (pb == NULL)
        return;

    if (pb->packets_buffer != NULL)
        free(pb->packets_buffer);
    if (pb->packets_offset_buffer != NULL)
        free(pb->packets_offset_buffer);
    if (pb->packets_payload_offset_buffer != NULL)
        free(pb->packets_payload_offset_buffer);
    if (pb->packets_address_buffer != NULL)
        free(pb->packets_address_buffer);

    free(pb);

    return;
}

/**
 * \brief Allocates a new instance of SCCudaPBPacketsBuffer.
 *
 * \retval pb The newly created instance of SCCudaPBPacketsBuffer.
 */
SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void)
{
    SCCudaPBPacketsBuffer *pb = malloc(sizeof(SCCudaPBPacketsBuffer));
    if (pb == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(pb, 0, sizeof(SCCudaPBPacketsBuffer));

    /* the buffer for the packets to be sent over to the gpu. We allot space for
     * a minimum of SC_CUDA_PB_MIN_NO_OF_PACKETS packets, i.e. for the case where
     * each packet buffered is full to the brim */
    pb->packets_buffer = malloc(sizeof(SCCudaPBPacketDataForGPU) *
                                SC_CUDA_PB_MIN_NO_OF_PACKETS);
    if (pb->packets_buffer == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(pb->packets_buffer, 0, sizeof(SCCudaPBPacketDataForGPU) *
           SC_CUDA_PB_MIN_NO_OF_PACKETS);

    /* used to hold the offsets of the buffered packets in the packets_buffer */
    pb->packets_offset_buffer = malloc(sizeof(uint32_t) *
                                       SC_CUDA_PB_MIN_NO_OF_PACKETS);
    if (pb->packets_offset_buffer == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(pb->packets_offset_buffer, 0, sizeof(uint32_t) *
           SC_CUDA_PB_MIN_NO_OF_PACKETS);

    /* used to hold the offsets of the packets' payloads */
    pb->packets_payload_offset_buffer = malloc(sizeof(uint32_t) *
                                               SC_CUDA_PB_MIN_NO_OF_PACKETS);
    if (pb->packets_payload_offset_buffer == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(pb->packets_payload_offset_buffer, 0, sizeof(uint32_t) *
           SC_CUDA_PB_MIN_NO_OF_PACKETS);

    /* used to hold the packet addresses for all the packets buffered inside
     * packets_buffer */
    pb->packets_address_buffer = malloc(sizeof(Packet *) *
                                        SC_CUDA_PB_MIN_NO_OF_PACKETS);
    if (pb->packets_address_buffer == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(pb->packets_address_buffer, 0, sizeof(Packet *) *
           SC_CUDA_PB_MIN_NO_OF_PACKETS);

    return pb;
}

/**
 * \brief Registration function for the Cuda Packet Batcher TM.
 */
void TmModuleCudaPacketBatcherRegister(void)
{
    tmm_modules[TMM_CUDA_PACKET_BATCHER].name = "CudaPacketBatcher";
    tmm_modules[TMM_CUDA_PACKET_BATCHER].ThreadInit = SCCudaPBThreadInit;
    tmm_modules[TMM_CUDA_PACKET_BATCHER].Func = SCCudaPBBatchPackets;
    tmm_modules[TMM_CUDA_PACKET_BATCHER].ThreadExitPrintStats = SCCudaPBThreadExitStats;
    tmm_modules[TMM_CUDA_PACKET_BATCHER].ThreadDeinit = SCCudaPBThreadDeInit;
    tmm_modules[TMM_CUDA_PACKET_BATCHER].RegisterTests = SCCudaPBRegisterTests;

    return;
}

/**
 * \brief The cuda batcher TM init function.
 *
 * \param tv       The cuda packet batcher TM ThreadVars instance.
 * \param initdata The initialization data needed by this cuda batcher TM.
 * \param data     Pointer to a pointer memory location that would be updated
 *                 with the newly created thread ctx instance.
 *
 * \retval TM_ECODE_OK On success.
 * \retval TM_ECODE_FAILED On failure.
 */
TmEcode SCCudaPBThreadInit(ThreadVars *tv, void *initdata, void **data)
{
    SCCudaPBThreadCtx *tctx = NULL;

    if (initdata == NULL) {
        SCLogError(SC_ERR_INVALID_ARGUMENTS, "Invalid argument. initdata NULL "
                   "for the cuda batcher TM init thread function");
        return TM_ECODE_FAILED;
    }

    tctx = malloc(sizeof(SCCudaPBThreadCtx));
    if (tctx == NULL) {
        SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
        exit(EXIT_FAILURE);
    }
    memset(tctx, 0, sizeof(SCCudaPBThreadCtx));

    /* the detection engine context. We will need it to retrieve the sgh,
     * when we start receiving and batching the packets */
    tctx->de_ctx = initdata;

    /* the first packet buffer from the queue */
    tctx->curr_pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(&data_queues[tmq_inq->id]);

    *data = tctx;

    /* we will need the cuda packet batcher TM's inq for further use later. Read
     * the comments associated with this var definition, for its use */
    tmq_batcher_inq = tv->inq;

    /* set the SIGALRM handler */
    SCCudaPBSetBatcherAlarmTimeHandler();

    /* if we are running unittests, don't set the alarm. It will only
     * cause a seg fault if the tests take too long */
    if (!unittest_mode) {
        /* Set the alarm time limit during which the batcher thread would
         * buffer packets */
        alarm(SC_CUDA_PB_BATCHER_ALARM_TIME);
    }

    return TM_ECODE_OK;
}

/**
 * \brief Batches packets into the packets buffer.
 *
 * \param tv   Pointer to the ThreadVars instance, in this case the cuda packet
 *             batcher TM's TV instance.
 * \param p    Pointer to the packet to be buffered.
 * \param data Pointer to the batcher TM thread ctx.
 * \param pq   Pointer to the packetqueue. We don't need this.
 *
 * \retval TM_ECODE_OK On success.
 * \retval TM_ECODE_FAILED On failure.
 */
TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *post_pq)
{
#define ALIGN_UP(offset, alignment) \
    (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)
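
    /* ALIGN_UP rounds "offset" up to the next multiple of "alignment" (which
     * must be a power of two).  A quick worked example with a 4 byte
     * alignment: offset 13 becomes (13 + 3) & ~3 = 16, while an already
     * aligned offset 16 stays 16. */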

    /* ah. we have been signalled that we crossed the time limit within which we
     * need to buffer packets. Let us queue the buffer to the GPU */
    if (queue_buffer) {
        SCLogDebug("Cuda packet buffer TIME limit exceeded. Queuing the packet "
                   "buffer and resetting the alarm");
        queue_buffer = 0;
        SCCudaPBQueueBuffer(data);
        /* if we are running unittests, don't set the alarm. It will only
         * cause a seg fault if the tests take too long */
        if (!unittest_mode) {
            alarm(SC_CUDA_PB_BATCHER_ALARM_TIME);
        }
    }

    /* this is possible, since we are using a custom slot function that calls this
     * function, even if it receives no packet from the packet queue */
    if (p == NULL) {
        SCLogDebug("packet NULL inside Cuda batcher TM");
        return TM_ECODE_OK;
    }

    /* we reset this flag for every incoming packet. It will be set again below
     * if we end up buffering the packet */
    p->cuda_mpm_enabled = 0;

    /* packets that are too big are handled by the cpu */
    if (p->payload_len > SC_CUDA_PB_MAX_PAYLOAD_SIZE) {
        SCLogDebug("p->payload_len %"PRIu16" > %d, inspecting on the CPU.",
                   p->payload_len, SC_CUDA_PB_MAX_PAYLOAD_SIZE);
        return TM_ECODE_OK;
    }

    SCCudaPBThreadCtx *tctx = data;
    /* the packets buffer */
    SCCudaPBPacketsBuffer *pb = (SCCudaPBPacketsBuffer *)tctx->curr_pb;
    /* the previous packet which has been buffered into the packets_buffer */
    SCCudaPBPacketDataForGPU *prev_buff_packet = NULL;
    /* holds the position in the packets_buffer where the curr packet would
     * be buffered in */
    SCCudaPBPacketDataForGPU *curr_packet = NULL;
    /* the sgh to which the incoming packet belongs */
    SigGroupHead *sgh = NULL;

    if (p->flow != NULL) {
        /* Get the stored sgh from the flow (if any). Make sure we're not using
         * the sgh for icmp error packets part of the same stream. */
        if (p->proto == p->flow->proto) { /* filter out icmp */
            if (p->flowflags & FLOW_PKT_TOSERVER && p->flow->flags & FLOW_SGH_TOSERVER) {
                sgh = p->flow->sgh_toserver;
            } else if (p->flowflags & FLOW_PKT_TOCLIENT && p->flow->flags & FLOW_SGH_TOCLIENT) {
                sgh = p->flow->sgh_toclient;
            }
        }
    }

    if (sgh == NULL) {
        /* get the signature group head to which this packet belongs. If it belongs
         * to no sgh, we don't need to buffer this packet.
         * \todo Get rid of this, once we get the sgh from the flow */
        sgh = SCCudaPBGetSgh(tctx->de_ctx, p);
        if (sgh == NULL) {
            SCLogDebug("No SigGroupHead match for this packet");
            return TM_ECODE_OK;
        }
    }

    /* if the payload is less than the maximum content length in this sgh we
     * don't need to run the PM on this packet. Chuck the packet out */
    if (sgh->mpm_content_maxlen > p->payload_len) {
        SCLogDebug("not mpm-inspecting as pkt payload is smaller than "
                   "the largest content length we need to match");
        return TM_ECODE_OK;
    }

    /* if one of these conditions fails we don't have to run the mpm on this
     * packet. Firstly if the payload_len is == 0, we don't have a payload
     * to match against. Next if we don't have a mpm_ctx against this
     * sgh, indicating we don't have any patterns in this sgh, again we don't
     * have anything to run the PM against. Finally if payload inspection has
     * been disabled for this packet, we can chuck this packet out as well */
    if ( !(p->payload_len > 0 && sgh->mpm_ctx != NULL &&
           !(p->flags & PKT_NOPAYLOAD_INSPECTION)) ) {
        SCLogDebug("Either p->payload_len <= 0 or mpm_ctx for the packet is NULL "
                   "or PKT_NOPAYLOAD_INSPECTION set for this packet");
        return TM_ECODE_OK;
    }

    /* the cuda b2g context */
    B2gCudaCtx *ctx = sgh->mpm_ctx->ctx;

    /* if we have a 1 byte search kernel set, we don't buffer this packet for
     * cuda matching and instead let the non-cuda mpm function run on
     * the packet */
    if (ctx->Search == B2gCudaSearch1) {
        SCLogDebug("The sgh has one byte patterns. Run mpm "
                   "separately");
        return TM_ECODE_OK;
    }

#ifdef B2G_CUDA_SEARCH2
    /* if we have a 2 byte search kernel set, we don't buffer this packet for
     * cuda matching and instead let the non-cuda mpm function run on the
     * packet */
    if (ctx->Search == B2gCudaSearch2) {
        SCLogDebug("The sgh has two byte patterns. Run mpm "
                   "separately");
        return TM_ECODE_OK;
    }
#endif

    /* we have passed all the criteria for buffering the packet. Set the
     * flag indicating that the packet goes through cuda mpm */
    p->cuda_mpm_enabled = 1;

    /* first packet to be buffered in */
    if (pb->nop_in_buffer == 0) {
        curr_packet = (SCCudaPBPacketDataForGPU *)pb->packets_buffer;

    /* buffer is not empty */
    } else {
        prev_buff_packet = (SCCudaPBPacketDataForGPU *)(pb->packets_buffer +
                            pb->packets_offset_buffer[pb->nop_in_buffer - 1]);
        curr_packet = (SCCudaPBPacketDataForGPU *)((uint8_t *)prev_buff_packet +
                       sizeof(SCCudaPBPacketDataForGPUNonPayload) +
                       prev_buff_packet->payload_len);
        int diff = (int)((uint8_t *)curr_packet - pb->packets_buffer);
        /* \todo We feel nvidia took the wrong option by typedefing CUdeviceptr
         * to unsigned int. Keep this option for now. We will get back to this
         * once nvidia responds to the filed bug */
        ALIGN_UP(diff, sizeof(CUdeviceptr));
        curr_packet = (SCCudaPBPacketDataForGPU *)(pb->packets_buffer + diff);
    }
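
    /* One way to picture the buffer layout built by the code above and below
     * (an informal sketch, not an authoritative format description):
     *
     *   packets_buffer:
     *   [hdr0|payload0][pad][hdr1|payload1][pad]...[hdrN|payloadN]
     *
     * where each hdr is a SCCudaPBPacketDataForGPUNonPayload, packet i starts
     * at packets_offset_buffer[i] (aligned up to sizeof(CUdeviceptr)), and
     * packets_payload_offset_buffer[i] holds the sum of the payload lengths of
     * all packets buffered before packet i. */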

    /* store the data in the packets_buffer for this packet, which would be passed
     * over to the GPU for processing */
    curr_packet->m = ((B2gCudaCtx *)(sgh->mpm_ctx->ctx))->m;
    curr_packet->table = ((B2gCudaCtx *)(sgh->mpm_ctx->ctx))->cuda_B2G;
    curr_packet->payload_len = p->payload_len;
    memcpy(curr_packet->payload, p->payload, p->payload_len);

    /* store the address of the packet just buffered at the same index. The
     * dispatcher thread will need this address to communicate the results back
     * to the packet */
    pb->packets_address_buffer[pb->nop_in_buffer] = p;

    /* if it is the first packet to be buffered, the offset is 0. If it is not,
     * then take the offset for the buffer from curr_packet */
    if (pb->nop_in_buffer == 0) {
        pb->packets_offset_buffer[pb->nop_in_buffer] = 0;
        pb->packets_payload_offset_buffer[pb->nop_in_buffer] = 0;
    } else {
        pb->packets_offset_buffer[pb->nop_in_buffer] = (uint8_t *)curr_packet - pb->packets_buffer;
        pb->packets_payload_offset_buffer[pb->nop_in_buffer] =
            pb->packets_payload_offset_buffer[pb->nop_in_buffer - 1] +
            prev_buff_packet->payload_len;
    }

    /* indicates the no of packets added so far into the buffer */
    pb->nop_in_buffer++;

    /* we have hit the threshhold for the total no of packets held in the buffer
     * (in the future we will instead rely on the remaining space left in the
     * buffer), or we have been informed that we have hit the time limit to
     * queue the buffer */
    if ( (pb->nop_in_buffer == buffer_packet_threshhold) || queue_buffer) {
        queue_buffer = 0;
        SCLogDebug("Either we have hit the threshold limit for packets (i.e. we "
                   "have hit the %d packets limit) OR we have exceeded the "
                   "buffering time limit. Queuing the packet buffer and "
                   "resetting the alarm.", buffer_packet_threshhold);
        SCCudaPBQueueBuffer(tctx);
        /* if we are running unittests, don't set the alarm. It will only
         * cause a seg fault if the tests take too long */
        if (!unittest_mode) {
            alarm(SC_CUDA_PB_BATCHER_ALARM_TIME);
        }
    }

    return TM_ECODE_OK;
}
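
/**
 * \brief The exit-stats function for the cuda packet batcher TM. Currently a
 *        no-op; it only exists to satisfy the TM module interface.
 *
 * \param tv   Pointer to the cuda packet batcher TM ThreadVars instance.
 * \param data Pointer to the thread ctx for the cuda packet batcher TM.
 */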
void SCCudaPBThreadExitStats(ThreadVars *tv, void *data)
{
    return;
}

/**
 * \brief The thread de-init function for the cuda packet batcher TM.
 *
 * \param tv   Pointer to the cuda packet batcher TM ThreadVars instance.
 * \param data Pointer to the thread ctx for the cuda packet batcher TM.
 *
 * \retval TM_ECODE_OK On success.
 * \retval TM_ECODE_FAILED On failure. Although we won't be returning this here.
 */
TmEcode SCCudaPBThreadDeInit(ThreadVars *tv, void *data)
{
    SCCudaPBThreadCtx *tctx = data;

    if (tctx != NULL) {
        if (tctx->curr_pb != NULL) {
            SCCudaPBDeAllocSCCudaPBPacketsBuffer(tctx->curr_pb);
            tctx->curr_pb = NULL;
        }
        free(tctx);
    }

    return TM_ECODE_OK;
}

/**
 * \brief Sets up the queues and buffers needed by the cuda batcher TM function.
 */
void SCCudaPBSetUpQueuesAndBuffers(void)
{
    /* the b2g dispatcher thread would have to use the reverse for incoming
     * and outgoing queues */
    char *inq_name = "cuda_batcher_mpm_inqueue";
    char *outq_name = "cuda_batcher_mpm_outqueue";
    int i = 0;

    /* set the incoming queue for the cuda_packet_batcher TM and the cuda B2g
     * dispatcher */
    tmq_inq = TmqGetQueueByName(inq_name);
    if (tmq_inq == NULL) {
        tmq_inq = TmqCreateQueue(inq_name);
        if (tmq_inq == NULL) {
            return;
        }
    }
    tmq_inq->reader_cnt++;
    tmq_inq->writer_cnt++;

    /* set the outgoing queue from the cuda_packet_batcher TM and the cuda B2g
     * dispatcher */
    tmq_outq = TmqGetQueueByName(outq_name);
    if (tmq_outq == NULL) {
        tmq_outq = TmqCreateQueue(outq_name);
        if (tmq_outq == NULL) {
            return;
        }
    }
    tmq_outq->reader_cnt++;
    tmq_outq->writer_cnt++;

    /* allocate the packet buffers */
    /* \todo need to work out the right no of packet buffers that we need to
     * queue. I doubt we will need more than 4 (as long as we don't run it on
     * a low traffic line). We don't want to get into the business of creating
     * new ones when we run out of buffers, since a malloc for a huge chunk
     * like this will take time. We need to figure out a value based on
     * various other parameters like alarm time and buffer threshold value */
    for (i = 0; i < 10; i++) {
        SCCudaPBPacketsBuffer *pb = SCCudaPBAllocSCCudaPBPacketsBuffer();
        /* dump the buffer into the inqueue for this batcher TM. The batcher
         * thread would be the first consumer for these buffers */
        SCDQDataEnqueue(&data_queues[tmq_inq->id], (SCDQGenericQData *)pb);
    }

    /* \todo This needs to be changed ASAP. This can't exceed max_pending_packets.
     * Also we need to make this user configurable and allow dynamic updates
     * based on live traffic */
    buffer_packet_threshhold = 2400;

    return;
}

/**
 * \brief Clean up all the buffers queued into the inq and outq. Need to write
 *        more on this.
 */
void SCCudaPBCleanUpQueuesAndBuffers(void)
{
    SCCudaPBPacketsBuffer *pb = NULL;
    SCDQDataQueue *dq = NULL;

    if (tmq_inq == NULL || tmq_outq == NULL) {
        SCLogError(SC_ERR_INVALID_ARGUMENTS, "Invalid arguments. tmq_inq or "
                   "tmq_outq NULL");
        return;
    }

    /* clean all the buffers present in the inq */
    dq = &data_queues[tmq_inq->id];
    SCMutexLock(&dq->mutex_q);
    while ( (pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq)) != NULL) {
        if (pb->packets_buffer != NULL)
            free(pb->packets_buffer);
        if (pb->packets_offset_buffer != NULL)
            free(pb->packets_offset_buffer);
        if (pb->packets_payload_offset_buffer != NULL)
            free(pb->packets_payload_offset_buffer);
        if (pb->packets_address_buffer != NULL)
            free(pb->packets_address_buffer);

        free(pb);
    }
    SCMutexUnlock(&dq->mutex_q);
    SCCondSignal(&dq->cond_q);

    /* clean all the buffers present in the outq */
    dq = &data_queues[tmq_outq->id];
    SCMutexLock(&dq->mutex_q);
    while ( (pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq)) != NULL) {
        if (pb->packets_buffer != NULL)
            free(pb->packets_buffer);
        if (pb->packets_offset_buffer != NULL)
            free(pb->packets_offset_buffer);
        if (pb->packets_payload_offset_buffer != NULL)
            free(pb->packets_payload_offset_buffer);
        if (pb->packets_address_buffer != NULL)
            free(pb->packets_address_buffer);

        free(pb);
    }
    SCMutexUnlock(&dq->mutex_q);
    SCCondSignal(&dq->cond_q);

    return;
}

/**
 * \brief Function used to set the packet threshold limit for the packets buffer.
 *
 * \param threshhold_override The threshold limit for the packets_buffer.
 */
void SCCudaPBSetBufferPacketThreshhold(uint32_t threshhold_override)
{
    buffer_packet_threshhold = threshhold_override;

    return;
}

/**
 * \brief Used to inform the cuda packet batcher that packet batching shouldn't
 *        be done anymore and set the flag to indicate this. We also need to
 *        signal the cuda batcher data inq, in case it is waiting on the inq
 *        for a new free packet buffer.
 */
void SCCudaPBKillBatchingPackets(void)
{
    run_batcher = 0;
    SCDQDataQueue *dq = &data_queues[tmq_inq->id];
    SCCondSignal(&dq->cond_q);

    return;
}
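
/**
 * \brief Used to indicate to the cuda packet batcher that we are running
 *        unittests, so that it can skip setting the alarm.
 *
 * \param status 1 if we are running unittests, 0 otherwise.
 */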
void SCCudaPBRunningTests(int status)
{
    unittest_mode = status;
}

/***********************************Unittests**********************************/

#ifdef UNITTESTS
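
/**
 * \test Checks that the batcher fills a packets buffer with the expected
 *       payloads, offsets and lengths, and queues it to the outq once the
 *       packet threshold is hit.
 */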
int SCCudaPBTest01(void)
{
#define ALIGN_UP(offset, alignment) \
    (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)

    uint8_t raw_eth[] = {
        0x00, 0x25, 0x00, 0x9e, 0xfa, 0xfe, 0x00, 0x02,
        0xcf, 0x74, 0xfe, 0xe1, 0x08, 0x00, 0x45, 0x00,
        0x01, 0xcc, 0xcb, 0x91, 0x00, 0x00, 0x34, 0x06,
        0xdf, 0xa8, 0xd1, 0x55, 0xe3, 0x67, 0xc0, 0xa8,
        0x64, 0x8c, 0x00, 0x50, 0xc0, 0xb7, 0xd1, 0x11,
        0xed, 0x63, 0x81, 0xa9, 0x9a, 0x05, 0x80, 0x18,
        0x00, 0x75, 0x0a, 0xdd, 0x00, 0x00, 0x01, 0x01,
        0x08, 0x0a, 0x09, 0x8a, 0x06, 0xd0, 0x12, 0x21,
        0x2a, 0x3b, 0x48, 0x54, 0x54, 0x50, 0x2f, 0x31,
        0x2e, 0x31, 0x20, 0x33, 0x30, 0x32, 0x20, 0x46,
        0x6f, 0x75, 0x6e, 0x64, 0x0d, 0x0a, 0x4c, 0x6f,
        0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x3a, 0x20,
        0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77,
        0x77, 0x77, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c,
        0x65, 0x2e, 0x65, 0x73, 0x2f, 0x0d, 0x0a, 0x43,
        0x61, 0x63, 0x68, 0x65, 0x2d, 0x43, 0x6f, 0x6e,
        0x74, 0x72, 0x6f, 0x6c, 0x3a, 0x20, 0x70, 0x72,
        0x69, 0x76, 0x61, 0x74, 0x65, 0x0d, 0x0a, 0x43,
        0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x54,
        0x79, 0x70, 0x65, 0x3a, 0x20, 0x74, 0x65, 0x78,
        0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x20,
        0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3d,
        0x55, 0x54, 0x46, 0x2d, 0x38, 0x0d, 0x0a, 0x44,
        0x61, 0x74, 0x65, 0x3a, 0x20, 0x4d, 0x6f, 0x6e,
        0x2c, 0x20, 0x31, 0x34, 0x20, 0x53, 0x65, 0x70,
        0x20, 0x32, 0x30, 0x30, 0x39, 0x20, 0x30, 0x38,
        0x3a, 0x34, 0x38, 0x3a, 0x33, 0x31, 0x20, 0x47,
        0x4d, 0x54, 0x0d, 0x0a, 0x53, 0x65, 0x72, 0x76,
        0x65, 0x72, 0x3a, 0x20, 0x67, 0x77, 0x73, 0x0d,
        0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
        0x2d, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x3a,
        0x20, 0x32, 0x31, 0x38, 0x0d, 0x0a, 0x0d, 0x0a,
        0x3c, 0x48, 0x54, 0x4d, 0x4c, 0x3e, 0x3c, 0x48,
        0x45, 0x41, 0x44, 0x3e, 0x3c, 0x6d, 0x65, 0x74,
        0x61, 0x20, 0x68, 0x74, 0x74, 0x70, 0x2d, 0x65,
        0x71, 0x75, 0x69, 0x76, 0x3d, 0x22, 0x63, 0x6f,
        0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x74, 0x79,
        0x70, 0x65, 0x22, 0x20, 0x63, 0x6f, 0x6e, 0x74,
        0x65, 0x6e, 0x74, 0x3d, 0x22, 0x74, 0x65, 0x78,
        0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x63,
        0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3d, 0x75,
        0x74, 0x66, 0x2d, 0x38, 0x22, 0x3e, 0x0a, 0x3c,
        0x54, 0x49, 0x54, 0x4c, 0x45, 0x3e, 0x33, 0x30,
        0x32, 0x20, 0x4d, 0x6f, 0x76, 0x65, 0x64, 0x3c,
        0x2f, 0x54, 0x49, 0x54, 0x4c, 0x45, 0x3e, 0x3c,
        0x2f, 0x48, 0x45, 0x41, 0x44, 0x3e, 0x3c, 0x42,
        0x4f, 0x44, 0x59, 0x3e, 0x0a, 0x3c, 0x48, 0x31,
        0x3e, 0x33, 0x30, 0x32, 0x20, 0x4d, 0x6f, 0x76,
        0x65, 0x64, 0x3c, 0x2f, 0x48, 0x31, 0x3e, 0x0a,
        0x54, 0x68, 0x65, 0x20, 0x64, 0x6f, 0x63, 0x75,
        0x6d, 0x65, 0x6e, 0x74, 0x20, 0x68, 0x61, 0x73,
        0x20, 0x6d, 0x6f, 0x76, 0x65, 0x64, 0x0a, 0x3c,
        0x41, 0x20, 0x48, 0x52, 0x45, 0x46, 0x3d, 0x22,
        0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77,
        0x77, 0x77, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c,
        0x65, 0x2e, 0x65, 0x73, 0x2f, 0x22, 0x3e, 0x68,
        0x65, 0x72, 0x65, 0x3c, 0x2f, 0x41, 0x3e, 0x2e,
        0x0d, 0x0a, 0x3c, 0x2f, 0x42, 0x4f, 0x44, 0x59,
        0x3e, 0x3c, 0x2f, 0x48, 0x54, 0x4d, 0x4c, 0x3e,
        0x0d, 0x0a };

    int result = 0;
    SCCudaPBThreadCtx *tctx = NULL;

    Packet p;
    DecodeThreadVars dtv;
    ThreadVars tv;
    ThreadVars tv_cuda_PB;
    DetectEngineCtx *de_ctx = NULL;

    SCCudaPBPacketsBuffer *pb = NULL;
    SCCudaPBPacketDataForGPU *buff_packet = NULL;
    SCDQDataQueue *dq = NULL;

    uint32_t i = 0;

    char *strings[] = {"test_one",
                       "test_two",
                       "test_three",
                       "test_four",
                       "test_five",
                       "test_six",
                       "test_seven",
                       "test_eight",
                       "test_nine",
                       "test_ten"};

    uint32_t packets_payload_offset_buffer[sizeof(strings)/sizeof(char *)];
    memset(packets_payload_offset_buffer, 0, sizeof(packets_payload_offset_buffer));
    uint32_t packets_offset_buffer[sizeof(strings)/sizeof(char *)];
    memset(packets_offset_buffer, 0, sizeof(packets_offset_buffer));

    uint32_t packets_total_payload_len = 0;
    uint32_t packets_buffer_len = 0;

    for (i = 0; i < sizeof(strings)/sizeof(char *); i++) {
        packets_total_payload_len += strlen(strings[i]);
    }

    for (i = 1; i < sizeof(strings)/sizeof(char *); i++) {
        packets_payload_offset_buffer[i] = packets_payload_offset_buffer[i - 1] + strlen(strings[i - 1]);
        packets_offset_buffer[i] = packets_offset_buffer[i - 1] +
            sizeof(SCCudaPBPacketDataForGPUNonPayload) + strlen(strings[i - 1]);
        ALIGN_UP(packets_offset_buffer[i], sizeof(CUdeviceptr));
    }
    packets_buffer_len += packets_offset_buffer[(sizeof(strings)/sizeof(char *)) - 1] +
        sizeof(SCCudaPBPacketDataForGPUNonPayload) + strlen(strings[(sizeof(strings)/sizeof(char *)) - 1]);

    memset(&p, 0, sizeof(Packet));
    memset(&dtv, 0, sizeof(DecodeThreadVars));
    memset(&tv, 0, sizeof(ThreadVars));
    memset(&tv_cuda_PB, 0, sizeof(ThreadVars));

    FlowInitConfig(FLOW_QUIET);
    DecodeEthernet(&tv, &dtv, &p, raw_eth, sizeof(raw_eth), NULL);

    de_ctx = DetectEngineCtxInit();
    if (de_ctx == NULL) {
        goto end;
    }

    de_ctx->mpm_matcher = MPM_B2G_CUDA;
    de_ctx->flags |= DE_QUIET;

    de_ctx->sig_list = SigInit(de_ctx, "alert tcp any any -> any any (msg:\"Bamboo\"; "
                               "content:test; sid:1;)");
    if (de_ctx->sig_list == NULL) {
        printf("signature parsing failed\n");
        goto end;
    }
    SigGroupBuild(de_ctx);

    result = 1;

    SCCudaPBSetUpQueuesAndBuffers();
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 10);
    SCCudaPBRunningTests(1);
    SCCudaPBThreadInit(&tv_cuda_PB, de_ctx, (void *)&tctx);
    SCCudaPBSetBufferPacketThreshhold(sizeof(strings)/sizeof(char *));

    p.payload = (uint8_t *)strings[0];
    p.payload_len = strlen(strings[0]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[1];
    p.payload_len = strlen(strings[1]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[2];
    p.payload_len = strlen(strings[2]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[3];
    p.payload_len = strlen(strings[3]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[4];
    p.payload_len = strlen(strings[4]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[5];
    p.payload_len = strlen(strings[5]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[6];
    p.payload_len = strlen(strings[6]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[7];
    p.payload_len = strlen(strings[7]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[8];
    p.payload_len = strlen(strings[8]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    p.payload = (uint8_t *)strings[9];
    p.payload_len = strlen(strings[9]);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 1);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 8);

    dq = &data_queues[tmq_outq->id];
    pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq);
    if (pb == NULL) {
        result = 0;
        goto end;
    }
    result &= (dq->len == 0);
    result &= (pb->nop_in_buffer == 10);
    if (result == 0)
        goto end;

    for (i = 0; i < pb->nop_in_buffer; i++) {
        buff_packet = (SCCudaPBPacketDataForGPU *)(pb->packets_buffer + pb->packets_offset_buffer[i]);
        result &= (strlen(strings[i]) == buff_packet->payload_len);
        result &= (memcmp(strings[i], buff_packet->payload, buff_packet->payload_len) == 0);
        if (result == 0)
            goto end;
        result &= (packets_payload_offset_buffer[i] == pb->packets_payload_offset_buffer[i]);
        result &= (packets_offset_buffer[i] == pb->packets_offset_buffer[i]);
    }
    result &= (packets_total_payload_len == pb->packets_total_payload_len);
    result &= (packets_buffer_len == pb->packets_buffer_len);

 end:
    SCCudaPBCleanUpQueuesAndBuffers();
    if (de_ctx) {
        SigGroupCleanup(de_ctx);
        SigCleanSignatures(de_ctx);
        DetectEngineCtxFree(de_ctx);
    }

    SCCudaPBThreadDeInit(NULL, tctx);
    return result;
}
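
/**
 * \test Checks that a packet whose traffic does not match any signature group
 *       head with patterns is not batched into the packets buffer.
 */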
int SCCudaPBTest02(void)
{
    uint8_t raw_eth[] = {
        0x00, 0x25, 0x00, 0x9e, 0xfa, 0xfe, 0x00, 0x02,
        0xcf, 0x74, 0xfe, 0xe1, 0x08, 0x00, 0x45, 0x00,
        0x01, 0xcc, 0xcb, 0x91, 0x00, 0x00, 0x34, 0x06,
        0xdf, 0xa8, 0xd1, 0x55, 0xe3, 0x67, 0xc0, 0xa8,
        0x64, 0x8c, 0x00, 0x50, 0xc0, 0xb7, 0xd1, 0x11,
        0xed, 0x63, 0x81, 0xa9, 0x9a, 0x05, 0x80, 0x18,
        0x00, 0x75, 0x0a, 0xdd, 0x00, 0x00, 0x01, 0x01,
        0x08, 0x0a, 0x09, 0x8a, 0x06, 0xd0, 0x12, 0x21,
        0x2a, 0x3b, 0x48, 0x54, 0x54, 0x50, 0x2f, 0x31,
        0x2e, 0x31, 0x20, 0x33, 0x30, 0x32, 0x20, 0x46,
        0x6f, 0x75, 0x6e, 0x64, 0x0d, 0x0a, 0x4c, 0x6f,
        0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x3a, 0x20,
        0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77,
        0x77, 0x77, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c,
        0x65, 0x2e, 0x65, 0x73, 0x2f, 0x0d, 0x0a, 0x43,
        0x61, 0x63, 0x68, 0x65, 0x2d, 0x43, 0x6f, 0x6e,
        0x74, 0x72, 0x6f, 0x6c, 0x3a, 0x20, 0x70, 0x72,
        0x69, 0x76, 0x61, 0x74, 0x65, 0x0d, 0x0a, 0x43,
        0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x54,
        0x79, 0x70, 0x65, 0x3a, 0x20, 0x74, 0x65, 0x78,
        0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x20,
        0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3d,
        0x55, 0x54, 0x46, 0x2d, 0x38, 0x0d, 0x0a, 0x44,
        0x61, 0x74, 0x65, 0x3a, 0x20, 0x4d, 0x6f, 0x6e,
        0x2c, 0x20, 0x31, 0x34, 0x20, 0x53, 0x65, 0x70,
        0x20, 0x32, 0x30, 0x30, 0x39, 0x20, 0x30, 0x38,
        0x3a, 0x34, 0x38, 0x3a, 0x33, 0x31, 0x20, 0x47,
        0x4d, 0x54, 0x0d, 0x0a, 0x53, 0x65, 0x72, 0x76,
        0x65, 0x72, 0x3a, 0x20, 0x67, 0x77, 0x73, 0x0d,
        0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
        0x2d, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x3a,
        0x20, 0x32, 0x31, 0x38, 0x0d, 0x0a, 0x0d, 0x0a,
        0x3c, 0x48, 0x54, 0x4d, 0x4c, 0x3e, 0x3c, 0x48,
        0x45, 0x41, 0x44, 0x3e, 0x3c, 0x6d, 0x65, 0x74,
        0x61, 0x20, 0x68, 0x74, 0x74, 0x70, 0x2d, 0x65,
        0x71, 0x75, 0x69, 0x76, 0x3d, 0x22, 0x63, 0x6f,
        0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x74, 0x79,
        0x70, 0x65, 0x22, 0x20, 0x63, 0x6f, 0x6e, 0x74,
        0x65, 0x6e, 0x74, 0x3d, 0x22, 0x74, 0x65, 0x78,
        0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x63,
        0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3d, 0x75,
        0x74, 0x66, 0x2d, 0x38, 0x22, 0x3e, 0x0a, 0x3c,
        0x54, 0x49, 0x54, 0x4c, 0x45, 0x3e, 0x33, 0x30,
        0x32, 0x20, 0x4d, 0x6f, 0x76, 0x65, 0x64, 0x3c,
        0x2f, 0x54, 0x49, 0x54, 0x4c, 0x45, 0x3e, 0x3c,
        0x2f, 0x48, 0x45, 0x41, 0x44, 0x3e, 0x3c, 0x42,
        0x4f, 0x44, 0x59, 0x3e, 0x0a, 0x3c, 0x48, 0x31,
        0x3e, 0x33, 0x30, 0x32, 0x20, 0x4d, 0x6f, 0x76,
        0x65, 0x64, 0x3c, 0x2f, 0x48, 0x31, 0x3e, 0x0a,
        0x54, 0x68, 0x65, 0x20, 0x64, 0x6f, 0x63, 0x75,
        0x6d, 0x65, 0x6e, 0x74, 0x20, 0x68, 0x61, 0x73,
        0x20, 0x6d, 0x6f, 0x76, 0x65, 0x64, 0x0a, 0x3c,
        0x41, 0x20, 0x48, 0x52, 0x45, 0x46, 0x3d, 0x22,
        0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77,
        0x77, 0x77, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c,
        0x65, 0x2e, 0x65, 0x73, 0x2f, 0x22, 0x3e, 0x68,
        0x65, 0x72, 0x65, 0x3c, 0x2f, 0x41, 0x3e, 0x2e,
        0x0d, 0x0a, 0x3c, 0x2f, 0x42, 0x4f, 0x44, 0x59,
        0x3e, 0x3c, 0x2f, 0x48, 0x54, 0x4d, 0x4c, 0x3e,
        0x0d, 0x0a };

    int result = 0;
    const char *string = NULL;
    SCCudaPBThreadCtx *tctx = NULL;

    Packet p;
    DecodeThreadVars dtv;
    ThreadVars tv;
    ThreadVars tv_cuda_PB;
    DetectEngineCtx *de_ctx = NULL;

    SCCudaPBPacketsBuffer *pb = NULL;
    SCDQDataQueue *dq = NULL;

    memset(&p, 0, sizeof(Packet));
    memset(&dtv, 0, sizeof(DecodeThreadVars));
    memset(&tv, 0, sizeof(ThreadVars));
    memset(&tv_cuda_PB, 0, sizeof(ThreadVars));

    FlowInitConfig(FLOW_QUIET);
    DecodeEthernet(&tv, &dtv, &p, raw_eth, sizeof(raw_eth), NULL);

    de_ctx = DetectEngineCtxInit();
    if (de_ctx == NULL) {
        goto end;
    }

    de_ctx->mpm_matcher = MPM_B2G_CUDA;
    de_ctx->flags |= DE_QUIET;

    de_ctx->sig_list = SigInit(de_ctx, "alert tcp any 5555 -> any any (msg:\"Bamboo\"; "
                               "content:test; sid:1;)");
    if (de_ctx->sig_list == NULL) {
        printf("signature parsing failed\n");
        goto end;
    }
    SigGroupBuild(de_ctx);

    result = 1;

    SCCudaPBSetUpQueuesAndBuffers();
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 10);
    SCCudaPBRunningTests(1);
    SCCudaPBThreadInit(&tv_cuda_PB, de_ctx, (void *)&tctx);

    string = "test_one";
    p.payload = (uint8_t *)string;
    p.payload_len = strlen(string);
    SCCudaPBBatchPackets(NULL, &p, tctx, NULL, NULL);
    dq = &data_queues[tmq_outq->id];
    result &= (dq->len == 0);
    dq = &data_queues[tmq_inq->id];
    result &= (dq->len == 9);

    pb = tctx->curr_pb;
    result &= (pb->nop_in_buffer == 0);

 end:
    SCCudaPBCleanUpQueuesAndBuffers();
    if (de_ctx) {
        SigGroupCleanup(de_ctx);
        SigCleanSignatures(de_ctx);
        DetectEngineCtxFree(de_ctx);
    }

    SCCudaPBThreadDeInit(NULL, tctx);
    return result;
}

#endif /* UNITTESTS */

void SCCudaPBRegisterTests(void)
{

#ifdef UNITTESTS
    UtRegisterTest("SCCudaPBTest01", SCCudaPBTest01, 1);
    UtRegisterTest("SCCudaPBTest02", SCCudaPBTest02, 1);
#endif

    return;
}

#endif /* __SC_CUDA_SUPPORT__ */