diff --git a/src/cuda-packet-batcher.c b/src/cuda-packet-batcher.c index 1259248bc2..26b8e34e4a 100644 --- a/src/cuda-packet-batcher.c +++ b/src/cuda-packet-batcher.c @@ -50,6 +50,7 @@ #include "detect-parse.h" #include "tm-threads.h" #include "tmqh-packetpool.h" +#include "util-mpm.h" /* \todo Make this user configurable through our yaml file. Also provide options * where this can be dynamically updated based on the traffic */ @@ -80,7 +81,10 @@ static int run_batcher = 1; * on the traffic * \todo make this user configurable, as well allow dynamic update of this * variable based on the traffic seen */ -static uint32_t buffer_packet_threshhold = 2400; +static uint32_t buffer_packet_threshhold = 0; + +/* the profile used by the cuda batcher */ +static MpmCudaConf *profile = NULL; /* flag used by the SIG_ALRM handler to indicate that the batcher TM should queue * the buffer to be processed by the Cuda Mpm B2g Batcher Thread for further @@ -302,10 +306,10 @@ void *SCCudaPBTmThreadsSlot1(void *td) * tm-threads.c and this custom Slot1 function is this call * here. We need to make the call here, even if we don't * receive a packet from the previous stage in the runmodes. - * This is needed in cases where we the SIG_ALRM handler + * This is needed in cases where the SIG_ALRM handler * wants us to queue the buffer to the GPU and ends up waking * the Batcher TM(which is waiting on a cond from the previous - * feeder TM). Please handler the NULL packet case in the + * feeder TM). Please handle the NULL packet case in the * function that you now call */ r = s->s.SlotFunc(tv, p, s->s.slot_data, NULL, NULL); } else { @@ -400,8 +404,8 @@ SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void) } /* the buffer for the packets to be sent over to the gpu. We allot space for - * a minimum of SC_CUDA_PB_MIN_NO_OF_PACKETS, i.e. 
if each packet buffered - * is full to the brim */ + * profile->packet_buffer_limit packets, assuming a size of + * profile->packet_size_limit for each packet */ SCCudaHlModuleData *data = NULL; data = SCCudaHlGetModuleData(SCCudaHlGetModuleHandle("SC_CUDA_PACKET_BATCHER")); if (data == NULL) { @@ -412,7 +416,7 @@ SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void) return NULL; } - if (SCCudaHlGetCudaContext(&data->cuda_context, data->handle) == -1) { + if (SCCudaHlGetCudaContext(&data->cuda_context, "mpm", data->handle) == -1) { SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Error getting cuda context"); return NULL; } @@ -422,38 +426,67 @@ SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void) "Error pushing cuda context to allocate memory"); } - if (SCCudaMemHostAlloc((void**)&pb->packets_buffer, - sizeof(SCCudaPBPacketDataForGPU) * - SC_CUDA_PB_MIN_NO_OF_PACKETS, - CU_MEMHOSTALLOC_PORTABLE | - CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); - exit(EXIT_FAILURE); + if (profile->page_locked) { + if (SCCudaMemHostAlloc((void**)&pb->packets_buffer, + profile->packet_buffer_limit * + (profile->packet_size_limit + + sizeof(SCCudaPBPacketDataForGPUNonPayload)), + CU_MEMHOSTALLOC_PORTABLE | + CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); + exit(EXIT_FAILURE); + } + } else { + pb->packets_buffer = malloc(profile->packet_buffer_limit * + (profile->packet_size_limit + + sizeof(SCCudaPBPacketDataForGPUNonPayload))); + if (pb->packets_buffer == NULL) { + SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } } - memset(pb->packets_buffer, 0, sizeof(SCCudaPBPacketDataForGPU) * - SC_CUDA_PB_MIN_NO_OF_PACKETS); - - /* used to hold the offsets of the buffered packets in the packets_buffer */ - if (SCCudaMemHostAlloc((void**)&pb->packets_offset_buffer, - sizeof(uint32_t) * SC_CUDA_PB_MIN_NO_OF_PACKETS, - 
CU_MEMHOSTALLOC_PORTABLE | - CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); - exit(EXIT_FAILURE); + memset(pb->packets_buffer, 0, profile->packet_buffer_limit * + (profile->packet_size_limit + sizeof(SCCudaPBPacketDataForGPUNonPayload))); + + if (profile->page_locked) { + /* used to hold the offsets of the buffered packets in the packets_buffer */ + if (SCCudaMemHostAlloc((void**)&pb->packets_offset_buffer, + sizeof(uint32_t) * profile->packet_buffer_limit, + CU_MEMHOSTALLOC_PORTABLE | + CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); + exit(EXIT_FAILURE); + } + } else { + pb->packets_offset_buffer = malloc(sizeof(uint32_t) * + profile->packet_buffer_limit); + if (pb->packets_offset_buffer == NULL) { + SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } } - memset(pb->packets_offset_buffer, 0, sizeof(uint32_t) * - SC_CUDA_PB_MIN_NO_OF_PACKETS); - - /* used to hold the offsets of the packets payload */ - if (SCCudaMemHostAlloc((void**)&pb->packets_payload_offset_buffer, - sizeof(uint32_t) * SC_CUDA_PB_MIN_NO_OF_PACKETS, - CU_MEMHOSTALLOC_PORTABLE | - CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); - exit(EXIT_FAILURE); + memset(pb->packets_offset_buffer, 0, + sizeof(uint32_t) * profile->packet_buffer_limit); + + if (profile->page_locked) { + /* used to hold the offsets of the packets payload */ + if (SCCudaMemHostAlloc((void**)&pb->packets_payload_offset_buffer, + sizeof(uint32_t) * profile->packet_buffer_limit, + CU_MEMHOSTALLOC_PORTABLE | + CU_MEMHOSTALLOC_WRITECOMBINED) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory"); + exit(EXIT_FAILURE); + } + } else { + pb->packets_payload_offset_buffer = malloc(sizeof(uint32_t) * + profile->packet_buffer_limit); + if (pb->packets_payload_offset_buffer == NULL) { + 
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } } - memset(pb->packets_payload_offset_buffer, 0, sizeof(uint32_t) * - SC_CUDA_PB_MIN_NO_OF_PACKETS); + memset(pb->packets_payload_offset_buffer, 0, + sizeof(uint32_t) * profile->packet_buffer_limit); SCLogDebug("Allocated pagelocked CUDA memory"); if (SCCudaCtxPopCurrent(NULL) == -1) { @@ -463,13 +496,13 @@ SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void) /* used to hold the packet addresses for all the packets buffered inside * packets_buffer */ pb->packets_address_buffer = malloc(sizeof(Packet *) * - SC_CUDA_PB_MIN_NO_OF_PACKETS); + profile->packet_buffer_limit); if (pb->packets_address_buffer == NULL) { SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); exit(EXIT_FAILURE); } memset(pb->packets_address_buffer, 0, sizeof(Packet *) * - SC_CUDA_PB_MIN_NO_OF_PACKETS); + profile->packet_buffer_limit); return pb; } @@ -541,7 +574,7 @@ TmEcode SCCudaPBThreadInit(ThreadVars *tv, void *initdata, void **data) if (!unittest_mode) { /* Set the alarm time limit during which the batcher thread would * buffer packets */ - alarm(SC_CUDA_PB_BATCHER_ALARM_TIME); + alarm(profile->batching_timeout); } return TM_ECODE_OK; @@ -570,11 +603,13 @@ TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue SCLogDebug("Cuda packet buffer TIME limit exceeded. Buffering packet " "buffer and reseting the alarm"); queue_buffer = 0; + SCLogDebug("Cuda packet buffer TIME limit exceeded. Buffering packet " + "buffer and reseting the alarm"); SCCudaPBQueueBuffer(data); /* if we are running unittests, don't set the alarm handler. 
It will only * cause a seg fault if the tests take too long */ if (!unittest_mode) { - alarm(SC_CUDA_PB_BATCHER_ALARM_TIME); + alarm(profile->batching_timeout); } } @@ -730,7 +765,7 @@ TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue * to queue the buffer */ if ( (pb->nop_in_buffer == buffer_packet_threshhold) || queue_buffer) { queue_buffer = 0; - SCLogDebug("Either we have hit the threshold limit for packets(i.e.) we " + SCLogDebug("Either we have hit the threshold limit for packets(i.e. we " "have %d packets limit) OR we have exceeded the buffering " "time limit. Buffering the packet buffer and reseting the " "alarm.", buffer_packet_threshhold); @@ -738,7 +773,7 @@ TmEcode SCCudaPBBatchPackets(ThreadVars *tv, Packet *p, void *data, PacketQueue /* if we are running unittests, don't set the alarm handler. It will only * cause a seg fault if the tests take too long */ if (!unittest_mode) { - alarm(SC_CUDA_PB_BATCHER_ALARM_TIME); + alarm(profile->batching_timeout); } } @@ -826,6 +861,8 @@ void SCCudaPBSetUpQueuesAndBuffers(void) * page-locked memory */ SCCudaHlRegisterModule("SC_CUDA_PACKET_BATCHER"); + profile = SCCudaHlGetProfile("mpm"); + /* allocate the packet buffer */ /* \todo need to work out the right no of packet buffers that we need to * queue. I doubt we will need more than 4(as long as we don't run it on @@ -833,7 +870,15 @@ void SCCudaPBSetUpQueuesAndBuffers(void) * new ones, when we run out of buffers, since malloc for a huge chunk * like this will take time. 
We need to figure out a value based on * various other parameters like alarm time and buffer threshold value */ - for (i = 0; i < 10; i++) { + for (i = 0; i < profile->packet_buffers; i++) { + if (profile->page_locked) { + SCLogDebug("Allocating \"%d\" page_locked cuda packet buffers", + profile->packet_buffers); + } else { + SCLogDebug("Allocating \"%d\" non-page_locked cuda packet buffers", + profile->packet_buffers); + } + SCCudaPBPacketsBuffer *pb = SCCudaPBAllocSCCudaPBPacketsBuffer(); /* dump the buffer into the inqueue for this batcher TM. the batcher * thread would be the first consumer for these buffers */ @@ -843,7 +888,7 @@ void SCCudaPBSetUpQueuesAndBuffers(void) /* \todo This needs to be changed ASAP. This can't exceed max_pending_packets. * Also we need to make this user configurable and allow dynamic updaes * based on live traffic */ - buffer_packet_threshhold = 2400; + buffer_packet_threshhold = profile->packet_buffer_limit; return; } @@ -870,21 +915,33 @@ void SCCudaPBCleanUpQueuesAndBuffers(void) SCMutexLock(&dq->mutex_q); while ( (pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq)) != NULL) { if (pb->packets_buffer != NULL) { - if (SCCudaMemFreeHost(pb->packets_buffer) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " - "packets_buffer"); + if (profile->page_locked) { + if (SCCudaMemFreeHost(pb->packets_buffer) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " + "packets_buffer"); + } + } else { + free(pb->packets_buffer); } } if (pb->packets_offset_buffer != NULL) { - if (SCCudaMemFreeHost(pb->packets_offset_buffer) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " - "packets_offset_buffer"); + if (profile->page_locked) { + if (SCCudaMemFreeHost(pb->packets_offset_buffer) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " + "packets_offset_buffer"); + } + } else { + free(pb->packets_offset_buffer); } } if 
(pb->packets_payload_offset_buffer != NULL) { - if (SCCudaMemFreeHost(pb->packets_payload_offset_buffer) == -1) { - SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " - "packets_payload_offset_buffer"); + if (profile->page_locked) { + if (SCCudaMemFreeHost(pb->packets_payload_offset_buffer) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " + "packets_payload_offset_buffer"); + } + } else { + free(pb->packets_payload_offset_buffer); } } @@ -939,6 +996,17 @@ void SCCudaPBSetBufferPacketThreshhold(uint32_t threshhold_override) return; } +/** + * \brief Function used to set the profile for cuda packet batcher. Used + * for unittests alone. + */ +void SCCudaPBSetProfile(char *profile_name) +{ + profile = SCCudaHlGetProfile("mpm"); + + return; +} + /** * \brief Used to inform the cuda packet batcher that packet batching shouldn't * be done anymore and set the flag to indicate this. We also need to diff --git a/src/cuda-packet-batcher.h b/src/cuda-packet-batcher.h index 2c53c4520a..587476859b 100644 --- a/src/cuda-packet-batcher.h +++ b/src/cuda-packet-batcher.h @@ -138,6 +138,7 @@ void TmModuleCudaPacketBatcherRegister(void); void *SCCudaPBTmThreadsSlot1(void *); void SCCudaPBRunningTests(int); +void SCCudaPBSetProfile(char *); #endif /* __SC_CUDA_SUPPORT__ */ diff --git a/src/detect.c b/src/detect.c index de8a9c80fb..3cf2d0dec4 100644 --- a/src/detect.c +++ b/src/detect.c @@ -3732,7 +3732,7 @@ int SigGroupBuild (DetectEngineCtx *de_ctx) { de_ctx->cuda_rc_mod_handle = SCCudaHlRegisterModule("SC_RULES_CONTENT_B2G_CUDA"); if (de_ctx->mpm_matcher == MPM_B2G_CUDA) { CUcontext dummy_context; - if (SCCudaHlGetCudaContext(&dummy_context, + if (SCCudaHlGetCudaContext(&dummy_context, "mpm", de_ctx->cuda_rc_mod_handle) == -1) { SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda context for the " "module SC_RULES_CONTENT_B2G_CUDA"); diff --git a/src/suricata.c b/src/suricata.c index 035f398795..e834a48ba1 100644 --- 
a/src/suricata.c +++ b/src/suricata.c @@ -792,6 +792,11 @@ int main(int argc, char **argv) * logging module. */ SCLogLoadConfig(); +#ifdef __SC_CUDA_SUPPORT__ + /* load the cuda configuration */ + SCCudaHlGetYamlConf(); +#endif /* __SC_CUDA_SUPPORT__ */ + /* Load the Host-OS lookup. */ SCHInfoLoadFromConfig(); diff --git a/src/util-cuda-handlers.c b/src/util-cuda-handlers.c index ae477b67ad..305c9cfed8 100644 --- a/src/util-cuda-handlers.c +++ b/src/util-cuda-handlers.c @@ -51,7 +51,7 @@ * cuda modules against a cuda_context, although it is highly unlikely we * would need this feature. * - * We also need to use a mutex for module_datas. + * We also need to use a mutex for module_data. */ #include "suricata-common.h" @@ -70,14 +70,123 @@ #include "util-debug.h" #include "util-unittest.h" #include "packet-queue.h" +#include "util-mpm.h" /* macros decides if cuda is enabled for the platform or not */ #ifdef __SC_CUDA_SUPPORT__ -static SCCudaHlModuleData *module_datas = NULL; +static SCCudaHlModuleData *module_data = NULL; static uint8_t module_handle = 1; +/* holds the parsed cuda configuration from our yaml file */ +static SCCudaHlCudaProfile *cuda_profiles = NULL; + +/* used by unittests only */ +static SCCudaHlCudaProfile *backup_cuda_profiles = NULL; + +/** + * \brief Needed by unittests. Backup the existing cuda profile in handlers. + */ +void SCCudaHlBackupRegisteredProfiles(void) +{ + backup_cuda_profiles = cuda_profiles; + cuda_profiles = NULL; + + return; +} + +/** + * \brief Needed by unittests. Restore the previous backup of handlers' + * cuda profile. + */ +void SCCudaHlRestoreBackupRegisteredProfiles(void) +{ + cuda_profiles = backup_cuda_profiles; + + return; +} + +/** + * \brief Parse the "cuda" subsection config from our conf file. 
+ */ +void SCCudaHlGetYamlConf(void) +{ + SCCudaHlCudaProfile *profile = NULL; + + /* "mpm" profile, found under "cuda.mpm" in the conf file */ + profile = malloc(sizeof(SCCudaHlCudaProfile)); + if (profile == NULL) { + SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } + memset(profile, 0, sizeof(SCCudaHlCudaProfile)); + profile->name = "mpm"; + profile->data = MpmCudaConfParse(); + if (cuda_profiles == NULL) { + cuda_profiles = profile; + } else { + profile->next = cuda_profiles; + cuda_profiles = profile; + } + + return; +} + +/** + * \brief Get a particular cuda profile specified as arg. + * + * \param profile_name Name of the the profile to retrieve. + * + * \retval Data associated with the profile. + */ +void *SCCudaHlGetProfile(char *profile_name) +{ + SCCudaHlCudaProfile *profile = cuda_profiles; + + if (cuda_profiles == NULL ) { + SCLogInfo("No cuda profile registered"); + return NULL; + } + + if (profile_name == NULL) { + SCLogError(SC_ERR_INVALID_ARGUMENTS, "argument profile NULL"); + return NULL; + } + + while (profile != NULL && strcasecmp(profile->name, profile_name) != 0) { + profile = profile->next; + } + + if (profile != NULL) + return profile->data; + else + return NULL; +} + +/** + * \brief Clean the cuda profiles, held in cuda_profiles. 
+ */ +void SCCudaHlCleanProfiles(void) +{ + SCCudaHlCudaProfile *profile = cuda_profiles; + SCCudaHlCudaProfile *profile_next = NULL; + + while (profile != NULL) { + profile_next = profile->next; + if (profile->data != NULL) { + if (strcasecmp(profile->name, "mpm") == 0) { + MpmCudaConfCleanup(profile->data); + } + } + free(profile); + profile = profile_next; + } + cuda_profiles = NULL; + + return; +} + /** * \internal * \brief Returns a SCCudaHlModuleData instance from the global data store @@ -89,7 +198,7 @@ static uint8_t module_handle = 1; */ SCCudaHlModuleData *SCCudaHlGetModuleData(uint8_t handle) { - SCCudaHlModuleData *data = module_datas; + SCCudaHlModuleData *data = module_data; if (data == NULL) return NULL; @@ -189,15 +298,16 @@ static int SCCudaHlGetUniqueHandle(void) * in the argument. If a cuda_context is already present for * a handle, it is returned. * - * \param p_context Pointer to a cuda context instance that should be updated - * with a cuda context. - * \param handle A unique handle which identifies a module. Obtained from - * a call to SCCudaHlGetUniqueHandle(). + * \param p_context Pointer to a cuda context instance that should be updated + * with a cuda context. + * \param cuda_profile The cuda profile, supplied as a string. + * \param handle A unique handle which identifies a module. Obtained from + * a call to SCCudaHlGetUniqueHandle(). * * \retval 0 On success. * \retval -1 On failure. */ -int SCCudaHlGetCudaContext(CUcontext *p_context, int handle) +int SCCudaHlGetCudaContext(CUcontext *p_context, char *cuda_profile, int handle) { SCCudaHlModuleData *data = NULL; SCCudaDevices *devices = NULL; @@ -227,23 +337,23 @@ int SCCudaHlGetCudaContext(CUcontext *p_context, int handle) return 0; } - /* Get default log level and format. 
*/ - char *cuda_device_id_str = NULL; - int cuda_device_id = SC_CUDA_DEFAULT_DEVICE; - if (ConfGet("cuda.device_id", &cuda_device_id_str) == 1) { - cuda_device_id = atoi(cuda_device_id_str); - if (!SCCudaIsCudaDeviceIdValid(cuda_device_id)) { - SCLogError(SC_ERR_CUDA_ERROR, "Invalid device id \"%s\" supplied " - "in the conf file", cuda_device_id_str); - cuda_device_id = SC_CUDA_DEFAULT_DEVICE; + int device_id = SC_CUDA_DEFAULT_DEVICE; + if (cuda_profile != NULL) { + /* Get default log level and format. */ + MpmCudaConf *profile = SCCudaHlGetProfile(cuda_profile); + if (profile != NULL) { + if (SCCudaIsCudaDeviceIdValid(profile->device_id)) { + device_id = profile->device_id; + } else { + SCLogError(SC_ERR_CUDA_ERROR, "Invalid device id \"%d\" supplied. " + "Using the first device.", profile->device_id); + } } - } else { - cuda_device_id = SC_CUDA_DEFAULT_DEVICE; } /* Get the device list for this CUDA platform and create a new cuda context */ devices = SCCudaGetDeviceList(); - if (SCCudaCtxCreate(p_context, 0, devices->devices[cuda_device_id]->device) == -1) + if (SCCudaCtxCreate(p_context, 0, devices->devices[device_id]->device) == -1) goto error; data->cuda_context = p_context[0]; @@ -565,7 +675,7 @@ int SCCudaHlRegisterDispatcherFunc(void *(*SCCudaHlDispFunc)(void *), int handle */ const char *SCCudaHlGetModuleName(int handle) { - SCCudaHlModuleData *data = module_datas; + SCCudaHlModuleData *data = module_data; while (data != NULL && data->handle != handle) { data = data->next; @@ -587,7 +697,7 @@ const char *SCCudaHlGetModuleName(int handle) */ int SCCudaHlGetModuleHandle(const char *name) { - SCCudaHlModuleData *data = module_datas; + SCCudaHlModuleData *data = module_data; while (data != NULL && strcmp(data->name, name) != 0) { @@ -615,7 +725,7 @@ int SCCudaHlGetModuleHandle(const char *name) */ int SCCudaHlRegisterModule(const char *name) { - SCCudaHlModuleData *data = module_datas; + SCCudaHlModuleData *data = module_data; SCCudaHlModuleData *new_data = 
NULL; while (data != NULL && @@ -624,9 +734,8 @@ int SCCudaHlRegisterModule(const char *name) } if (data != NULL) { - SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Module \"%s\" already " - "registered. Returning the handle for the already " - "registered module", name); + SCLogInfo("Module \"%s\" already registered. Returning the handle " + "for the already registered module", name); return data->handle; } @@ -646,13 +755,13 @@ int SCCudaHlRegisterModule(const char *name) new_data->handle = SCCudaHlGetUniqueHandle(); /* first module to be registered */ - if (module_datas == NULL) { - module_datas = new_data; + if (module_data == NULL) { + module_data = new_data; return new_data->handle; } /* add this new module_data instance to the global module_data list */ - data = module_datas; + data = module_data; while (data->next != NULL) data = data->next; data->next = new_data; @@ -723,10 +832,10 @@ int SCCudaHlDeRegisterModule(const char *name) } /* find the previous module data instance */ - if (module_datas == data) { - module_datas = module_datas->next; + if (module_data == data) { + module_data = module_data->next; } else { - prev_data = module_datas; + prev_data = module_data; while (prev_data->next != data) prev_data = prev_data->next; prev_data->next = data->next; @@ -746,7 +855,7 @@ int SCCudaHlDeRegisterModule(const char *name) */ void SCCudaHlDeRegisterAllRegisteredModules(void) { - SCCudaHlModuleData *data = module_datas; + SCCudaHlModuleData *data = module_data; SCCudaHlModuleData *next_data = NULL; next_data = data; @@ -759,7 +868,7 @@ void SCCudaHlDeRegisterAllRegisteredModules(void) data = next_data; } - module_datas = NULL; + module_data = NULL; return; } @@ -805,7 +914,7 @@ int SCCudaHlTestEnvCudaContextInit(void) { CUcontext context; int module_handle = SCCudaHlRegisterModule("SC_RULES_CONTENT_B2G_CUDA"); - if (SCCudaHlGetCudaContext(&context, module_handle) == -1) { + if (SCCudaHlGetCudaContext(&context, NULL, module_handle) == -1) { printf("Error getting a 
cuda context"); } if (SCCudaHlPushCudaContextFromModule("SC_RULES_CONTENT_B2G_CUDA") == -1) { diff --git a/src/util-cuda-handlers.h b/src/util-cuda-handlers.h index cae7d595dd..2d135abaaf 100644 --- a/src/util-cuda-handlers.h +++ b/src/util-cuda-handlers.h @@ -61,7 +61,25 @@ typedef struct SCCudaHlModuleData_ { struct SCCudaHlModuleData_ *next; } SCCudaHlModuleData; -int SCCudaHlGetCudaContext(CUcontext *, int); +/** + * \brief Used to hold the cuda configuration from our conf yaml file + */ +typedef struct SCCudaHlCudaProfile_ { + /* profile name. Should be unique */ + char *name; + /* the data associated with this profile */ + void *data; + + struct SCCudaHlCudaProfile_ *next; +} SCCudaHlCudaProfile; + +void SCCudaHlGetYamlConf(void); +void *SCCudaHlGetProfile(char *); +void SCCudaHlCleanProfiles(void); +void SCCudaHlBackupRegisteredProfiles(void); +void SCCudaHlRestoreBackupRegisteredProfiles(void); + +int SCCudaHlGetCudaContext(CUcontext *, char *, int); int SCCudaHlGetCudaModule(CUmodule *, const char *, int); int SCCudaHlGetCudaModuleFromFile(CUmodule *, const char *, int); int SCCudaHlGetCudaDevicePtr(CUdeviceptr *, const char *, size_t, void *, int); diff --git a/src/util-mpm-b2g-cuda.c b/src/util-mpm-b2g-cuda.c index 8b84d96333..b92303f6b6 100644 --- a/src/util-mpm-b2g-cuda.c +++ b/src/util-mpm-b2g-cuda.c @@ -1232,7 +1232,7 @@ void B2gCudaDestroyCtx(MpmCtx *mpm_ctx) "module_data if we are having a module_handle"); goto error; } - if (SCCudaHlGetCudaContext(&dummy_context, ctx->module_handle) == -1) { + if (SCCudaHlGetCudaContext(&dummy_context, "mpm", ctx->module_handle) == -1) { SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda context for the " "module %s", module_data->name); goto error; @@ -1700,6 +1700,7 @@ typedef struct B2gCudaMpmThreadCtxData_ { */ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data) { + MpmCudaConf *profile = NULL; SCCudaHlModuleData *module_data = (SCCudaHlModuleData *)initdata; if 
(PatternMatchDefaultMatcher() != MPM_B2G_CUDA) @@ -1718,7 +1719,7 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data) tctx->b2g_cuda_module_handle = module_data->handle; - if (SCCudaHlGetCudaContext(&tctx->b2g_cuda_context, module_data->handle) == -1) { + if (SCCudaHlGetCudaContext(&tctx->b2g_cuda_context, "mpm", module_data->handle) == -1) { SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda context"); goto error; } @@ -1777,19 +1778,35 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data) tctx->b2g_cuda_search_kernel_arg_total = offset; + profile = SCCudaHlGetProfile("mpm"); + /* buffer to hold the b2g cuda mpm match results for 4000 packets. The - * extra 2 bytes(the 1 in 1481 instead of 1480) is to hold the no of - * matches for the payload. The remaining 1480 positions in the buffer - * is to hold the match offsets */ - if (SCCudaMemHostAlloc((void**)&tctx->results_buffer, sizeof(uint16_t) * 1481 * - SC_CUDA_PB_MIN_NO_OF_PACKETS, CU_MEMHOSTALLOC_PORTABLE) == -1){ - SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n"); - exit(EXIT_FAILURE); + * extra 2 bytes(the extra + 1 ) is to hold the no of + * matches for the payload. 
The remaining profile->packet_size_limit + * positions in the buffer is to hold the match offsets */ + if (profile->page_locked) { + if (SCCudaMemHostAlloc((void**)&tctx->results_buffer, + sizeof(uint16_t) * (profile->packet_size_limit + 1) * + profile->packet_buffer_limit, + CU_MEMHOSTALLOC_PORTABLE) == -1){ + SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n"); + exit(EXIT_FAILURE); + } + } else { + tctx->results_buffer = malloc(sizeof(uint16_t) * + (profile->packet_size_limit + 1) * + profile->packet_buffer_limit); + if (tctx->results_buffer == NULL) { + SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } } if (SCCudaHlGetCudaDevicePtr(&tctx->cuda_results_buffer, "MPM_B2G_RESULTS", - sizeof(uint16_t) * 1481 * SC_CUDA_PB_MIN_NO_OF_PACKETS, + sizeof(uint16_t) * + (profile->packet_size_limit + 1) * + profile->packet_buffer_limit, NULL, module_data->handle) == -1) { goto error; } @@ -1802,22 +1819,23 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data) if (SCCudaHlGetCudaDevicePtr(&tctx->cuda_packets_buffer, "MPM_B2G_PACKETS_BUFFER", - (sizeof(SCCudaPBPacketDataForGPU) * - SC_CUDA_PB_MIN_NO_OF_PACKETS), + profile->packet_buffer_limit * + (profile->packet_size_limit + + sizeof(SCCudaPBPacketDataForGPUNonPayload)), NULL, module_data->handle) == -1) { goto error; } if (SCCudaHlGetCudaDevicePtr(&tctx->cuda_packets_offset_buffer, "MPM_B2G_PACKETS_BUFFER_OFFSETS", - sizeof(uint32_t) * SC_CUDA_PB_MIN_NO_OF_PACKETS, + sizeof(uint32_t) * profile->packet_buffer_limit, NULL, module_data->handle) == -1) { goto error; } if (SCCudaHlGetCudaDevicePtr(&tctx->cuda_packets_payload_offset_buffer, "MPM_B2G_PACKETS_PAYLOAD_BUFFER_OFFSETS", - sizeof(uint32_t) * SC_CUDA_PB_MIN_NO_OF_PACKETS, + sizeof(uint32_t) * profile->packet_buffer_limit, NULL, module_data->handle) == -1) { goto error; } @@ -1882,6 +1900,7 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data) TmEcode 
B2gCudaMpmDispThreadDeInit(ThreadVars *tv, void *data) { B2gCudaMpmThreadCtxData *tctx = data; + MpmCudaConf *profile = NULL; if (tctx == NULL) { SCLogError(SC_ERR_INVALID_ARGUMENTS, "Invalid arguments. data NULL\n"); @@ -1898,16 +1917,22 @@ TmEcode B2gCudaMpmDispThreadDeInit(ThreadVars *tv, void *data) "module_data if we are having a module_handle"); goto error; } - if (SCCudaHlGetCudaContext(&dummy_context, tctx->b2g_cuda_module_handle) == -1) { + if (SCCudaHlGetCudaContext(&dummy_context, "mpm", tctx->b2g_cuda_module_handle) == -1) { SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda context for the " "module %s", module_data->name); goto error; } SCCudaCtxPushCurrent(dummy_context); - if (SCCudaMemFreeHost(tctx->results_buffer) == -1) - SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " - "results_buffer\n"); + profile = SCCudaHlGetProfile("mpm"); + if (profile->page_locked) { + if (SCCudaMemFreeHost(tctx->results_buffer) == -1) { + SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: " + "results_buffer\n"); + } + } else { + free(tctx->results_buffer); + } SCCudaHlFreeCudaDevicePtr("MPM_B2G_RESULTS", tctx->b2g_cuda_module_handle); SCCudaHlFreeCudaDevicePtr("MPM_B2G_PACKETS_BUFFER", tctx->b2g_cuda_module_handle); SCCudaHlFreeCudaDevicePtr("MPM_B2G_PACKETS_BUFFER_OFFSETS", @@ -2291,7 +2316,7 @@ static int B2gCudaTest01(void) /* get the cuda context and push it */ CUcontext dummy_context; - if (SCCudaHlGetCudaContext(&dummy_context, module_handle) == -1) { + if (SCCudaHlGetCudaContext(&dummy_context, "mpm", module_handle) == -1) { SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda context for the " "module SC_RULES_CONTENT_B2G_CUDA"); } @@ -2323,6 +2348,7 @@ static int B2gCudaTest01(void) result = 1; + SCCudaPBSetProfile("mpm"); pb = SCCudaPBAllocSCCudaPBPacketsBuffer(); SCCudaPBPacketDataForGPU *curr_packet = (SCCudaPBPacketDataForGPU *)pb->packets_buffer; @@ -2500,6 +2526,7 @@ static int B2gCudaTest02(void) } 
SigGroupBuild(de_ctx); + SCCudaPBSetProfile("mpm"); SCCudaPBSetUpQueuesAndBuffers(); /* get the queues used by the batcher thread */ @@ -2800,6 +2827,7 @@ static int B2gCudaTest03(void) SigGroupBuild(de_ctx); DetectEngineThreadCtxInit(&de_tv, (void *)de_ctx, (void *)&det_ctx); + SCCudaPBSetProfile("mpm"); SCCudaPBSetUpQueuesAndBuffers(); /* get the queues used by the batcher thread */ diff --git a/src/util-mpm.c b/src/util-mpm.c index c9f8cdb2ab..6d38ecb9ed 100644 --- a/src/util-mpm.c +++ b/src/util-mpm.c @@ -456,6 +456,717 @@ uint32_t MpmGetBloomSize(const char *conf_val) SCReturnInt(bloom_value); } + +#ifdef __SC_CUDA_SUPPORT__ + +/** + * \brief Parse the "mpm" profile under the cuda subsection of our conf file. + * + * \retval profile Pointer to a struct containing the parsed data. + */ +MpmCudaConf *MpmCudaConfParse(void) +{ + ConfNode *cuda_node = NULL; + ConfNode *seq_node = NULL; + + MpmCudaConf *profile = NULL; + + const char *packet_buffer_limit = NULL; + const char *packet_size_limit = NULL; + const char *packet_buffers = NULL; + const char *batching_timeout = NULL; + const char *page_locked = NULL; + const char *device_id = NULL; + + if ((profile = malloc(sizeof(MpmCudaConf))) == NULL) { + SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory"); + exit(EXIT_FAILURE); + } + memset(profile, 0, sizeof(MpmCudaConf)); + profile->packet_buffer_limit = MPM_PACKET_BUFFER_LIMIT; + profile->packet_size_limit = MPM_PACKET_SIZE_LIMIT; + profile->packet_buffers = MPM_PACKET_BUFFERS; + profile->batching_timeout = MPM_BATCHING_TIMEOUT; + profile->page_locked = MPM_PAGE_LOCKED; + profile->device_id = SC_CUDA_DEFAULT_DEVICE; + + cuda_node = ConfGetNode("cuda"); + if (cuda_node == NULL) { + SCLogInfo("No conf found for \"cuda\" in yaml file. 
Use default conf"); + goto end; + } + + TAILQ_FOREACH(seq_node, &cuda_node->head, next) { + if (strcasecmp(seq_node->val, "mpm") == 0) { + packet_buffer_limit = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "packet_buffer_limit"); + packet_size_limit = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "packet_size_limit"); + packet_buffers = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "packet_buffers"); + batching_timeout = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "batching_timeout"); + page_locked = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "page_locked"); + device_id = ConfNodeLookupChildValue + (seq_node->head.tqh_first, "device_id"); + + /* packet_buffer_size */ + if (packet_buffer_limit == NULL || strcasecmp(packet_buffer_limit, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_buffer_limit. Either NULL or empty"); + } else { + profile->packet_buffer_limit = atoi(packet_buffer_limit); + if (profile->packet_buffer_limit <= 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_buffer_limit - %s", packet_buffer_limit); + profile->packet_buffer_limit = MPM_PACKET_BUFFER_LIMIT; + } + } + + /* packet_size_limit */ + if (packet_size_limit == NULL || strcasecmp(packet_size_limit, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_size_limit. Either NULL or empty"); + } else { + profile->packet_size_limit = atoi(packet_size_limit); + if (profile->packet_size_limit <= 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_size_limit - %s", packet_size_limit); + profile->packet_size_limit = MPM_PACKET_SIZE_LIMIT; + } + } + + /* packet_buffers */ + if (packet_buffers == NULL || strcasecmp(packet_buffers, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_buffers. 
Either NULL or empty"); + } else { + profile->packet_buffers = atoi(packet_buffers); + if (profile->packet_buffers <= 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.packet_buffers - %s", packet_buffers); + profile->packet_buffers = MPM_PACKET_BUFFERS; + } + } + + /* batching_timeout */ + if (batching_timeout == NULL || strcasecmp(batching_timeout, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.batching_timeout. Either NULL or empty"); + } else { + profile->batching_timeout = atoi(batching_timeout); + if (profile->batching_timeout <= 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.batching_timeout - %s", batching_timeout); + profile->batching_timeout = MPM_BATCHING_TIMEOUT; + } + } + + /* page_locked */ + if (page_locked == NULL || strcasecmp(page_locked, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.page_locked. Either NULL or empty"); + } else { + if (strcasecmp(page_locked, "enabled") == 0) { + profile->page_locked = MPM_PAGE_LOCKED; + } else if (strcasecmp(page_locked, "disabled") == 0) { + profile->page_locked = !MPM_PAGE_LOCKED; + } else { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.page_locked - %s", page_locked); + } + } + + /* device_id */ + if (device_id == NULL || strcasecmp(device_id, "") == 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.device_id Either NULL or empty"); + profile->device_id = SC_CUDA_DEFAULT_DEVICE; + continue; + } else { + profile->device_id = atoi(device_id); + if (profile->device_id < 0) { + SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid entry for " + "cuda.mpm.device_id - %s", device_id); + profile->device_id = SC_CUDA_DEFAULT_DEVICE; + continue; + } + } + } /* if (strcasecmp(seq_node->val, "mpm") == 0) */ + } /* TAILQ_FOREACH(seq_node, &cuda_node->head, next) */ + + end: + SCLogDebug("Configuration for 
\"cuda.mpm\"\n" + "packet_buffer_size: %u\n" + "packet_size_limit: %d\n" + "packet_buffers: %d\n" + "batching_timeout: %d\n" + "page_locked: %d\n" + "device_id: %d\n", + profile->packet_buffer_limit, profile->packet_size_limit, + profile->packet_buffers, profile->batching_timeout, + profile->page_locked, profile->device_id); + + return profile; +} + +/** + * \brief Cleanup the parsed "mpm" profile cuda conf. + */ +void MpmCudaConfCleanup(MpmCudaConf *conf) +{ + if (conf != NULL) + free(conf); + + return; +} + +#endif /* __SC_CUDA_SUPPORT */ + +/************************************Unittests*********************************/ + +static int MpmInitYamlConf(char *conf) +{ + ConfCreateContextBackup(); + ConfInit(); + return ConfYamlLoadString(conf, strlen(conf)); +} + +static void MpmDeInitYamlConf(void) +{ + ConfDeInit(); + ConfRestoreContextBackup(); + + return; +} + +static int MpmTest01(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit: 4000\n" + " packet_size_limit: 1500\n" + " packet_buffers: 10\n" + " batching_timeout: 1\n" + " page_locked: enabled\n" + " device_id: 0\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == 4000); + result &= (profile->packet_size_limit == 1500); + result &= (profile->packet_buffers == 10); + result &= (profile->batching_timeout == 1); + result &= (profile->page_locked == 1); + result &= (profile->device_id == 0); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest02(void) 
+{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit: 4001\n" + " packet_size_limit: 1500\n" + " packet_buffers: 12\n" + " batching_timeout: 10\n" + " page_locked: disabled\n" + " device_id: 5\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == 4001); + result &= (profile->packet_size_limit == 1500); + result &= (profile->packet_buffers == 12); + result &= (profile->batching_timeout == 10); + result &= (profile->page_locked == 0); + result &= (profile->device_id == 5); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest03(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit: 0\n" + " packet_size_limit: 0\n" + " packet_buffers: 0\n" + " batching_timeout: 0\n" + " page_locked: enbled\n" + " device_id: -1\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= 
(profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest04(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit: -1\n" + " packet_size_limit: -1\n" + " packet_buffers: -1\n" + " batching_timeout: -1\n" + " page_locked: enbled\n" + " device_id: -1\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest05(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit:\n" + " packet_size_limit:\n" + " packet_buffers:\n" + " batching_timeout: 2\n" + " page_locked: enabled\n" + " device_id: 1\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = 
SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == 2); + result &= (profile->page_locked == 1); + result &= (profile->device_id == 1); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest06(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit: \n" + " packet_size_limit: \n" + " packet_buffers: \n" + " batching_timeout: \n" + " page_locked: \n" + " device_id: \n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest07(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_buffer_limit:\n" + " packet_size_limit:\n" + " packet_buffers:\n" + " 
batching_timeout:\n" + " page_locked:\n" + " device_id:\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest08(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n" + " packet_size_limit: 2000\n" + " page_locked: disabled\n" + " device_id: 4\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == 2000); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == !MPM_PAGE_LOCKED); + result &= (profile->device_id == 4); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + 
MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest09(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n" + " - mpm:\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest10(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n" + "cuda:\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + 
SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + +static int MpmTest11(void) +{ + char *conf = + "%YAML 1.1\n" + "---\n"; + + DetectEngineCtx *de_ctx = NULL; + int result = 0; + + if (MpmInitYamlConf(conf) == -1) + return 0; + + de_ctx = DetectEngineCtxInit(); + if (de_ctx == NULL) + goto end; + + SCCudaHlBackupRegisteredProfiles(); + SCCudaHlGetYamlConf(); + MpmCudaConf *profile = SCCudaHlGetProfile("mpm"); + if (profile == NULL) { + printf("Error retrieving mpm profile\n"); + goto end; + } + + result = (profile->packet_buffer_limit == MPM_PACKET_BUFFER_LIMIT); + result &= (profile->packet_size_limit == MPM_PACKET_SIZE_LIMIT); + result &= (profile->packet_buffers == MPM_PACKET_BUFFERS); + result &= (profile->batching_timeout == MPM_BATCHING_TIMEOUT); + result &= (profile->page_locked == MPM_PAGE_LOCKED); + result &= (profile->device_id == SC_CUDA_DEFAULT_DEVICE); + + end: + SCCudaHlCleanProfiles(); + + if (de_ctx != NULL) + DetectEngineCtxFree(de_ctx); + MpmDeInitYamlConf(); + SCCudaHlRestoreBackupRegisteredProfiles(); + + return result; +} + void MpmRegisterTests(void) { #ifdef UNITTESTS uint16_t i; @@ -467,6 +1178,17 @@ void MpmRegisterTests(void) { printf("Warning: mpm %s has no unittest registration function...", mpm_table[i].name); } } + + UtRegisterTest("MpmTest01", MpmTest01, 1); + UtRegisterTest("MpmTest02", MpmTest02, 1); + UtRegisterTest("MpmTest03", MpmTest03, 1); + UtRegisterTest("MpmTest04", MpmTest04, 1); + UtRegisterTest("MpmTest05", MpmTest05, 1); + UtRegisterTest("MpmTest06", MpmTest06, 1); + UtRegisterTest("MpmTest07", MpmTest07, 1); + UtRegisterTest("MpmTest08", MpmTest08, 1); + UtRegisterTest("MpmTest09", MpmTest09, 1); + UtRegisterTest("MpmTest10", MpmTest10, 1); + UtRegisterTest("MpmTest11", MpmTest11, 1); #endif } - diff --git a/src/util-mpm.h b/src/util-mpm.h index 289c852020..14c2126fbd 100644 --- a/src/util-mpm.h +++ 
b/src/util-mpm.h
@@ -50,6 +50,13 @@
     pattern matcher algorithms */
 #define BLOOMSIZE_HIGH 2048 /**< High bloomfilter size for the multi
                                  pattern matcher algorithms */
+
+#define MPM_PACKET_BUFFER_LIMIT 2400
+#define MPM_PACKET_SIZE_LIMIT 1500
+#define MPM_PACKET_BUFFERS 10
+#define MPM_BATCHING_TIMEOUT 1
+#define MPM_PAGE_LOCKED 1
+
 enum {
     MPM_NOTSET = 0,
@@ -178,12 +185,35 @@
 MpmCtx *MpmFactoryGetMpmCtxForProfile(int32_t);
 void MpmFactoryDeRegisterAllMpmCtxProfiles(void);
 int32_t MpmFactoryIsMpmCtxAvailable(MpmCtx *);
+/* macro that decides if cuda is enabled for the platform or not */
+#ifdef __SC_CUDA_SUPPORT__
+
+/**
+ * \brief Cuda configuration for "mpm" profile. We can further extend this
+ *        to have conf for specific mpms. For now it's common for all mpms.
+ */
+typedef struct MpmCudaConf_ {
+    int32_t packet_buffer_limit;
+    int16_t packet_size_limit;
+    int8_t packet_buffers;
+    int8_t batching_timeout;
+    int8_t page_locked;
+    int8_t device_id;
+} MpmCudaConf;
+
+#endif /* __SC_CUDA_SUPPORT__ */
+
 int PmqSetup(PatternMatcherQueue *, uint32_t, uint32_t);
 void PmqMerge(PatternMatcherQueue *src, PatternMatcherQueue *dst);
 void PmqReset(PatternMatcherQueue *);
 void PmqCleanup(PatternMatcherQueue *);
 void PmqFree(PatternMatcherQueue *);
+#ifdef __SC_CUDA_SUPPORT__
+MpmCudaConf *MpmCudaConfParse(void);
+void MpmCudaConfCleanup(MpmCudaConf *);
+#endif /* __SC_CUDA_SUPPORT__ */
+
 void MpmTableSetup(void);
 void MpmRegisterTests(void);
@@ -197,4 +227,3 @@
 uint32_t MpmGetHashSize(const char *);
 uint32_t MpmGetBloomSize(const char *);
 #endif /* __UTIL_MPM_H__ */
-
diff --git a/suricata.yaml b/suricata.yaml
index 71d02e36b7..d72d2300b5 100644
--- a/suricata.yaml
+++ b/suricata.yaml
@@ -152,11 +152,32 @@ threading:
 #   detect_thread_ratio: 1.5
 
-# Select the cuda device to use. The device_id identifies the device to be used
-# if one has multiple devices on the system. To find out device_id associated
-# with the card(s) on the system run "suricata --list-cuda-cards".
+# Cuda configuration.
 cuda:
-  device_id: 0
+  # The "mpm" profile. On not specifying any of these parameters, the engine's
+  # internal default values are used, which are the same as the ones specified here.
+  - mpm:
+    # Threshold limit for the number of packets buffered to the GPU. Once we hit this
+    # limit, we pass the buffer to the gpu.
+    packet_buffer_limit: 2400
+    # The maximum length for a packet that we would buffer to the gpu.
+    # Anything over this is MPM'ed on the CPU. All entries > 0 are valid.
+    packet_size_limit: 1500
+    # Number of packet buffers we initialize. All entries > 0 are valid.
+    packet_buffers: 10
+    # The timeout limit for batching of packets in secs. If we don't fill the
+    # buffer within this timeout limit, we pass the currently filled buffer to the gpu.
+    # All entries > 0 are valid.
+    batching_timeout: 1
+    # Specifies whether to use page_locked memory wherever possible. Accepted values
+    # are "enabled" and "disabled".
+    page_locked: enabled
+    # The device to use for the mpm. Currently we don't support load balancing
+    # on multiple gpus. In case you have multiple devices on your system, you
+    # can specify the device to use, using this conf. By default we hold 0, to
+    # specify the first device cuda sees. To find out device_id associated with
+    # the card(s) on the system run "suricata --list-cuda-cards".
+    device_id: 0
 
 # Select the multi pattern algorithm you want to run for scan/search the
 # in the engine. The supported algorithms are b2g, b2gc, b2gm, b3g, wumanber,