CUDA: Update the inspection engine to inform the cuda module that it

doesn't need the gpu results and to release the packet for the next run.

Previously the inspection engine wouldn't inform the cuda module if it
didn't need the results.  As a consequence, when the packet was next taken
for re-use while still being processed by the cuda module, the engine
would wait until the cuda module freed the packet.

This commit updates this functionality to inform the cuda module to
release the packet in the aforementioned case.
pull/1171/head
Anoop Saldanha 11 years ago committed by Victor Julien
parent 60c46170b0
commit b334b8a6e9

@ -1572,6 +1572,10 @@ next:
PACKET_PROFILING_DETECT_END(p, PROF_DETECT_RULES);
end:
#ifdef __SC_CUDA_SUPPORT__
CudaReleasePacket(p);
#endif
/* see if we need to increment the inspect_id and reset the de_state */
if (has_state && AppLayerParserProtocolSupportsTxs(p->proto, alproto)) {
PACKET_PROFILING_DETECT_START(p, PROF_DETECT_STATEFUL);

@ -1691,6 +1691,18 @@ void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
}
/**
 * \brief Tell the cuda module that the gpu results for this packet are not
 *        needed, so the packet can be taken for the next run.
 *
 * Does nothing unless cuda_mpm_enabled is 1 for the packet.  Otherwise both
 * cuda_mpm_enabled and cuda_done are reset to 0.
 *
 * \param p Packet to release.  It is dereferenced unconditionally, so it
 *          must not be NULL.
 */
void CudaReleasePacket(Packet *p)
{
    if (p->cuda_pkt_vars.cuda_mpm_enabled != 1)
        return;

    /* The cuda dispatcher tests cuda_mpm_enabled while holding cuda_mutex.
     * Clear the flag inside the same critical section so that the store is
     * ordered against that read instead of racing with it. */
    SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
    p->cuda_pkt_vars.cuda_mpm_enabled = 0;
    p->cuda_pkt_vars.cuda_done = 0;
    SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
}
/* \todos
* - Use texture memory - Can we fit all the arrays into a 3d texture.
* Texture memory definitely offers slightly better performance even
@ -1890,6 +1902,13 @@ static void *SCACCudaDispatcher(void *arg)
for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
if (p->cuda_pkt_vars.cuda_mpm_enabled == 0) {
p->cuda_pkt_vars.cuda_done = 0;
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
continue;
}
p->cuda_pkt_vars.cuda_gpu_matches =
cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
@ -1900,7 +1919,6 @@ static void *SCACCudaDispatcher(void *arg)
d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
}
SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
p->cuda_pkt_vars.cuda_done = 1;
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
SCCondSignal(&p->cuda_pkt_vars.cuda_cond);

@ -205,6 +205,8 @@ uint32_t SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
PatternMatcherQueue *pmq);
void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
void CudaReleasePacket(Packet *p);
#endif /* __SC_CUDA_SUPPORT__ */

Loading…
Cancel
Save