fixed cuda build: portability issues and nvcc version check

15 years ago · 2f1262b446
parent 736f09c4bc
commit 2f1262b446
3 changed files with 56 additions and 30 deletions
--- a/configure.in
+++ b/configure.in
@@ -795,7 +795,7 @@ AC_CHECK_HEADER(pcap.h,,[AC_ERROR(pcap.h not found ...)])
        if test "$with_cuda_includes" != "no"; then
            CPPFLAGS="${CPPFLAGS} -I${with_cuda_includes}"
        else
-            CPPFLAGS="${CPPFLAGS} -I/usr/include/cuda"
+            CPPFLAGS="${CPPFLAGS} -I/usr/local/cuda/include"
        fi
        if test "$with_cuda_libraries" != "no"; then
@@ -819,24 +819,34 @@ AC_CHECK_HEADER(pcap.h,,[AC_ERROR(pcap.h not found ...)])
            exit 1
        fi
-        AC_PATH_PROG([NVCC], [nvcc], , [$PATH:$NVCC_DIR])
+        AC_PATH_PROG([NVCC], [nvcc], no, [$PATH:$NVCC_DIR])
-        if test "$NVCC" = "no"; then
+        if test "x$NVCC" = "xno"; then
            echo
            echo "   ERROR! CUDA nvcc compiler not found: use --with-cuda-nvcc=DIR"
            echo
            exit 1
        fi
-        AC_PATH_PROG([PYTHON], [python], no)
+        AC_MSG_CHECKING(for nvcc version)
-        if test "$PYTHON" = "no"; then
+        NVCCVER=`$NVCC --version | grep "release" | sed 's/.*release \(@<:@0-9@:>@\)\.\(@<:@0-9@:>@\).*/\1\2/'`
        AC_MSG_RESULT($NVCCVER)
        if test "$NVCCVER" -lt 31; then
            echo
-            echo "   ERROR! Compiling CUDA source requires python interpreter"
+            echo "   Warning! Your CUDA nvcc version might be outdated."
            echo "   If compilation fails try the latest CUDA toolkit from"
            echo "   www.nvidia.com/object/cuda_develop.html"
            echo
        fi
        AM_PATH_PYTHON(,, no)
        if test "x$PYTHON" = "xno"; then
            echo
            echo "   ERROR! Compiling CUDA kernels requires python."
            echo
            exit 1
        fi
    ])
-    AM_CONDITIONAL([BUILD_CUDA], [test "${NVCC}" != ""])
+    AM_CONDITIONAL([BUILD_CUDA], [test "x$enable_cuda" = "xyes"])
 # Check for libcap-ng
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -276,27 +276,43 @@ suricata_SOURCES += cuda-ptxdump.h
 suricata_CUDA_KERNELS = \
 util-mpm-b2g-cuda-kernel.cu
 SMVERSIONS = 10 11 12 13 20
 NVCCFLAGS=-O2
-# FIXME
+SUFFIXES = \
-PTXS =
+.ptx_sm_10 \
-PTXS += $(suricata_CUDA_KERNELS:.cu=_sm_10.ptx)
+.ptx_sm_11 \
-PTXS += $(suricata_CUDA_KERNELS:.cu=_sm_11.ptx)
+.ptx_sm_12 \
-PTXS += $(suricata_CUDA_KERNELS:.cu=_sm_12.ptx)
+.ptx_sm_13 \
-PTXS += $(suricata_CUDA_KERNELS:.cu=_sm_13.ptx)
+.ptx_sm_20 \
-PTXS += $(suricata_CUDA_KERNELS:.cu=_sm_20.ptx)
+.ptx_sm_21
-# template to build for different compute capabilities
+PTXS =  $(suricata_CUDA_KERNELS:.cu=.ptx_sm_10)
-define BUILDTEMPLATE
+PTXS += $(suricata_CUDA_KERNELS:.cu=.ptx_sm_11)
-# PTXS += $(patsubst %.cu, %_sm_$(1).ptx, $(suricata_CUDA_KERNELS))
+PTXS += $(suricata_CUDA_KERNELS:.cu=.ptx_sm_12)
-%_sm_$(1).ptx: %.cu
+PTXS += $(suricata_CUDA_KERNELS:.cu=.ptx_sm_13)
-	$(NVCC) $(NVCCFLAGS) -o $$@ -arch=sm_$(1) -ptx $$<
+PTXS += $(suricata_CUDA_KERNELS:.cu=.ptx_sm_20)
-endef
+PTXS += $(suricata_CUDA_KERNELS:.cu=.ptx_sm_21)
-$(foreach SMVER,$(SMVERSIONS),$(eval $(call BUILDTEMPLATE,$(SMVER))))
+
 .cu.ptx_sm_10:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_10 -ptx $<
 .cu.ptx_sm_11:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_11 -ptx $<
 .cu.ptx_sm_12:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_12 -ptx $<
 .cu.ptx_sm_13:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_13 -ptx $<
 .cu.ptx_sm_20:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_20 -ptx $<
 .cu.ptx_sm_21:
 	$(NVCC) $(NVCCFLAGS) -o $@ -arch=sm_21 -ptx $<
 cuda-ptxdump.h: $(PTXS)
-	python ptxdump.py cuda-ptxdump $(PTXS)
+	$(PYTHON) ptxdump.py cuda-ptxdump $(PTXS)
 CLEANFILES = $(PTXS) cuda-ptxdump.h
 endif
--- a/src/ptxdump.py
+++ b/src/ptxdump.py
@@ -29,9 +29,9 @@ out_h = sys.argv[1] + ".h"
 out = open(out_h, 'w')
 out.writelines(header)
-out.writelines("#ifdef __SC_CUDA_SUPPORT__ \n")
+out.writelines("#ifdef __SC_CUDA_SUPPORT__\n")
-out.writelines("#ifndef __ptxdump_h__ \n")
+out.writelines("#ifndef __ptxdump_h__\n")
-out.writelines("#define __ptxdump_h__ \n\n")
+out.writelines("#define __ptxdump_h__\n\n")
 # write char arrays
 for file in sys.argv[2:]:
@@ -49,7 +49,7 @@ for file in sys.argv[2:]:
        if newlinecnt == 16:
            newlinecnt = 0
            out.write("\n")
-    out.write("0x00\n};\n")
+    out.write("0x00\n};\n\n")
    print(sys.argv[0] + ": CUmodule " + varname + " packed successfully")
@@ -62,8 +62,8 @@ out.writelines('\tSCLogError(SC_ERR_FATAL, "Error in SCCudaPtxDumpGetModule, mod
 out.writelines("\texit(EXIT_FAILURE);\n")
 out.writelines("};\n")
-out.writelines("#endif // __ptxdump_h__ \n")
+out.writelines("#endif /* __ptxdump_h__ */\n")
-out.writelines("#endif // __SC_CUDA_SUPPORT__\n")
+out.writelines("#endif /* __SC_CUDA_SUPPORT__ */\n")
 print(sys.argv[0] + ": " + out_h + " written successfully")