Minimal example for compiling GPU and CPU code into a single host shared library,

Chris Sullivan · Chris Sullivan · commit 068dd86feb6c · 2017-01-25T22:02:06.000-05:00
and compiling a host executable CUDA binary that asynchronously runs code on an NVIDIA device.
diff --git a/SConscript b/SConscript
@@ -0,0 +1,17 @@
+Import('env')
+# link-time search in #/lib; $ORIGIN-relative run-time search paths (. and ../lib)
+env.Append(LIBPATH='#/lib')
+env.Append(RPATH=[Literal('\\$$ORIGIN')])
+env.Append(RPATH=[Literal('\\$$ORIGIN/../lib')])
+# run the library's SConscript, then add its first returned node to LIBPATH/LIBS
+libmixed_cpu_and_gpu = env.SConscript('libmixed_cpu_and_gpu/SConscript', exports='env')
+env.Append(LIBPATH=[libmixed_cpu_and_gpu[0].dir])
+env.Append(LIBS=[libmixed_cpu_and_gpu[0].name])
+# the host executable is built from the single CUDA source file
+minimal = env.Program('minimal','minimal.cu')
+# stage the build products under lib/ and bin/ at the top of the tree
+env.Install('#/lib',[libmixed_cpu_and_gpu])
+env.Install('#/bin',[minimal])
+# register the staged directories for removal when cleaning
+Clean('#','#/lib')
+Clean('#','#/bin')
diff --git a/SConstruct b/SConstruct
@@ -0,0 +1,38 @@
+import os
+
+import SCons
+
+exec(compile(open("build-env.py").read(), "build-env.py", "exec"))  # runs on Python 2 and 3; keeps the filename for inspect
+env = Environment(ENV = os.environ)  # Environment is the wrapper defined by build-env.py
+env.Append(CCFLAGS=['-std=c++1y','-pthread','-g'])
+env.Append(LINKFLAGS=['-pthread'])
+
+def append_lib(self, *libs):
+    """Add each library (path string, File node, or nested sequence of those) to LIBPATH and LIBS."""
+    for lib in libs:
+        if isinstance(lib, str):
+            lib = File(lib)  # turn a path string into a File node for the branch below
+        if isinstance(lib, SCons.Node.FS.File):
+            self.Append(LIBPATH=[lib.dir])
+            self.Append(LIBS=[lib.name])
+        else:
+            self.append_lib(*lib)  # anything else is treated as a sequence of libraries
+type(env).append_lib = append_lib  # attach append_lib as a method of the environment's class
+# forward an optional PYTHON_VERSION=... command-line argument into the environment
+if 'PYTHON_VERSION' in ARGUMENTS:
+    env['PYTHON_VERSION'] = ARGUMENTS['PYTHON_VERSION']
+
+# More readable output: short status lines unless a VERBOSE argument is given
+if not ARGUMENTS.get('VERBOSE'):
+    env['CXXCOMSTR'] = 'Compiling C++ object $TARGETS'
+    env['CCCOMSTR'] = 'Compiling C object $TARGETS'
+    env['ARCOMSTR'] = 'Packing static library $TARGETS'
+    env['RANLIBCOMSTR'] = 'Indexing static library $TARGETS'
+    env['SHCCCOMSTR'] = 'Compiling shared C object $TARGETS'
+    env['SHCXXCOMSTR'] = 'Compiling shared C++ object $TARGETS'
+    env['LINKCOMSTR'] = 'Linking $TARGETS'
+    env['SHLINKCOMSTR'] = 'Linking shared $TARGETS'
+# run the top-level SConscript with 'build' as its variant directory
+env.SConscript('SConscript', exports='env', duplicate=True,
+               variant_dir='build')
+Clean('.','build')
diff --git a/build-env.py b/build-env.py
@@ -0,0 +1,230 @@
+EnsureSConsVersion(1,2)
+
+import os
+
+import inspect
+import platform
+
+def get_cuda_paths():
+  """Determine the CUDA {bin,lib,include} paths.
+
+  Per-OS defaults are replaced by CUDA_{BIN,LIB,INC}_PATH when set.  Returns
+  (bin_path,lib_path,inc_path); raises ValueError on an unrecognized os.name.
+  """
+  # determine defaults
+  if os.name == 'nt':
+    bin_path = 'C:/CUDA/bin'
+    lib_path = 'C:/CUDA/lib'
+    inc_path = 'C:/CUDA/include'
+  elif os.name == 'posix':
+    bin_path = '/usr/local/cuda/bin'
+    lib_path = '/usr/local/cuda/lib'
+    inc_path = '/usr/local/cuda/include'
+  else:
+    raise ValueError('Error: unknown OS.  Where is nvcc installed?')
+
+  if platform.platform()[:6] != 'Darwin' and \
+      platform.machine()[-2:] == '64':
+    lib_path += '64'
+
+  # override with environment variables
+  if 'CUDA_BIN_PATH' in os.environ:
+    bin_path = os.path.abspath(os.environ['CUDA_BIN_PATH'])
+  if 'CUDA_LIB_PATH' in os.environ:
+    lib_path = os.path.abspath(os.environ['CUDA_LIB_PATH'])
+  if 'CUDA_INC_PATH' in os.environ:
+    inc_path = os.path.abspath(os.environ['CUDA_INC_PATH'])
+
+  return (bin_path,lib_path,inc_path)
+
+def getTools():
+  """Pick the SCons tool list for the host OS.
+
+  'default' always comes first; 'msvc' follows on nt and 'gcc' on posix.
+  """
+  compilers = {'nt' : 'msvc', 'posix' : 'gcc'}
+  if os.name in compilers:
+    return ['default', compilers[os.name]]
+  return ['default']
+
+
+OldEnvironment = Environment;  # keep the stock factory; Environment is redefined further down
+
+
+# this dictionary maps the name of a compiler program to a dictionary mapping the name of
+# a compiler switch of interest to the specific switch implementing the feature ('' = none needed)
+gCompilerOptions = {
+    'gcc' : {'warn_all' : '-Wall', 'warn_errors' : '-Werror', 'optimization' : '-O3', 'inplace':'-fPIC', 'debug' : '-g',  'exception_handling' : '',      'omp' : '-fopenmp'},
+    'g++' : {'warn_all' : '-Wall', 'warn_errors' : '-Werror', 'optimization' : '-O3', 'inplace':'-fPIC', 'debug' : '-g',  'exception_handling' : '',      'omp' : '-fopenmp'},
+    'cl'  : {'warn_all' : '/Wall', 'warn_errors' : '/WX',     'optimization' : '/Ox', 'inplace':'',      'debug' : ['/Zi', '-D_DEBUG', '/MTd'], 'exception_handling' : '/EHsc', 'omp' : '/openmp'}
+  }
+
+
+# this dictionary maps the name of a linker program to a dictionary mapping the name of
+# a linker switch of interest to the specific switch implementing the feature ('' = none needed)
+gLinkerOptions = {
+    'gcc'   : {'debug' : ''},
+    'g++'   : {'debug' : ''},
+    'link'  : {'debug' : '/debug' }
+  }
+
+
+def getCFLAGS(mode, warn, warnings_as_errors, CC):
+  """Assemble the C compiler switches for the compiler named CC.
+
+  'release' adds the optimization switch and 'debug' the debug switch plus
+  -DTHRUST_DEBUG; both add the 'inplace' switch.  warn / warnings_as_errors
+  add the matching warning switches.  Returns a list.
+  """
+
+  # look switches up lazily so only the requested features are consulted
+  switch = lambda feature: gCompilerOptions[CC][feature]
+  flags = []
+
+  if mode == 'release':
+    flags += [switch('optimization'), switch('inplace')]
+  elif mode == 'debug':
+    flags += [switch('debug'), switch('inplace'), '-DTHRUST_DEBUG']
+
+  if warn:
+    flags.append(switch('warn_all'))
+  if warnings_as_errors:
+    flags.append(switch('warn_errors'))
+
+  if CC == 'cl':
+    # windows-only: avoid min/max problems due to windows.h, and silence the
+    # "decorated name length exceeded" warning
+    flags += ['/DNOMINMAX', '/wd4503']
+
+  return flags
+
+
+def getCXXFLAGS(mode, warn, warnings_as_errors, CXX):
+  """Assemble the C++ compiler switches for the compiler named CXX.
+
+  'release' adds the optimization and 'inplace' switches, 'debug' only the
+  debug switch; the exception-handling switch is always added, followed by
+  the warning switches requested via warn / warnings_as_errors.
+  """
+  switch = lambda feature: gCompilerOptions[CXX][feature]
+  if mode == 'release':
+    flags = [switch('optimization'), switch('inplace')]
+  elif mode == 'debug':
+    flags = [switch('debug')]
+  else:
+    flags = []
+
+  flags.append(switch('exception_handling'))
+  if warn:
+    flags.append(switch('warn_all'))
+  if warnings_as_errors:
+    flags.append(switch('warn_errors'))
+  return flags
+
+
+def getNVCCFLAGS(mode, arch):
+  """Assemble the nvcc switches.
+
+  mode selects the debug switches; arch is passed through as -arch=<arch>.
+  """
+  flags = ['-arch=' + arch]
+
+  # on Darwin, state the word size explicitly
+  if platform.platform()[:6] == 'Darwin':
+    is_64bit = platform.machine()[-2:] == '64'
+    flags.append('-m64' if is_64bit else '-m32')
+
+  if mode == 'debug':
+    # debug builds pass both -g and -G to nvcc
+    flags.extend(['-g', '-G'])
+
+  # always request C++11 from nvcc
+  flags.append('-std=c++11')
+  return flags
+
+
+def getLINKFLAGS(mode, LINK):
+  """Return the linker switches for the linker named LINK; only debug mode adds one."""
+  if mode != 'debug':
+    return []
+
+  # the linker's debug switch (may be '' when none is needed)
+  return [gLinkerOptions[LINK]['debug']]
+
+
+def Environment(*args, **keywords):
+  """Create an SCons Environment set up for mixed C/C++/CUDA builds.
+
+  Arguments are forwarded to the stock Environment.  Adds the mode, arch, Wall
+  and Werror variables, the nvcc tool, compiler/linker switches and CUDA paths.
+  """
+  # allow the user discretion to choose the MSVC version
+  vars = Variables()
+  if os.name == 'nt':
+    vars.Add(EnumVariable('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('8.0', '9.0', '10.0')))
+
+  # add a variable to handle RELEASE/DEBUG mode
+  vars.Add(EnumVariable('mode', 'Release versus debug mode', 'release',
+                        allowed_values = ('release', 'debug')))
+
+  # add a variable to handle compute capability
+  vars.Add(EnumVariable('arch', 'Compute capability code generation', 'sm_35',
+                        allowed_values = ('sm_10', 'sm_11', 'sm_12', 'sm_13', 'sm_20', 'sm_21', 'sm_30', 'sm_35')))
+
+  # add a variable to handle warnings (default 1 on posix, 0 elsewhere)
+  vars.Add(BoolVariable('Wall', 'Enable all compilation warnings', int(os.name == 'posix')))
+
+  # add a variable to treat warnings as errors
+  vars.Add(BoolVariable('Werror', 'Treat warnings as errors', 0))
+
+  # create an Environment
+  env = OldEnvironment(*args, tools = getTools(), variables = vars, **keywords)
+
+  # directory containing this source file; used as the tool search path for nvcc
+  thisFile = inspect.getabsfile(Environment)
+  thisDir = os.path.dirname(thisFile)
+
+  # enable nvcc
+  env.Tool('nvcc', toolpath = [os.path.join(thisDir)])
+
+  # get C compiler switches
+  env.Append(CFLAGS = getCFLAGS(env['mode'], env['Wall'], env['Werror'], env.subst('$CC')))
+
+  # get CXX compiler switches
+  env.Append(CXXFLAGS = getCXXFLAGS(env['mode'], env['Wall'], env['Werror'], env.subst('$CXX')))
+
+  # get NVCC compiler switches
+  env.Append(NVCCFLAGS = getNVCCFLAGS(env['mode'], env['arch']))
+
+  # get linker switches
+  env.Append(LINKFLAGS = getLINKFLAGS(env['mode'], env.subst('$LINK')))
+
+  # get CUDA paths
+  (cuda_exe_path,cuda_lib_path,cuda_inc_path) = get_cuda_paths()
+  env.Append(LIBPATH = [cuda_lib_path])
+  env.Append(CPPPATH = [cuda_inc_path])
+
+  # link against the standard library
+  # we don't have to do this on Windows
+  if os.name == 'posix':
+    env.Append(LIBS = ['stdc++'])
+
+  # link against backend-specific runtimes
+  # XXX we shouldn't have to link against cudart unless we're using the
+  #     cuda runtime, but cudafe inserts some dependencies when compiling .cu files
+  # XXX ideally this gets handled in nvcc.py if possible
+  env.Append(LIBS = ['cuda','cudart'])
+
+  # import the library search path so we can run commands which depend on shared libraries
+  # XXX we should probably just copy the entire environment
+  if os.name == 'posix':
+    # the variable may be unset, so copy it only when it is present
+    lib_var = 'DYLD_LIBRARY_PATH' if env['PLATFORM'] == "darwin" else 'LD_LIBRARY_PATH'
+    if lib_var in os.environ:
+      env['ENV'][lib_var] = os.environ[lib_var]
+
+  # generate help text
+  Help(vars.GenerateHelpText(env))
+
+  return env
+
diff --git a/libmixed_cpu_and_gpu/SConscript b/libmixed_cpu_and_gpu/SConscript
@@ -0,0 +1,11 @@
+Import('env')
+# this library's headers live in include/
+env.Append(CPPPATH=[Dir('include')])
+# build objects from the .cu sources, using a distinct .cuda.o suffix
+env.Append(NVCCFLAGS=['--expt-extended-lambda'])
+cuda_objects = env.Object(Glob('src/*.cu'),OBJSUFFIX='.cuda.o')
+# build the shared library from the .cc sources; the CUDA objects are passed via a per-target LIBS
+lib = env.SharedLibrary('mixed_cpu_and_gpu',Glob('src/*.cc'),
+                        LIBS=[cuda_objects])
+# hand the library node(s) back to the calling SConscript
+Return('lib')
diff --git a/libmixed_cpu_and_gpu/include/a_cpu_class.hh b/libmixed_cpu_and_gpu/include/a_cpu_class.hh
@@ -0,0 +1,16 @@
+#pragma once
+
+class a_cpu_class {
+  // plain C++ class (no CUDA qualifiers) holding one unsigned value
+ public:
+
+  a_cpu_class() { ; }
+  ~a_cpu_class() { ; }
+
+  unsigned int get_member() const;    // returns the stored value
+  void set_member(unsigned int val);  // stores val
+
+ private:
+  unsigned int member = 0;  // starts at 0 until set_member is called
+
+};
diff --git a/libmixed_cpu_and_gpu/include/a_mixed_gpu_and_cpu_class.hh b/libmixed_cpu_and_gpu/include/a_mixed_gpu_and_cpu_class.hh
@@ -0,0 +1,25 @@
+#pragma once
+#include <vector>
+
+
+class a_mixed_gpu_and_cpu_class {
+  // one class mixing a __device__ method, a __host__ method and a plain inline method
+ public:
+
+  a_mixed_gpu_and_cpu_class() { ; }
+  ~a_mixed_gpu_and_cpu_class() { ; }
+
+  // only callable from within gpu kernel context
+  __device__ void a_device_function();
+
+  // can only be called from cpu code, but makes
+  // cuda api calls which alter gpu state
+  __host__   void a_host_function();
+
+  // callable from CPU, only affects cpu code; always returns 0
+  unsigned int a_normal_cpu_method() const { return 0; }
+
+};
+
+// a forward declared cuda kernel
+__global__ void some_kernel (double* data);
diff --git a/libmixed_cpu_and_gpu/src/a_cpu_class.cc b/libmixed_cpu_and_gpu/src/a_cpu_class.cc
@@ -0,0 +1,4 @@
+#include "a_cpu_class.hh"
+
+unsigned int a_cpu_class::get_member() const { return member; }  // read the stored value
+void a_cpu_class::set_member(unsigned int val) { member = val; }  // overwrite the stored value
diff --git a/libmixed_cpu_and_gpu/src/a_mixed_gpu_and_cpu_class.cu b/libmixed_cpu_and_gpu/src/a_mixed_gpu_and_cpu_class.cu
@@ -0,0 +1,21 @@
+#include "a_mixed_gpu_and_cpu_class.hh"
+
+#include <iostream>
+
+__device__ void a_mixed_gpu_and_cpu_class::a_device_function() {
+  // Device-side demo: print a float value from GPU code.
+  float x = 1.0f;
+  printf("%f\n",x);  // %f matches the float argument; %d expects an int
+}
+
+
+__host__   void a_mixed_gpu_and_cpu_class::a_host_function() {
+  // Allocate and release a 10-float device buffer; free it only if the allocation succeeded.
+  float* gpu_mem = nullptr;
+  if (cudaMalloc((void**)&gpu_mem,10*sizeof(float)) == cudaSuccess) { cudaFree(gpu_mem); }
+}
+
+
+__global__ void some_kernel (double* data) {
+  data[0]*=10.0;  // scales element 0 only; there is no per-thread indexing
+}
diff --git a/minimal.cu b/minimal.cu
@@ -0,0 +1,16 @@
+#include "a_cpu_class.hh"
+#include "a_mixed_gpu_and_cpu_class.hh"
+
+
+int main(int argc, char** argv) {
+  // Stop with a non-zero exit code if the CUDA runtime reports an error up front.
+  if (cudaDeviceSynchronize() != cudaSuccess) { return 1; }
+  a_cpu_class cpu_obj;
+  a_mixed_gpu_and_cpu_class mixed_obj;
+
+  cpu_obj.set_member(10);
+  mixed_obj.a_host_function();
+
+  return 0;
+
+}
diff --git a/nvcc.py b/nvcc.py