Skip to content

Commit 068dd86

Browse files
author
Chris Sullivan
committed
Minimal example for compiling GPU and CPU code into a single host shared library,
and compiling a host executable CUDA binary that asynchronously runs code on an NVIDIA device.
0 parents  commit 068dd86

10 files changed

+510
-0
lines changed

SConscript

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Import('env')
2+
3+
env.Append(LIBPATH='#/lib')
4+
env.Append(RPATH=[Literal('\\$$ORIGIN')])
5+
env.Append(RPATH=[Literal('\\$$ORIGIN/../lib')])
6+
7+
libmixed_cpu_and_gpu = env.SConscript('libmixed_cpu_and_gpu/SConscript', exports='env')
8+
env.Append(LIBPATH=[libmixed_cpu_and_gpu[0].dir])
9+
env.Append(LIBS=[libmixed_cpu_and_gpu[0].name])
10+
11+
minimal = env.Program('minimal','minimal.cu')
12+
13+
env.Install('#/lib',[libmixed_cpu_and_gpu])
14+
env.Install('#/bin',[minimal])
15+
16+
Clean('#','#/lib')
17+
Clean('#','#/bin')

SConstruct

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import os
2+
3+
import SCons
4+
5+
exec open("build-env.py")
6+
env = Environment(ENV = os.environ)
7+
env.Append(CCFLAGS=['-std=c++1y','-pthread','-g'])
8+
env.Append(LINKFLAGS=['-pthread'])
9+
10+
def append_lib(self, *libs):
    """Add one or more libraries to this environment's LIBPATH and LIBS.

    Each argument may be a path string, an SCons File node, or an iterable
    of such values (for example the node list returned by a builder).
    File nodes contribute their directory to LIBPATH and their name to
    LIBS; anything else is unpacked and processed recursively.
    """
    for lib in libs:
        if isinstance(lib, str):
            # Convert the path into a File node so it is handled by the
            # File branch below.  (Binding the result to `libs` instead of
            # `lib` left the string unconverted and sent it into the
            # recursive branch, which never terminates for a string.)
            lib = File(lib)

        if isinstance(lib, SCons.Node.FS.File):
            self.Append(LIBPATH=[lib.dir])
            self.Append(LIBS=[lib.name])
        else:
            # Not a single node: treat it as a collection of libraries.
            self.append_lib(*lib)
20+
type(env).append_lib = append_lib
21+
22+
if 'PYTHON_VERSION' in ARGUMENTS:
23+
env['PYTHON_VERSION'] = ARGUMENTS['PYTHON_VERSION']
24+
25+
# More readable output
26+
if not ARGUMENTS.get('VERBOSE'):
27+
env['CXXCOMSTR'] = 'Compiling C++ object $TARGETS'
28+
env['CCCOMSTR'] = 'Compiling C object $TARGETS'
29+
env['ARCOMSTR'] = 'Packing static library $TARGETS'
30+
env['RANLIBCOMSTR'] = 'Indexing static library $TARGETS'
31+
env['SHCCCOMSTR'] = 'Compiling shared C object $TARGETS'
32+
env['SHCXXCOMSTR'] = 'Compiling shared C++ object $TARGETS'
33+
env['LINKCOMSTR'] = 'Linking $TARGETS'
34+
env['SHLINKCOMSTR'] = 'Linking shared $TARGETS'
35+
36+
env.SConscript('SConscript', exports='env', duplicate=True,
37+
variant_dir='build')
38+
Clean('.','build')

build-env.py

+230
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
EnsureSConsVersion(1,2)
2+
3+
import os
4+
5+
import inspect
6+
import platform
7+
8+
def get_cuda_paths():
    """Determines CUDA {bin,lib,include} paths

    Defaults are picked from os.name ('nt' or 'posix').  When the platform
    string does not start with 'Darwin' and the machine name ends in '64',
    '64' is appended to the default library path.  Each path can then be
    overridden through the CUDA_BIN_PATH, CUDA_LIB_PATH and CUDA_INC_PATH
    environment variables; overrides are converted to absolute paths.

    returns (bin_path,lib_path,inc_path)
    raises ValueError when os.name is neither 'nt' nor 'posix'
    """

    # determine defaults
    if os.name == 'nt':
        bin_path = 'C:/CUDA/bin'
        lib_path = 'C:/CUDA/lib'
        inc_path = 'C:/CUDA/include'
    elif os.name == 'posix':
        bin_path = '/usr/local/cuda/bin'
        lib_path = '/usr/local/cuda/lib'
        inc_path = '/usr/local/cuda/include'
    else:
        # call syntax is valid on both Python 2 and Python 3
        raise ValueError('Error: unknown OS. Where is nvcc installed?')

    if platform.platform()[:6] != 'Darwin' and \
            platform.machine()[-2:] == '64':
        lib_path += '64'

    # override with environment variables
    if 'CUDA_BIN_PATH' in os.environ:
        bin_path = os.path.abspath(os.environ['CUDA_BIN_PATH'])
    if 'CUDA_LIB_PATH' in os.environ:
        lib_path = os.path.abspath(os.environ['CUDA_LIB_PATH'])
    if 'CUDA_INC_PATH' in os.environ:
        inc_path = os.path.abspath(os.environ['CUDA_INC_PATH'])

    return (bin_path, lib_path, inc_path)
39+
40+
def getTools():
    """Return the list of SCons tool names to load for the current OS.

    'default' is always first; it is followed by 'msvc' when os.name is
    'nt' and by 'gcc' when os.name is 'posix'.  Any other os.name yields
    just ['default'].
    """
    platform_tool = {'nt': 'msvc', 'posix': 'gcc'}.get(os.name)
    if platform_tool is None:
        return ['default']
    return ['default', platform_tool]
49+
50+
51+
OldEnvironment = Environment;
52+
53+
54+
# this dictionary maps the name of a compiler program to a dictionary mapping the name of
55+
# a compiler switch of interest to the specific switch implementing the feature
56+
gCompilerOptions = {
57+
'gcc' : {'warn_all' : '-Wall', 'warn_errors' : '-Werror', 'optimization' : '-O3', 'inplace':'-fPIC', 'debug' : '-g', 'exception_handling' : '', 'omp' : '-fopenmp'},
58+
'g++' : {'warn_all' : '-Wall', 'warn_errors' : '-Werror', 'optimization' : '-O3', 'inplace':'-fPIC', 'debug' : '-g', 'exception_handling' : '', 'omp' : '-fopenmp'},
59+
'cl' : {'warn_all' : '/Wall', 'warn_errors' : '/WX', 'optimization' : '/Ox', 'debug' : ['/Zi', '-D_DEBUG', '/MTd'], 'exception_handling' : '/EHsc', 'omp' : '/openmp'}
60+
}
61+
62+
63+
# this dictionary maps the name of a linker program to a dictionary mapping the name of
64+
# a linker switch of interest to the specific switch implementing the feature
65+
gLinkerOptions = {
66+
'gcc' : {'debug' : ''},
67+
'g++' : {'debug' : ''},
68+
'link' : {'debug' : '/debug' }
69+
}
70+
71+
72+
def getCFLAGS(mode, warn, warnings_as_errors, CC):
    """Build the list of C compiler switches for the given configuration.

    mode               -- 'release' or 'debug'; any other value adds no
                          mode-specific switches
    warn               -- when true, add the compiler's "all warnings" switch
    warnings_as_errors -- when true, add the "warnings are errors" switch
    CC                 -- compiler program name, used as a key into
                          gCompilerOptions

    returns a list of switches looked up in gCompilerOptions[CC]
    """
    result = []
    if mode == 'release':
        # turn on optimization
        result.append(gCompilerOptions[CC]['optimization'])
    elif mode == 'debug':
        # turn on debug mode
        result.append(gCompilerOptions[CC]['debug'])

    if mode in ('release', 'debug'):
        # Not every compiler entry defines an 'inplace' switch (the 'cl'
        # entry does not), so only add it when it exists instead of
        # failing with KeyError.
        if 'inplace' in gCompilerOptions[CC]:
            result.append(gCompilerOptions[CC]['inplace'])

    if mode == 'debug':
        result.append('-DTHRUST_DEBUG')

    if warn:
        # turn on all warnings
        result.append(gCompilerOptions[CC]['warn_all'])

    if warnings_as_errors:
        # treat warnings as errors
        result.append(gCompilerOptions[CC]['warn_errors'])

    # avoid problems specific to windows
    if CC == 'cl':
        # avoid min/max problems due to windows.h
        result.append('/DNOMINMAX')
        # suppress warnings due to "decorated name length exceeded"
        result.append('/wd4503')

    return result
100+
101+
102+
def getCXXFLAGS(mode, warn, warnings_as_errors, CXX):
    """Build the list of C++ compiler switches for the given configuration.

    mode               -- 'release' or 'debug'; any other value adds no
                          mode-specific switches
    warn               -- when true, add the compiler's "all warnings" switch
    warnings_as_errors -- when true, add the "warnings are errors" switch
    CXX                -- compiler program name, used as a key into
                          gCompilerOptions

    returns a list of switches looked up in gCompilerOptions[CXX]
    """
    result = []
    if mode == 'release':
        # turn on optimization
        result.append(gCompilerOptions[CXX]['optimization'])
        # Not every compiler entry defines an 'inplace' switch (the 'cl'
        # entry does not), so only add it when it exists instead of
        # failing with KeyError.
        if 'inplace' in gCompilerOptions[CXX]:
            result.append(gCompilerOptions[CXX]['inplace'])
    elif mode == 'debug':
        # turn on debug mode
        result.append(gCompilerOptions[CXX]['debug'])
    # enable exception handling
    result.append(gCompilerOptions[CXX]['exception_handling'])

    if warn:
        # turn on all warnings
        result.append(gCompilerOptions[CXX]['warn_all'])

    if warnings_as_errors:
        # treat warnings as errors
        result.append(gCompilerOptions[CXX]['warn_errors'])

    return result
123+
124+
125+
def getNVCCFLAGS(mode, arch):
    """Build the list of nvcc switches.

    mode -- 'debug' adds '-g' and '-G'; other values add nothing
    arch -- compute capability name, emitted as '-arch=<arch>'

    When the platform string starts with 'Darwin', '-m64' or '-m32' is
    added depending on whether the machine name ends in '64'.
    '-std=c++11' is always appended last.
    """
    flags = ['-arch=' + arch]

    if platform.platform().startswith('Darwin'):
        word_size_flag = '-m64' if platform.machine().endswith('64') else '-m32'
        flags.append(word_size_flag)

    if mode == 'debug':
        # turn on debug mode
        # XXX make this work when we've debugged nvcc -G
        flags.extend(['-g', '-G'])

    flags.append('-std=c++11')

    return flags
144+
145+
146+
def getLINKFLAGS(mode, LINK):
    """Return the linker switches for the given mode.

    Only 'debug' mode contributes a switch: the 'debug' entry of
    gLinkerOptions[LINK].  Every other mode yields an empty list.
    """
    if mode != 'debug':
        return []

    # turn on debug mode
    return [gLinkerOptions[LINK]['debug']]
153+
154+
155+
def Environment(*args, **keywords):
    """Create an SCons Environment set up for building with nvcc.

    Wraps the original SCons Environment (saved as OldEnvironment): declares
    the 'mode', 'arch', 'Wall' and 'Werror' build variables (plus
    'MSVC_VERSION' on Windows), loads the nvcc tool from the directory
    containing this file, appends C / C++ / nvcc / linker switches, adds the
    CUDA library and include paths, links against the CUDA libraries, and
    registers the help text for the build variables.

    All positional and keyword arguments are forwarded to OldEnvironment.

    returns the configured environment
    """
    # allow the user discretion to choose the MSVC version
    build_vars = Variables()
    if os.name == 'nt':
        build_vars.Add(EnumVariable('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('8.0', '9.0', '10.0')))

    # add a variable to handle RELEASE/DEBUG mode
    build_vars.Add(EnumVariable('mode', 'Release versus debug mode', 'release',
                                allowed_values = ('release', 'debug')))

    # add a variable to handle compute capability
    build_vars.Add(EnumVariable('arch', 'Compute capability code generation', 'sm_35',
                                allowed_values = ('sm_10', 'sm_11', 'sm_12', 'sm_13', 'sm_20', 'sm_21', 'sm_30', 'sm_35')))

    # add a variable to handle warnings
    if os.name == 'posix':
        build_vars.Add(BoolVariable('Wall', 'Enable all compilation warnings', 1))
    else:
        build_vars.Add(BoolVariable('Wall', 'Enable all compilation warnings', 0))

    # add a variable to treat warnings as errors
    build_vars.Add(BoolVariable('Werror', 'Treat warnings as errors', 0))

    # create an Environment
    env = OldEnvironment(*args, tools = getTools(), variables = build_vars, **keywords)

    # get the absolute path to the directory containing
    # this source file
    thisFile = inspect.getabsfile(Environment)
    thisDir = os.path.dirname(thisFile)

    # enable nvcc
    env.Tool('nvcc', toolpath = [thisDir])

    # get C compiler switches
    env.Append(CFLAGS = getCFLAGS(env['mode'], env['Wall'], env['Werror'], env.subst('$CC')))

    # get CXX compiler switches
    env.Append(CXXFLAGS = getCXXFLAGS(env['mode'], env['Wall'], env['Werror'], env.subst('$CXX')))

    # get NVCC compiler switches
    env.Append(NVCCFLAGS = getNVCCFLAGS(env['mode'], env['arch']))

    # get linker switches
    env.Append(LINKFLAGS = getLINKFLAGS(env['mode'], env.subst('$LINK')))

    # get CUDA paths
    (cuda_exe_path, cuda_lib_path, cuda_inc_path) = get_cuda_paths()
    env.Append(LIBPATH = [cuda_lib_path])
    env.Append(CPPPATH = [cuda_inc_path])

    # link against the standard library
    # we don't have to do this on Windows
    if os.name == 'posix':
        env.Append(LIBS = ['stdc++'])

    # link against backend-specific runtimes
    # XXX we shouldn't have to link against cudart unless we're using the
    #     cuda runtime, but cudafe inserts some dependencies when compiling .cu files
    # XXX ideally this gets handled in nvcc.py if possible
    env.Append(LIBS = ['cuda', 'cudart'])

    # import the LD_LIBRARY_PATH so we can run commands which depend
    # on shared libraries
    # XXX we should probably just copy the entire environment
    if os.name == 'posix':
        if env['PLATFORM'] == "darwin":
            loader_var = 'DYLD_LIBRARY_PATH'
        else:
            loader_var = 'LD_LIBRARY_PATH'
        # The variable is frequently unset; only forward it when present
        # rather than failing with KeyError.
        if loader_var in os.environ:
            env['ENV'][loader_var] = os.environ[loader_var]

    # generate help text
    Help(build_vars.GenerateHelpText(env))

    return env
230+

libmixed_cpu_and_gpu/SConscript

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Import('env')
2+
3+
env.Append(CPPPATH=[Dir('include')])
4+
5+
env.Append(NVCCFLAGS=['--expt-extended-lambda'])
6+
cuda_objects = env.Object(Glob('src/*.cu'),OBJSUFFIX='.cuda.o')
7+
8+
lib = env.SharedLibrary('mixed_cpu_and_gpu',Glob('src/*.cc'),
9+
LIBS=[cuda_objects])
10+
11+
Return('lib')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
// Host-only example class wrapping a single unsigned int value.
// The accessors are declared here and defined out of line.
class a_cpu_class {

 public:

  a_cpu_class() {}
  ~a_cpu_class() {}

  // Returns the current value of `member`.
  unsigned int get_member() const;
  // Replaces the value of `member` with `val`.
  void set_member(unsigned int val);

 private:

  // Starts at zero until set_member() is called.
  unsigned int member = 0;

};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#pragma once
2+
#include <vector>
3+
4+
5+
class a_mixed_gpu_and_cpu_class {
6+
7+
public:
8+
9+
a_mixed_gpu_and_cpu_class() { ; }
10+
~a_mixed_gpu_and_cpu_class() { ; }
11+
12+
// only callable from within gpu kernel context
13+
__device__ void a_device_function();
14+
15+
// can only be called from cpu code, but makes
16+
// cuda api calls which alter gpu state
17+
__host__ void a_host_function();
18+
19+
// callable from CPU, only affects cpu code
20+
unsigned int a_normal_cpu_method() const { return 0; }
21+
22+
};
23+
24+
// a forward declared cuda kernel
25+
__global__ void some_kernel (double* data);
+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#include "a_cpu_class.hh"
2+
3+
unsigned int a_cpu_class::get_member() const { return member; }
4+
void a_cpu_class::set_member(unsigned int val) { member = val; }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "a_mixed_gpu_and_cpu_class.hh"
2+
3+
#include <iostream>
4+
5+
// Device-side demo method: computes a small float value and prints it
// with device printf.
__device__ void a_mixed_gpu_and_cpu_class::a_device_function() {
  float x = 0.0f;
  x += 1.0f;
  // x is a float (promoted to double when passed through varargs), so it
  // must be printed with %f; %d would misinterpret the argument.
  printf("%f\n", x);
}
10+
11+
12+
// Host-side demo method: allocates room for 10 floats on the device and
// releases it again.  CUDA API failures are reported on stderr; the
// method returns without freeing when the allocation itself failed.
__host__ void a_mixed_gpu_and_cpu_class::a_host_function() {
  // Start from a null pointer so a failed allocation never leaves an
  // indeterminate value that could be handed to cudaFree.
  float* gpu_mem = nullptr;

  const cudaError_t alloc_status =
      cudaMalloc(reinterpret_cast<void**>(&gpu_mem), 10 * sizeof(float));
  if (alloc_status != cudaSuccess) {
    std::cerr << "cudaMalloc failed: " << cudaGetErrorString(alloc_status)
              << std::endl;
    return;
  }

  const cudaError_t free_status = cudaFree(gpu_mem);
  if (free_status != cudaSuccess) {
    std::cerr << "cudaFree failed: " << cudaGetErrorString(free_status)
              << std::endl;
  }
}
17+
18+
19+
// Demo kernel: scales the first element of `data` by 10 in place.
// No thread or block index is consulted, so every thread that executes
// this kernel reads and writes data[0].
// NOTE(review): presumably meant to be launched with a single thread;
// confirm against the launch site.
__global__ void some_kernel (double* data) {
  data[0] = data[0] * 10.0;
}

minimal.cu

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include "a_cpu_class.hh"
2+
#include "a_mixed_gpu_and_cpu_class.hh"
3+
4+
5+
// Entry point of the minimal example: synchronizes with the device once,
// then exercises one method on the cpu-only class and one host method on
// the mixed class.  Returns 0 on success and 1 when the initial device
// synchronization reports an error.
int main(int argc, char** argv) {
  // command-line arguments are not used by this example
  (void)argc;
  (void)argv;

  // Stop early if the CUDA runtime reports an error instead of carrying
  // on and making further CUDA calls.
  if (cudaDeviceSynchronize() != cudaSuccess) {
    return 1;
  }

  a_cpu_class cpu_obj;
  a_mixed_gpu_and_cpu_class mixed_obj;

  cpu_obj.set_member(10);
  mixed_obj.a_host_function();

  return 0;
}

0 commit comments

Comments
 (0)