
Commit 7c80053 (parent: 592e2f5)

example of linking RAJA with CUDA
File tree: 3 files changed, +307 -0 lines changed

raja-cuda-build/main.cpp (+200 lines)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC.
//
// Produced at the Lawrence Livermore National Laboratory
//
// LLNL-CODE-689114
//
// All rights reserved.
//
// This file is part of RAJA.
//
// For details about use and distribution, please read RAJA/LICENSE.
//
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <cstdlib>
#include <cstring>
#include <iostream>

#include "memoryManager.hpp"

#include "RAJA/RAJA.hpp"

/*
 * Vector Addition Example
 *
 * Computes c = a + b, where a, b, c are vectors of ints.
 * It illustrates similarities between a C-style for-loop and a RAJA
 * forall loop.
 *
 * RAJA features shown:
 *   - `forall` loop iteration template method
 *   - Index range segment
 *   - Execution policies
 *
 * If CUDA is enabled, CUDA unified memory is used.
 */
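
// For orientation, every kernel in this file follows the same `forall`
// pattern, sketched here (ExecutionPolicy stands in for the concrete
// policies used below, e.g. RAJA::seq_exec or RAJA::cuda_exec):
//
//   RAJA::forall<ExecutionPolicy>(RAJA::RangeSegment(begin, end),
//                                 [=] (int i) {
//     // loop body, executed for each i in [begin, end)
//   });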

/*
  CUDA_BLOCK_SIZE - specifies the number of threads in a CUDA thread block
*/
#if defined(RAJA_ENABLE_CUDA)
const int CUDA_BLOCK_SIZE = 256;
#endif

//
// Functions for checking and printing results
//
void checkResult(int* res, int len);
void printResult(int* res, int len);


int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{

  std::cout << "\n\nRAJA vector addition example...\n";

  //
  // Define vector length
  //
  const int N = 1000000;

  //
  // Allocate and initialize vector data
  //
  int *a = memoryManager::allocate<int>(N);
  int *b = memoryManager::allocate<int>(N);
  int *c = memoryManager::allocate<int>(N);

  for (int i = 0; i < N; ++i) {
    a[i] = -i;
    b[i] = i;
  }


//----------------------------------------------------------------------------//

  std::cout << "\n Running C-version of vector addition...\n";

  for (int i = 0; i < N; ++i) {
    c[i] = a[i] + b[i];
  }

  checkResult(c, N);
  //printResult(c, N);


//----------------------------------------------------------------------------//
// RAJA::seq_exec policy enforces strictly sequential execution...
//----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA sequential vector addition...\n";

  RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);


//----------------------------------------------------------------------------//
// RAJA::simd_exec policy should force the compiler to generate SIMD
// vectorization optimizations...
//----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA SIMD vector addition...\n";

  RAJA::forall<RAJA::simd_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);


//----------------------------------------------------------------------------//
// RAJA::loop_exec policy means that the compiler is allowed to generate
// optimizations (e.g., SIMD) if it thinks it is safe to do so...
//----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA loop-exec vector addition...\n";

  RAJA::forall<RAJA::loop_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);


//----------------------------------------------------------------------------//

#if defined(RAJA_ENABLE_OPENMP)
  std::cout << "\n Running RAJA OpenMP vector addition...\n";

  RAJA::forall<RAJA::omp_parallel_for_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);
#endif


//----------------------------------------------------------------------------//
#if defined(RAJA_ENABLE_CUDA)
  std::cout << "\n Running RAJA CUDA vector addition...\n";

  RAJA::forall<RAJA::cuda_exec<CUDA_BLOCK_SIZE>>(RAJA::RangeSegment(0, N),
    [=] RAJA_DEVICE (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);
#endif

//----------------------------------------------------------------------------//

  //
  // Clean up.
  //
  memoryManager::deallocate(a);
  memoryManager::deallocate(b);
  memoryManager::deallocate(c);

  std::cout << "\n DONE!...\n";

  return 0;
}

//
// Function to check result and report P/F.
//
void checkResult(int* res, int len)
{
  bool correct = true;
  for (int i = 0; i < len; i++) {
    if ( res[i] != 0 ) { correct = false; }
  }
  if ( correct ) {
    std::cout << "\n\t result -- PASS\n";
  } else {
    std::cout << "\n\t result -- FAIL\n";
  }
}

//
// Function to print result.
//
void printResult(int* res, int len)
{
  std::cout << std::endl;
  for (int i = 0; i < len; i++) {
    std::cout << "result[" << i << "] = " << res[i] << std::endl;
  }
  std::cout << std::endl;
}
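
For readers less familiar with CUDA, the RAJA::cuda_exec forall above is conceptually equivalent to writing and launching a kernel by hand. A minimal sketch of that equivalent (not the code RAJA actually generates; vecAdd is a hypothetical name):

// Hypothetical hand-written equivalent of the cuda_exec forall:
__global__ void vecAdd(int* c, const int* a, const int* b, int N)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;  // global thread index
  if (i < N) {                                    // guard the partial last block
    c[i] = a[i] + b[i];
  }
}

// Launched over ceil(N / CUDA_BLOCK_SIZE) blocks of CUDA_BLOCK_SIZE threads:
// vecAdd<<<(N + CUDA_BLOCK_SIZE - 1) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE>>>(c, a, b, N);

The cuda_exec policy takes care of the index computation, the bounds guard, and the launch configuration, which is why the same lambda body works unchanged across all of the policies above.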

raja-cuda-build/makefile (+51 lines)
#-----[Build-type]------
Build-type = CUDA
#Build-type = CPU


#-----[RAJA and CUDA directories]----
RAJA_DIR ?= /home/arturo/git-repo/RAJA/develop/build
CUDA_DIR ?= /usr/local/cuda-9.0

rajaInc = -I$(RAJA_DIR)/include
rajaLib = $(RAJA_DIR)/lib/libRAJA.a
cudaLib = -Wl,-rpath -Wl,$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/lib64 -lcuda -lcudart -lcudadevrt -lnvToolsExt
#===================================

#---[Host compiler]-----
host-compiler = g++-6
host-compilerFlags = '-O3 -g -std=c++11 -m64 -fopenmp'
compilerFlags = -O3 -g -std=c++11 -m64 -fopenmp
paths = -I.
paths += $(rajaInc)
linker = $(host-compiler)
#======================

#----[Device compiler]----
device-compiler = nvcc
device-flags = -g -std=c++11 -Xptxas=-v -lineinfo --expt-extended-lambda --restrict
device-flags += -ccbin=$(linker) -Xcompiler $(host-compilerFlags) -x=cu -arch=sm_50
#======================

#----[CUDA compilation]---------
ifeq ($(Build-type),CUDA)
main: main.cpp
	@echo Compiling for CUDA - start
	$(device-compiler) $(device-flags) $(paths) -c -o main.o main.cpp
	$(linker) -o main main.o $(cudaLib) -fopenmp $(rajaLib)
	@echo Compiling for CUDA - end
else
#----[CPU compilation]---------
main: main.cpp
	@echo Compiling for CPU - start
	$(host-compiler) $(compilerFlags) $(paths) -o main main.cpp $(rajaLib)
	@echo Compiling for CPU - end
endif
#======================


#-----[Clean up]-------
clean:
	rm -f main main.o
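
Because the directory variables are set with ?=, they can be overridden on the command line rather than edited in place. A usage sketch (paths are illustrative):

make RAJA_DIR=$HOME/raja-install CUDA_DIR=/usr/local/cuda-9.0
./main

make Build-type=CPU   # host-only build; nvcc is not required
make clean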

raja-cuda-build/memoryManager.hpp (+56 lines)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC.
//
// Produced at the Lawrence Livermore National Laboratory
//
// LLNL-CODE-689114
//
// All rights reserved.
//
// This file is part of RAJA.
//
// For details about use and distribution, please read RAJA/LICENSE.
//
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#ifndef EXAMPLES_MEMORYMANAGER_HPP
#define EXAMPLES_MEMORYMANAGER_HPP

#include "RAJA/RAJA.hpp"
#include "RAJA/util/defines.hpp"

/*
  Since RAJA does not manage memory, we include a general-purpose memory
  manager that performs either C++-style allocation/deallocation or
  CUDA unified memory allocation/deallocation. The type of memory
  allocated depends on how RAJA was configured.
*/
namespace memoryManager
{

template <typename T>
T *allocate(RAJA::Index_type size)
{
  T *ptr;
#if defined(RAJA_ENABLE_CUDA)
  cudaMallocManaged((void **)&ptr, sizeof(T) * size, cudaMemAttachGlobal);
#else
  ptr = new T[size];
#endif
  return ptr;
}

template <typename T>
void deallocate(T *&ptr)
{
  if (ptr) {
#if defined(RAJA_ENABLE_CUDA)
    cudaFree(ptr);
#else
    delete[] ptr;
#endif
    ptr = nullptr;
  }
}

} // namespace memoryManager

#endif // EXAMPLES_MEMORYMANAGER_HPP
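
A minimal standalone sketch of how these helpers are meant to be used (mirroring what main.cpp does; the length 8 is arbitrary):

#include "memoryManager.hpp"

int main()
{
  int *v = memoryManager::allocate<int>(8);  // unified memory when RAJA_ENABLE_CUDA is defined
  for (int i = 0; i < 8; ++i) { v[i] = i; }  // pointer is host-accessible either way
  memoryManager::deallocate(v);              // frees and resets v to nullptr
  return 0;
}

Taking the pointer by reference in deallocate lets it null out the caller's copy, which guards against accidental double frees.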
