diff --git a/README.md b/README.md index cad1abd..abb0313 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,64 @@ CUDA Rasterizer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Siyu Zheng +* Tested on: Windows 10, i7-8750 @ 2.20GHz 16GB, GTX 1060 6GB, Visual Studio 2015, CUDA 8.0(Personal Laptop) -### (TODO: Your README) +## CUDA Rasterizer -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +| Duck | Cesium Milk Truck | +| ------------- |:-------------:| +| ![](images/duckGIF.gif) | ![](images/truckGIF.gif) | +### Shading Methods +| Lambert | Blinn-Phong | +| ------------- |:-------------:| +| ![](images/lambert.png) | ![](images/blinn.png) | -### Credits +Use different shading methods to render the object. + +### Perspective correct texture + +| Non-perspective correct | Perspective correct | +| ------------- |:-------------:| +| ![](images/nonperspective.gif) | ![](images/perspective.gif) | + +Use perspective-correct interpolation instead of barycentric inperpolation on z. Then update the depth if needed and do perspective correct inperpolation on non-positional vertex attributes of this fragment. It's quite obvious that without perpective correct interpolation, the display of checkerboard is wrong. + +### Bilinear texture filtering +| Non-bilinear | Bilinear texture filtering | +| ------------- |:-------------:| +| ![](images/nobilinear.png) | ![](images/bilinear.png) | + +Use four texture points nearest to the point that the pixel represents and interpolate the color value. As the graph shows, the edge in checker board with bilinear texture filtering is more smooth. + +### Color Interpolation +| Triangle | Box | +| ------------- |:-------------:| +| ![](images/triangle_color.png) | ![](images/box_color.png) | + +Assign red, green and blue for vertices of each triangle then use barycentric interpolation to calculate the color of each pixel in that triangle. + +### Rasterize point and line +| Point cloud | Wireframe | +| ------------- |:-------------:| +| ![](images/point.gif) | ![](images/line.gif) | +For rasterizing points, check if that position of the fragment is in the screen and assign each fragment with a color on that pixel. + +For rasterizing lines, loop all edges for each triangle and calculate the length for each line segment. Divide the line segment into tiny subdivision then assigned color on that pixel. + +### Perform Analysis + +![](images/time.png) + +Recorded the running time for each process. In general, vertex transform and primitive assembly took about similar amount of time for different test files. We can see that in truck example, the rasterization took most of the time since Cesium Milk Truck has several different texture. For rendering, there is only some shading methods so it doesn't cost much of time. For sending image to PBO, it took similar amount of time for different objects. + +![](images/primitives.png) + +The running time for different primitive types. Rasterizing triangle cost a little bit more time than point and line. + +### Credits +* [Bilinear filtering Wiki](https://en.wikipedia.org/wiki/Bilinear_filtering) * [tinygltfloader](https://github.com/syoyo/tinygltfloader) by [@soyoyo](https://github.com/syoyo) * [glTF Sample Models](https://github.com/KhronosGroup/glTF/blob/master/sampleModels/README.md) diff --git a/images/bilinear.png b/images/bilinear.png new file mode 100644 index 0000000..d7e3906 Binary files /dev/null and b/images/bilinear.png differ diff --git a/images/blinn.png b/images/blinn.png new file mode 100644 index 0000000..225e9e0 Binary files /dev/null and b/images/blinn.png differ diff --git a/images/box_color.png b/images/box_color.png new file mode 100644 index 0000000..dc1fbbe Binary files /dev/null and b/images/box_color.png differ diff --git a/images/duckGIF.gif b/images/duckGIF.gif new file mode 100644 index 0000000..477d790 Binary files /dev/null and b/images/duckGIF.gif differ diff --git a/images/lambert.png b/images/lambert.png new file mode 100644 index 0000000..f3fba51 Binary files /dev/null and b/images/lambert.png differ diff --git a/images/line.gif b/images/line.gif new file mode 100644 index 0000000..52eed9a Binary files /dev/null and b/images/line.gif differ diff --git a/images/nobilinear.png b/images/nobilinear.png new file mode 100644 index 0000000..ac2ea78 Binary files /dev/null and b/images/nobilinear.png differ diff --git a/images/nonperspective.gif b/images/nonperspective.gif new file mode 100644 index 0000000..c5eb8ad Binary files /dev/null and b/images/nonperspective.gif differ diff --git a/images/perspective.gif b/images/perspective.gif new file mode 100644 index 0000000..1bcc084 Binary files /dev/null and b/images/perspective.gif differ diff --git a/images/point.gif b/images/point.gif new file mode 100644 index 0000000..4646994 Binary files /dev/null and b/images/point.gif differ diff --git a/images/primitives.png b/images/primitives.png new file mode 100644 index 0000000..dab63e6 Binary files /dev/null and b/images/primitives.png differ diff --git a/images/time.png b/images/time.png new file mode 100644 index 0000000..4b22323 Binary files /dev/null and b/images/time.png differ diff --git a/images/triangle_color.png b/images/triangle_color.png new file mode 100644 index 0000000..5ddd3f4 Binary files /dev/null and b/images/triangle_color.png differ diff --git a/images/truck.gif b/images/truck.gif new file mode 100644 index 0000000..fb00b8c Binary files /dev/null and b/images/truck.gif differ diff --git a/images/truckGIF.gif b/images/truckGIF.gif new file mode 100644 index 0000000..46bf79b Binary files /dev/null and b/images/truckGIF.gif differ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a57f69f..40c13cb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,5 +6,5 @@ set(SOURCE_FILES cuda_add_library(src ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_30 ) diff --git a/src/rasterize.cu b/src/rasterize.cu index 1262a09..d2f9680 100644 --- a/src/rasterize.cu +++ b/src/rasterize.cu @@ -17,6 +17,7 @@ #include "rasterize.h" #include #include +#include namespace { @@ -43,10 +44,10 @@ namespace { glm::vec3 eyePos; // eye space position used for shading glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation - // glm::vec3 col; + glm::vec3 col; glm::vec2 texcoord0; TextureData* dev_diffuseTex = NULL; - // int texWidth, texHeight; + int texWidth, texHeight; // ... }; @@ -62,10 +63,11 @@ namespace { // The attributes listed below might be useful, // but always feel free to modify on your own - // glm::vec3 eyePos; // eye space position used for shading - // glm::vec3 eyeNor; - // VertexAttributeTexcoord texcoord0; - // TextureData* dev_diffuseTex; + glm::vec3 eyePos; // eye space position used for shading + glm::vec3 eyeNor; + VertexAttributeTexcoord texcoord0; + TextureData* dev_diffuseTex; + int texWidth, texHeight; // ... }; @@ -81,7 +83,6 @@ namespace { VertexAttributePosition* dev_position; VertexAttributeNormal* dev_normal; VertexAttributeTexcoord* dev_texcoord0; - // Materials, add more attributes when needed TextureData* dev_diffuseTex; int diffuseTexWidth; @@ -98,6 +99,16 @@ namespace { } + +#define BLINN 0 +#define LAMBERT 1 +#define BILINEAR_FILTERING 0 +#define PERSPECTIVE_CORRECT 0 +#define RASTERIZE_POINT 0 +#define RASTERIZE_LINE 0 +#define COLOR_INTERPOLATION 0 +#define TIMER 1 + static std::map> mesh2PrimitivesMap; @@ -110,6 +121,13 @@ static Fragment *dev_fragmentBuffer = NULL; static glm::vec3 *dev_framebuffer = NULL; static int * dev_depth = NULL; // you might need this buffer when doing depth test +#if TIMER +static double time_assembly = 0.0; +static double time_rasterize = 0.0; +static double time_render = 0.0; +static double time_sendToPBO = 0.0; +static int iter = 0; +#endif /** * Kernel that writes the image to the OpenGL PBO directly. @@ -133,6 +151,8 @@ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) { } } + + /** * Writes fragment colors to the framebuffer */ @@ -143,10 +163,40 @@ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) { int index = x + (y * w); if (x < w && y < h) { - framebuffer[index] = fragmentBuffer[index].color; - // TODO: add your fragment shader code here +#if RASTERIZE_POINT || RASTERIZE_LINE + framebuffer[index] = fragmentBuffer[index].color; +#else + if (fragmentBuffer[index].dev_diffuseTex == NULL) { + framebuffer[index] = fragmentBuffer[index].color; + } + else { + // TODO: add your fragment shader code here + Fragment frag = fragmentBuffer[index]; + glm::vec3 lightPos(5.f, 5.f, 5.f); + glm::vec3 lightVec = glm::normalize((lightPos - frag.eyePos)); + glm::vec3 specColor(0.f, 0.f, 0.f); + float lambertian = glm::max(glm::dot(lightVec, frag.eyeNor), 0.0f); + glm::vec3 V = glm::normalize(frag.eyePos); + glm::vec3 L = lightVec; + glm::vec3 H = (V + L) / 2.f; + float exp = 20.f; + float specularTerm = glm::max(pow(glm::dot(glm::normalize(H), glm::normalize(frag.eyeNor)), exp), 0.f);; + float ambientTerm = 0.2; + float diffuseTerm = glm::dot(glm::normalize(frag.eyeNor), glm::normalize(lightVec)); + diffuseTerm = glm::clamp(diffuseTerm, 0.f, 1.f); + glm::vec3 res = glm::vec3(); +#if BLINN + res = ambientTerm * glm::vec3(0.1, 0.1, 0.1) + diffuseTerm * frag.color + specularTerm * glm::vec3(1.f, 1.f, 1.f); +#elif LAMBERT + res = ambientTerm * glm::vec3(0.1, 0.1, 0.1) + diffuseTerm * frag.color; +#else + res = frag.color; +#endif + framebuffer[index] = res; + } +#endif } } @@ -638,15 +688,40 @@ void _vertexTransformAndAssembly( // Multiply the MVP matrix for each vertex position, this will transform everything into clipping space // Then divide the pos by its w element to transform into NDC space // Finally transform x and y to viewport space - + glm::vec4 pos = glm::vec4(primitive.dev_position[vid], 1.f); + pos = MVP * pos; + pos = pos / pos.w; + pos.x = (pos.x + 1.f) * 0.5f * width; + pos.y = (1.f - pos.y) * 0.5f * height; + primitive.dev_verticesOut[vid].pos = pos; + glm::vec4 eyePos = glm::vec4(primitive.dev_position[vid], 1.f); + eyePos = MV * eyePos; + eyePos = eyePos / eyePos.w; + primitive.dev_verticesOut[vid].eyePos = glm::vec3(eyePos); + glm::vec3 eyeNor = primitive.dev_normal[vid]; + primitive.dev_verticesOut[vid].eyeNor = glm::normalize(MV_normal * eyeNor); + primitive.dev_verticesOut[vid].col = glm::vec3(0.5, 0.5, 0.5); // TODO: Apply vertex assembly here // Assemble all attribute arraies into the primitive array - + if (primitive.dev_texcoord0 != NULL) { + primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid]; + } + else { + primitive.dev_verticesOut[vid].texcoord0 = glm::vec2(0.f, 0.f); + } + if (primitive.dev_diffuseTex != NULL) { + primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex; + + } + primitive.dev_verticesOut[vid].texWidth = primitive.diffuseTexWidth; + primitive.dev_verticesOut[vid].texHeight = primitive.diffuseTexHeight; + } } + static int curPrimitiveBeginId = 0; __global__ @@ -660,12 +735,12 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_ // TODO: uncomment the following code for a start // This is primitive assembly for triangles - //int pid; // id for cur primitives vector - //if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { - // pid = iid / (int)primitive.primitiveType; - // dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] - // = primitive.dev_verticesOut[primitive.dev_indices[iid]]; - //} + int pid; // id for cur primitives vector + if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + } // TODO: other primitive types (point, line) @@ -673,6 +748,145 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_ } +__device__ glm::vec3 getBilinearFilteredPixelColor(TextureData* texture, float u, float v, int width, int height) { + u = u * width - 0.5; + v = v * height - 0.5; + int x = floor(u); + int y = floor(v); + float du = u - x; + float dv = v - y; + float u_opposite = 1 - du; + float v_opposite = 1 - dv; + int xy00 = 3 * (x + y * width); + int xy10 = 3 * (x + 1 + y * width); + int xy01 = 3 * (x + (y + 1) * width); + int xy11 = 3 * (x + 1 + (y + 1) * width); + float r = (texture[xy00] * u_opposite + texture[xy10] * du) * v_opposite + + (texture[xy10] * u_opposite + texture[xy11] * du) * dv; + float g = (texture[xy00 + 1] * u_opposite + texture[xy10 + 1] * du) * v_opposite + + (texture[xy01 + 1] * u_opposite + texture[xy11 + 1] * du) * dv; + float b = (texture[xy00 + 2] * u_opposite + texture[xy10 + 2] * du) * v_opposite + + (texture[xy01 + 2] * u_opposite + texture[xy11 + 2] * du) * dv; + return glm::vec3(r, g, b); +} + +__global__ void rasterize_triangle(const int width, const int height, int* depth, int numPrimitives, Primitive* primitives, Fragment* fragmentBuffer) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= numPrimitives) { + return; + } + Primitive frag = primitives[index]; + glm::vec3 tri[3]; + tri[0] = glm::vec3(frag.v[0].pos); + tri[1] = glm::vec3(frag.v[1].pos); + tri[2] = glm::vec3(frag.v[2].pos); +#if COLOR_INTERPOLATION + frag.v[0].col = glm::vec3(1.f, 0.f, 0.f); + if (index % 2 == 0) { + frag.v[1].col = glm::vec3(0.f, 1.f, 0.f); + frag.v[2].col = glm::vec3(0.f, 0.f, 1.f); + } + else { + frag.v[2].col = glm::vec3(0.f, 1.f, 0.f); + frag.v[1].col = glm::vec3(0.f, 0.f, 1.f); + } +#endif +#if RASTERIZE_POINT + glm::vec3 pointCol = glm::vec3(1.f, 0.f, 0.f); + for (int i = 0; i < 3; i++) { + tri[i].x = glm::clamp(tri[i].x, 0.f, (float)(width - 1)); + tri[i].y = glm::clamp(tri[i].y, 0.f, (float)(height - 1)); + int pixel = int(tri[i].x) + int(tri[i].y) * width; + fragmentBuffer[pixel].color = pointCol; + } +#elif RASTERIZE_LINE + glm::vec3 lineCol = glm::vec3(0.f, 0.f, 1.f); + glm::vec2 start = glm::vec2(tri[0].x, tri[0].y); + glm::vec2 end = glm::vec2(tri[1].x, tri[1].y); + float length = glm::length(glm::vec2(end.x - start.x, end.y - start.y)); + for (float di = 0.f; di < 1.f; di += 1.f / length) { + int x = start.x * (1 - di) + end.x * di; + int y = start.y * (1 - di) + end.y * di; + int pixel = x + y * width; + fragmentBuffer[pixel].color = lineCol; + } + start = glm::vec2(tri[1].x, tri[1].y); + end = glm::vec2(tri[2].x, tri[2].y); + length = glm::length(glm::vec2(end.x - start.x, end.y - start.y)); + for (float di = 0.f; di < 1.f; di += 1.f / length) { + int x = start.x * (1 - di) + end.x * di; + int y = start.y * (1 - di) + end.y * di; + int pixel = x + y * width; + fragmentBuffer[pixel].color = lineCol; + } + start = glm::vec2(tri[2].x, tri[2].y); + end = glm::vec2(tri[0].x, tri[0].y); + length = glm::length(glm::vec2(end.x - start.x, end.y - start.y)); + for (float di = 0.f; di < 1.f; di += 1.f / length) { + int x = start.x * (1 - di) + end.x * di; + int y = start.y * (1 - di) + end.y * di; + int pixel = x + y * width; + fragmentBuffer[pixel].color = lineCol; + } +#else + AABB bb = getAABBForTriangle(tri); + bb.min[0] = glm::clamp(bb.min[0], 0.f, float(width) - 1); + bb.min[1] = glm::clamp(bb.min[1], 0.f, float(height) - 1); + bb.max[0] = glm::clamp(bb.max[0], 0.f, float(width) - 1); + bb.max[1] = glm::clamp(bb.max[1], 0.f, float(height) - 1); + + for (int i = bb.min[0]; i <= bb.max[0]; i++) { + for (int j = bb.min[1]; j <= bb.max[1]; j++) { + glm::vec3 bary = calculateBarycentricCoordinate(tri, glm::vec2(i, j)); + if (isBarycentricCoordInBounds(bary)) { +#if PERSPECTIVE_CORRECT + int curDepth = 1.0f / (bary[0] / tri[0].z + bary[1] / tri[1].z + bary[2] / tri[2].z); +#else + int curDepth = getZAtCoordinate(bary, tri) * INT_MIN; +#endif + int pixel = i + j * width; + atomicMin(&depth[pixel], curDepth); + if (depth[pixel] == curDepth) { + fragmentBuffer[pixel].eyePos = bary[0] * frag.v[0].eyePos + bary[1] * frag.v[1].eyePos + bary[2] * frag.v[2].eyePos; +#if PERSPECTIVE_CORRECT + tri[0].z += FLT_EPSILON; + tri[1].z += FLT_EPSILON; + tri[2].z += FLT_EPSILON; + float perspectiveZ = 1.0f / (bary[0] / tri[0].z + bary[1] / tri[1].z + bary[2] / tri[2].z); + fragmentBuffer[pixel].eyeNor = glm::normalize(perspectiveZ * (bary[0] * frag.v[0].eyeNor / tri[0].z + bary[1] * frag.v[1].eyeNor / tri[1].z + bary[2] * frag.v[2].eyeNor / tri[2].z)); + fragmentBuffer[pixel].texcoord0 = perspectiveZ * (bary[0] * frag.v[0].texcoord0 / tri[0].z + bary[1] * frag.v[1].texcoord0 / tri[1].z + bary[2] * frag.v[2].texcoord0 / tri[2].z); +#else + fragmentBuffer[pixel].eyeNor = glm::normalize(bary[0] * frag.v[0].eyeNor + bary[1] * frag.v[1].eyeNor + bary[2] * frag.v[2].eyeNor); + fragmentBuffer[pixel].texcoord0 = bary[0] * frag.v[0].texcoord0 + bary[1] * frag.v[1].texcoord0 + bary[2] * frag.v[2].texcoord0; +#endif + fragmentBuffer[pixel].texWidth = frag.v[0].texWidth; + fragmentBuffer[pixel].texHeight = frag.v[0].texHeight; + fragmentBuffer[pixel].dev_diffuseTex = frag.v[0].dev_diffuseTex; + if (fragmentBuffer[pixel].dev_diffuseTex != NULL) { + TextureData* texture = fragmentBuffer[pixel].dev_diffuseTex; + float uf = fragmentBuffer[pixel].texcoord0.x * fragmentBuffer[pixel].texWidth; + float vf = fragmentBuffer[pixel].texcoord0.y * fragmentBuffer[pixel].texHeight; + int u = int(uf); + int v = int(vf); + u = glm::min(glm::max(0, u), fragmentBuffer[pixel].texWidth - 1); + v = glm::min(glm::max(0, v), fragmentBuffer[pixel].texHeight - 1); +#if BILINEAR_FILTERING + fragmentBuffer[pixel].color = getBilinearFilteredPixelColor(texture, fragmentBuffer[pixel].texcoord0.x, fragmentBuffer[pixel].texcoord0.y, fragmentBuffer[pixel].texWidth, fragmentBuffer[pixel].texHeight) / 255.f; +#else + int colorInd = 3 * (v * fragmentBuffer[pixel].texWidth + u); + fragmentBuffer[pixel].color = glm::vec3(texture[colorInd], texture[colorInd + 1], texture[colorInd + 2]) / 255.f; +#endif + } + else { + fragmentBuffer[pixel].color = bary[0] * frag.v[0].col + bary[1] * frag.v[1].col + bary[2] * frag.v[2].col; + } + } + } + } + } +#endif +} + /** @@ -686,7 +900,9 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g // Execute your rasterization pipeline here // (See README for rasterization pipeline outline.) - +#if TIMER + auto start = std::chrono::high_resolution_clock::now(); +#endif // Vertex Process & primitive assembly { curPrimitiveBeginId = 0; @@ -694,7 +910,6 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g auto it = mesh2PrimitivesMap.begin(); auto itEnd = mesh2PrimitivesMap.end(); - for (; it != itEnd; ++it) { auto p = (it->second).begin(); // each primitive auto pEnd = (it->second).end(); @@ -718,20 +933,55 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g checkCUDAError("Vertex Processing and Primitive Assembly"); } - +#if TIMER + cudaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + time_assembly += double(duration.count()); + std::cout << ++iter << " iteration" << std::endl; + std::cout << "Vertex transform and assembly cost " << time_assembly << " microsecond" << std::endl; +#endif cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); initDepth << > >(width, height, dev_depth); // TODO: rasterize - - + dim3 numThreadsPerBlock(128); + dim3 numBlocksPerGrid((totalNumPrimitives + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); +#if TIMER + start = std::chrono::high_resolution_clock::now(); +#endif + rasterize_triangle << > > (width, height, dev_depth, totalNumPrimitives, dev_primitives, dev_fragmentBuffer); +#if TIMER + cudaDeviceSynchronize(); + end = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(end - start); + time_rasterize += double(duration.count()); + std::cout << "Rasterization cost " << time_rasterize << " microsecond" << std::endl; + start = std::chrono::high_resolution_clock::now(); +#endif // Copy depthbuffer colors into framebuffer render << > >(width, height, dev_fragmentBuffer, dev_framebuffer); checkCUDAError("fragment shader"); +#if TIMER + cudaDeviceSynchronize(); + end = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(end - start); + time_render += double(duration.count()); + std::cout << "rendering cost " << time_render << " microsecond" << std::endl; + start = std::chrono::high_resolution_clock::now(); +#endif // Copy framebuffer into OpenGL buffer for OpenGL previewing sendImageToPBO<<>>(pbo, width, height, dev_framebuffer); checkCUDAError("copy render result to pbo"); +#if TIMER + cudaDeviceSynchronize(); + end = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(end - start); + time_sendToPBO += double(duration.count()); + std::cout << "sendImageToPBO cost " << time_sendToPBO << " microsecond" << std::endl; + std::cout << "total time is " << time_sendToPBO + time_render + time_rasterize + time_assembly << " microsecond" << std::endl; +#endif } /** @@ -774,3 +1024,5 @@ void rasterizeFree() { checkCUDAError("rasterize Free"); } + +