Add CUDA example

dariost · dariost · commit f867b5de41d8 · 2018-06-04T22:58:23.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ Cargo.lock
 *.spv
 *.so
 *.png
+*.ppm
diff --git a/utility/mandelbrot.cu b/utility/mandelbrot.cu
@@ -0,0 +1,71 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdint.h>
+#include <sys/time.h>
+
+#define ITERATIONS 2000
+#define WIDTH 3840
+#define HEIGHT 2160
+#define CENTER_X -0.75
+#define CENTER_Y 0.0
+#define ZOOM (float(HEIGHT) / 2.5)
+
+// Mandelbrot iteration over a width x height pixel grid, one pixel per loop step.
+//
+// dim:    device pointer to two unsigned values, {width, height}.
+// output: device pointer to at least width * height floats. Element i receives
+//         a*a + b*b (the squared magnitude of z = a + bi) after ITERATIONS steps
+//         of z = z*z + c, where c is derived from pixel i (column i % width,
+//         row i / width), the ZOOM factor and the CENTER_X / CENTER_Y offset.
+//
+// Expects a 1-D launch. Each thread starts at its global index and advances by
+// the total number of launched threads, so every pixel is covered for any
+// grid/block size. There is no early exit from the iteration: the full
+// ITERATIONS steps always run and the final value is stored as-is.
+__global__ void mandelbrot(unsigned* dim, float* output) {
+    const unsigned width = dim[0];
+    const unsigned height = dim[1];
+    const unsigned total = width * height;
+    const unsigned stride = blockDim.x * gridDim.x;
+    // The configuration macros expand to double expressions; convert them once
+    // so the per-pixel arithmetic below stays in single precision instead of
+    // being silently promoted to double.
+    const float zoom = float(ZOOM);
+    const float center_x = float(CENTER_X);
+    const float center_y = float(CENTER_Y);
+    const float half_width = width / 2.0f;
+    const float half_height = height / 2.0f;
+    for(unsigned tid = blockDim.x * blockIdx.x + threadIdx.x; tid < total; tid += stride) {
+        // Map the pixel to the complex plane: centre the grid, scale, then shift.
+        const float x = (float(tid % width) - half_width) / zoom + center_x;
+        const float y = (float(tid / width) - half_height) / zoom + center_y;
+        float a = 0.0f, b = 0.0f;
+        for(unsigned i = 0; i < ITERATIONS; i++) {
+            // The new real part must be computed from the old a and b before b is overwritten.
+            const float next_a = a * a - b * b + x;
+            b = 2.0f * a * b + y;
+            a = next_a;
+        }
+        output[tid] = a * a + b * b;
+    }
+}
+
+// Prints a diagnostic naming the failed step and exits if a CUDA runtime call failed.
+static void check_cuda(cudaError_t status, const char* what) {
+    if(status != cudaSuccess) {
+        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Computes a WIDTH x HEIGHT Mandelbrot image on the GPU, writes it to out.ppm as
+// plain-text grey values (magic number P2, 0 or 255 per pixel) and prints the
+// elapsed wall-clock time in seconds.
+//
+// Only the kernel launch plus the wait for its completion is timed; allocation,
+// host/device copies and file output are outside the measured interval.
+//
+// Returns 0 on success. Exits with EXIT_FAILURE if host memory cannot be
+// allocated, any CUDA call fails, or out.ppm cannot be opened.
+int main() {
+    const size_t pixels = (size_t)WIDTH * HEIGHT;
+    const unsigned threads = 256;
+    // Ceiling division: a partial last block is still launched, but no surplus
+    // idle block is added when pixels is an exact multiple of the block size.
+    const unsigned blocks = (unsigned)((pixels + threads - 1) / threads);
+    unsigned* host_dim;
+    float* host_output;
+    unsigned* device_dim;
+    float* device_output;
+    struct timespec start, end;
+    // Checked explicitly rather than with assert(), which is compiled out under NDEBUG.
+    host_dim = (unsigned*)malloc(2 * sizeof(unsigned));
+    host_output = (float*)malloc(pixels * sizeof(float));
+    if(!host_dim || !host_output) {
+        fprintf(stderr, "Out of host memory\n");
+        return EXIT_FAILURE;
+    }
+    check_cuda(cudaMalloc(&device_dim, 2 * sizeof(unsigned)), "allocate dim");
+    check_cuda(cudaMalloc(&device_output, pixels * sizeof(float)), "allocate output");
+    host_dim[0] = WIDTH;
+    host_dim[1] = HEIGHT;
+    check_cuda(cudaMemcpy(device_dim, host_dim, 2 * sizeof(unsigned), cudaMemcpyHostToDevice), "copy dim to device");
+    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+    mandelbrot<<<blocks, threads>>>(device_dim, device_output);
+    // A launch reports configuration errors only through cudaGetLastError();
+    // faults during execution surface at the synchronisation point. Both codes
+    // are captured here and inspected after the clock is stopped so that error
+    // reporting never lands inside the timed interval.
+    cudaError_t launch_status = cudaGetLastError();
+    cudaError_t sync_status = cudaDeviceSynchronize();
+    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+    check_cuda(launch_status, "kernel launch");
+    check_cuda(sync_status, "kernel execution");
+    check_cuda(cudaMemcpy(host_output, device_output, pixels * sizeof(float), cudaMemcpyDeviceToHost), "copy output to host");
+    check_cuda(cudaFree(device_output), "free output");
+    check_cuda(cudaFree(device_dim), "free dim");
+    FILE* output = fopen("out.ppm", "w");
+    if(!output) {
+        // Without this check the fprintf calls below would dereference NULL.
+        perror("out.ppm");
+        return EXIT_FAILURE;
+    }
+    fprintf(output, "P2\n%u %u\n255\n", host_dim[0], host_dim[1]);
+    for(size_t i = 0; i < pixels; i++) {
+        // Values at or below the threshold are written as 0, everything else
+        // (including inf/NaN, for which the comparison is false) as 255.
+        fprintf(output, "%d\n", (host_output[i] <= 2.0) ? (0) : (255));
+    }
+    fclose(output);
+    free(host_dim);
+    free(host_output);
+    uint64_t delta_us = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000;
+    double delta = double(delta_us) / 1e6;
+    printf("Time: %.3lf\n", delta);
+    return 0;
+}
diff --git a/wyvern-python/examples/mandelbrot.py b/wyvern-python/examples/mandelbrot.py
@@ -10,6 +10,11 @@
 from time import time
 import png
 
+WIDTH = 3840
+HEIGHT = 2160
+ITERATIONS = 2000
+
+
 def mandelbrot(g_ctx: wy.builder.Context, id: str, a0: str, b0: str,
                iterations: int):
     ctx = g_ctx.getProgramBuilder().newContext()
@@ -66,27 +71,28 @@ def loop():
     ctx.While(lambda: ctx.id < ctx.width * ctx.height, loop)
     return builder.finalize()
 
+
 if __name__ == "__main__":  # script entry point: build, compile and run the program, then save the image
-    result = program(1920*1080, -0.75, 0.0, 1080/2.5, 2000)
+    result = program(WIDTH * HEIGHT, -0.75, 0.0, HEIGHT / 2.5, ITERATIONS)  # sizes now come from the module constants
     executor = wy.WyVkExecutor()
     executable = executor.compile(result)
+    start = time()  # timer starts after compile(), before the resources are created and filled
     input = executor.newResource()
     output = executor.newResource()
-    input.set_data_array_uint32([1920, 1080])
-    output.set_data_array_float32([0.0] * 1920 * 1080)
+    input.set_data_array_uint32([WIDTH, HEIGHT])
+    output.set_data_array_float32([0.0] * WIDTH * HEIGHT)  # one float per pixel, initialised to 0.0
     executable.bind("input", wy.IoType.input.value, input)
     executable.bind("output", wy.IoType.output.value, output)
-    start = time()
     executable.run()
-    print("Time: %.3fs" % (time() - start),)
     result = output.get_data_array_float32()
+    print("Time: %.3fs" % (time() - start),)  # printed after the result is read back, so the readback is timed too
     def mapper(x):  # threshold a float result to a grey value: 0 if x <= 2.0, otherwise 255
         if x <= 2.0:
             return 0
         else:
             return 255
     result = [mapper(x) for x in result]
     out = open("out.png", "wb")
-    w = png.Writer(1920, 1080, greyscale=True)
+    w = png.Writer(WIDTH, HEIGHT, greyscale=True)
     w.write_array(out, result)
     out.close()

-Original file line number
+Diff line change
 *.spv
 *.so
 *.png
 +*.ppm