diff --git a/backends/vulkan/tools/gpuinfo/include/architecture.h b/backends/vulkan/tools/gpuinfo/include/architecture.h index 9af908eb170..d6b5b2f33f6 100644 --- a/backends/vulkan/tools/gpuinfo/include/architecture.h +++ b/backends/vulkan/tools/gpuinfo/include/architecture.h @@ -40,7 +40,8 @@ void reg_count(const App& app) { uint32_t NITER; auto bench = [&](uint32_t ngrp, uint32_t nreg) { - StagingBuffer buffer(context(), vkapi::kFloat, 1); + StagingBuffer buffer( + context(), vkapi::kFloat, 1, vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "reg_count_" + std::to_string(nreg); @@ -164,7 +165,11 @@ void warp_size(const App& app, const bool verbose = false) { uint32_t NITER; auto bench = [&](uint32_t nthread) { - StagingBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); + StagingBuffer out_buf( + context(), + vkapi::kInt, + app.nthread_logic, + vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "warp_size_physical"; @@ -224,7 +229,11 @@ void warp_size(const App& app, const bool verbose = false) { // doesn't depend on kernel timing, so the extra wait time doesn't lead to // inaccuracy. auto bench_sm = [&](uint32_t nthread) { - StagingBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); + StagingBuffer out_buf( + context(), + vkapi::kInt, + app.nthread_logic, + vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "warp_size_scheduler"; diff --git a/backends/vulkan/tools/gpuinfo/include/buffers.h b/backends/vulkan/tools/gpuinfo/include/buffers.h index 31137b11eea..63fa859e299 100644 --- a/backends/vulkan/tools/gpuinfo/include/buffers.h +++ b/backends/vulkan/tools/gpuinfo/include/buffers.h @@ -35,8 +35,13 @@ void buf_cacheline_size(const App& app) { uint32_t NITER; auto bench = [&](int stride) { - StagingBuffer in_buf(context(), vkapi::kFloat, BUF_SIZE); - StagingBuffer out_buf(context(), vkapi::kFloat, 1); + StagingBuffer in_buf( + context(), + vkapi::kFloat, + BUF_SIZE, + vkapi::CopyDirection::HOST_TO_DEVICE); + StagingBuffer out_buf( + context(), vkapi::kFloat, 1, vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "buf_cacheline_size"; @@ -132,9 +137,16 @@ void _bandwidth( // workgroups, once the size of the access excedes the workgroup width. const uint32_t workgroup_width = local_x * NITER * NUNROLL; - StagingBuffer in_buf(context(), vkapi::kFloat, range / sizeof(float)); + StagingBuffer in_buf( + context(), + vkapi::kFloat, + range / sizeof(float), + vkapi::CopyDirection::HOST_TO_DEVICE); StagingBuffer out_buf( - context(), vkapi::kFloat, VEC_WIDTH * app.nthread_logic); + context(), + vkapi::kFloat, + VEC_WIDTH * app.nthread_logic, + vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "buf_bandwidth_" + memtype_lower; diff --git a/backends/vulkan/tools/gpuinfo/include/textures.h b/backends/vulkan/tools/gpuinfo/include/textures.h index c9ff133f1ec..1f4dbb67f3a 100644 --- a/backends/vulkan/tools/gpuinfo/include/textures.h +++ b/backends/vulkan/tools/gpuinfo/include/textures.h @@ -61,7 +61,11 @@ void tex_cacheline_concurr(const App& app) { vTensor in_tensor = api::vTensor(api::context(), sizes_nchw, vkapi::kFloat); - StagingBuffer out_buf(context(), vkapi::kFloat, TEXEL_WIDTH); + StagingBuffer out_buf( + context(), + vkapi::kFloat, + TEXEL_WIDTH, + vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; @@ -174,7 +178,10 @@ void tex_bandwidth(const App& app) { const uint32_t workgroup_width = local_x * NITER * NUNROLL; StagingBuffer out_buf( - context(), vkapi::kFloat, VEC_WIDTH * app.nthread_logic); + context(), + vkapi::kFloat, + VEC_WIDTH * app.nthread_logic, + vkapi::CopyDirection::DEVICE_TO_HOST); vkapi::PipelineBarrier pipeline_barrier{}; auto time = benchmark_on_gpu(shader_name, 10, [&]() {