Skip to content

Commit 44732ff

Browse files
fix(gpu): div rem size
1 parent a4f77a0 commit 44732ff

2 files changed

Lines changed: 7 additions & 6 deletions

File tree

.github/workflows/gpu_fast_tests.yml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,10 @@ jobs:
133133
run: |
134134
nvidia-cuda-mps-control -d
135135
136+
- name: Run High Level API Tests
137+
run: |
138+
make test_high_level_api_fake_multi_gpu
139+
136140
- name: Run core crypto and internal CUDA backend tests
137141
run: |
138142
make test_core_crypto_gpu
@@ -147,9 +151,6 @@ jobs:
147151
run: |
148152
make test_c_api_gpu
149153
150-
- name: Run High Level API Tests
151-
run: |
152-
make test_high_level_api_fake_multi_gpu
153154
154155
slack-notify:
155156
name: gpu_fast_tests/slack-notify

backends/tfhe-cuda-backend/cuda/include/integer/div_rem.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -381,16 +381,16 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
381381
bool use_seq = overflow_sub_mem_1->prop_simu_group_carries_mem
382382
->use_sequential_algorithm_to_resolve_group_carries;
383383

384-
cuda_set_device(0);
384+
cuda_set_device(streams.gpu_index(0));
385385
check_cuda_error(
386386
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming));
387387
create_indexes_for_overflow_sub(streams.get_ith(0), num_blocks, group_size,
388388
use_seq, allocate_gpu_memory, size_tracker);
389389
check_cuda_error(cudaEventRecord(create_indexes_done, streams.stream(0)));
390-
cuda_set_device(1);
390+
cuda_set_device(streams.gpu_index(1));
391391
check_cuda_error(
392392
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0));
393-
cuda_set_device(2);
393+
cuda_set_device(streams.gpu_index(2));
394394
check_cuda_error(
395395
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0));
396396

0 commit comments

Comments
 (0)