Skip to content

Commit d687755

Browse files
fix(gpu): radix ciphertext
1 parent 44732ff commit d687755

20 files changed

Lines changed: 156 additions & 279 deletions

File tree

backends/tfhe-cuda-backend/cuda/include/linear_algebra.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,11 @@
77
extern "C" {
88

99
void cuda_negate_lwe_ciphertext_vector_32(
10-
void *stream, uint32_t gpu_index, void *lwe_array_out,
11-
void const *lwe_array_in, const uint32_t input_lwe_dimension,
12-
const uint32_t input_lwe_ciphertext_count);
10+
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
11+
CudaRadixCiphertextFFI const *lwe_array_in);
1312
void cuda_negate_lwe_ciphertext_vector_64(
14-
void *stream, uint32_t gpu_index, void *lwe_array_out,
15-
void const *lwe_array_in, const uint32_t input_lwe_dimension,
16-
const uint32_t input_lwe_ciphertext_count);
13+
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
14+
CudaRadixCiphertextFFI const *lwe_array_in);
1715
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
1816
CudaRadixCiphertextFFI *output,
1917
CudaRadixCiphertextFFI const *input_1,
@@ -60,10 +58,8 @@ void cuda_glwe_wrapping_polynomial_mul_one_to_many_64_async(
6058
int8_t *circulant, void const *poly_rhs, uint32_t polynomial_size,
6159
uint32_t glwe_dimension, uint32_t n_rhs);
6260
void cuda_add_lwe_ciphertext_vector_plaintext_64(
63-
void *stream, uint32_t gpu_index, void *lwe_array_out,
64-
void const *lwe_array_in, const uint64_t plaintext_in,
65-
const uint32_t input_lwe_dimension,
66-
const uint32_t input_lwe_ciphertext_count);
61+
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
62+
CudaRadixCiphertextFFI const *lwe_array_in, const uint64_t plaintext_in);
6763
void cuda_add_lwe_ciphertext_vector_inplace_32(
6864
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_inout,
6965
CudaRadixCiphertextFFI const *input_2);

backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,14 @@ host_bitnot(CudaStreams streams, CudaRadixCiphertextFFI *radix_ciphertext,
176176
(ct_message_modulus - 1);
177177

178178
host_negation<Torus>(
179-
streams.stream(0), streams.gpu_index(0), (Torus *)radix_ciphertext->ptr,
180-
(Torus *)radix_ciphertext->ptr, radix_ciphertext->lwe_dimension,
179+
streams.stream(0), streams.gpu_index(0), radix_ciphertext,
180+
radix_ciphertext, radix_ciphertext->lwe_dimension,
181181
radix_ciphertext->num_radix_blocks);
182182

183183
host_addition_plaintext_scalar<Torus>(
184-
streams.stream(0), streams.gpu_index(0), (Torus *)radix_ciphertext->ptr,
185-
(Torus *)radix_ciphertext->ptr, encoded_scalar,
186-
radix_ciphertext->lwe_dimension, radix_ciphertext->num_radix_blocks);
184+
streams.stream(0), streams.gpu_index(0), radix_ciphertext,
185+
radix_ciphertext, encoded_scalar, radix_ciphertext->lwe_dimension,
186+
radix_ciphertext->num_radix_blocks);
187187

188188
for (size_t i = 0; i < radix_ciphertext->num_radix_blocks; ++i) {
189189
radix_ciphertext->degrees[i] = ct_message_modulus - 1;

backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ device_accumulate_all_blocks(Torus *output, Torus const *input_block,
3535

3636
template <typename Torus>
3737
__host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
38-
Torus *output, Torus const *input,
38+
CudaRadixCiphertextFFI *output,
39+
CudaRadixCiphertextFFI const *input,
3940
uint32_t lwe_dimension,
4041
uint32_t num_radix_blocks) {
4142

@@ -45,7 +46,8 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
4546
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
4647
// Add all blocks and store in sum
4748
device_accumulate_all_blocks<Torus><<<num_blocks, num_threads, 0, stream>>>(
48-
output, input, lwe_dimension, num_radix_blocks);
49+
(Torus *)output->ptr, (Torus const *)input->ptr, lwe_dimension,
50+
num_radix_blocks);
4951
check_cuda_error(cudaGetLastError());
5052
}
5153

@@ -102,23 +104,33 @@ __host__ void are_all_comparisons_block_true(
102104

103105
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
104106
// as in the worst case we will be adding `max_value` ones
105-
auto input_blocks = (Torus *)tmp_out->ptr;
106-
auto accumulator_ptr =
107-
(Torus *)are_all_block_true_buffer->tmp_block_accumulated->ptr;
108107
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
108+
GPU_ASSERT(are_all_block_true_buffer->tmp_block_accumulated->lwe_dimension ==
109+
big_lwe_dimension,
110+
"lwe_dimension mismatch between tmp_block_accumulated and "
111+
"big_lwe_dimension");
112+
GPU_ASSERT(tmp_out->lwe_dimension == big_lwe_dimension,
113+
"lwe_dimension mismatch between tmp_out and big_lwe_dimension");
109114
uint32_t chunk_lengths[num_chunks];
110115
auto begin_remaining_blocks = remaining_blocks;
116+
uint32_t acc_offset = 0, inp_offset = 0;
111117
for (int i = 0; i < num_chunks; i++) {
112118
uint32_t chunk_length =
113119
std::min(max_value, begin_remaining_blocks - i * max_value);
114120
chunk_lengths[i] = chunk_length;
121+
CudaRadixCiphertextFFI acc_slice, inp_slice;
122+
as_radix_ciphertext_slice<Torus>(
123+
&acc_slice, are_all_block_true_buffer->tmp_block_accumulated,
124+
acc_offset, acc_offset + 1);
125+
as_radix_ciphertext_slice<Torus>(&inp_slice, tmp_out, inp_offset,
126+
inp_offset + chunk_length);
115127
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
116-
accumulator_ptr, input_blocks,
117-
big_lwe_dimension, chunk_length);
128+
&acc_slice, &inp_slice, big_lwe_dimension,
129+
chunk_length);
118130

119-
accumulator_ptr += (big_lwe_dimension + 1);
131+
acc_offset += 1;
120132
remaining_blocks -= (chunk_length - 1);
121-
input_blocks += (big_lwe_dimension + 1) * chunk_length;
133+
inp_offset += chunk_length;
122134
}
123135
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
124136

@@ -219,21 +231,31 @@ __host__ void is_at_least_one_comparisons_block_true(
219231

220232
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
221233
// as in the worst case we will be adding `max_value` ones
222-
auto input_blocks = (Torus *)mem_ptr->tmp_lwe_array_out->ptr;
223-
auto accumulator = (Torus *)buffer->tmp_block_accumulated->ptr;
234+
GPU_ASSERT(buffer->tmp_block_accumulated->lwe_dimension == big_lwe_dimension,
235+
"lwe_dimension mismatch between tmp_block_accumulated and "
236+
"big_lwe_dimension");
237+
GPU_ASSERT(mem_ptr->tmp_lwe_array_out->lwe_dimension == big_lwe_dimension,
238+
"lwe_dimension mismatch between tmp_lwe_array_out and "
239+
"big_lwe_dimension");
224240
uint32_t chunk_lengths[num_chunks];
225241
auto begin_remaining_blocks = remaining_blocks;
242+
uint32_t acc_offset = 0, inp_offset = 0;
226243
for (int i = 0; i < num_chunks; i++) {
227244
uint32_t chunk_length =
228245
std::min(max_value, begin_remaining_blocks - i * max_value);
229246
chunk_lengths[i] = chunk_length;
247+
CudaRadixCiphertextFFI acc_slice, inp_slice;
248+
as_radix_ciphertext_slice<Torus>(&acc_slice, buffer->tmp_block_accumulated,
249+
acc_offset, acc_offset + 1);
250+
as_radix_ciphertext_slice<Torus>(&inp_slice, mem_ptr->tmp_lwe_array_out,
251+
inp_offset, inp_offset + chunk_length);
230252
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
231-
accumulator, input_blocks, big_lwe_dimension,
253+
&acc_slice, &inp_slice, big_lwe_dimension,
232254
chunk_length);
233255

234-
accumulator += (big_lwe_dimension + 1);
256+
acc_offset += 1;
235257
remaining_blocks -= (chunk_length - 1);
236-
input_blocks += (big_lwe_dimension + 1) * chunk_length;
258+
inp_offset += chunk_length;
237259
}
238260

239261
// Selects a LUT
@@ -296,22 +318,31 @@ __host__ void host_compare_blocks_with_zero(
296318
streams.stream(0), streams.gpu_index(0), sum, 0, 1, lwe_array_in, 0, 1);
297319
num_sum_blocks = 1;
298320
} else {
321+
GPU_ASSERT(sum->lwe_dimension == big_lwe_dimension,
322+
"lwe_dimension mismatch between sum and big_lwe_dimension");
323+
GPU_ASSERT(lwe_array_in->lwe_dimension == big_lwe_dimension,
324+
"lwe_dimension mismatch between lwe_array_in and "
325+
"big_lwe_dimension");
299326
uint32_t remainder_blocks = num_radix_blocks;
300-
auto sum_i = (Torus *)sum->ptr;
301-
auto chunk = (Torus *)lwe_array_in->ptr;
327+
uint32_t sum_offset = 0, inp_offset = 0;
302328
while (remainder_blocks > 1) {
303329
uint32_t chunk_size =
304330
std::min(remainder_blocks, num_elements_to_fill_carry);
305-
331+
CudaRadixCiphertextFFI sum_slice, inp_slice;
332+
as_radix_ciphertext_slice<Torus>(&sum_slice, sum, sum_offset,
333+
sum_offset + 1);
334+
as_radix_ciphertext_slice<Torus>(&inp_slice, lwe_array_in, inp_offset,
335+
inp_offset + chunk_size);
306336
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
307-
sum_i, chunk, big_lwe_dimension, chunk_size);
337+
&sum_slice, &inp_slice, big_lwe_dimension,
338+
chunk_size);
308339

309340
num_sum_blocks++;
310341
remainder_blocks -= (chunk_size - 1);
311342

312343
// Update operands
313-
chunk += (chunk_size - 1) * big_lwe_size;
314-
sum_i += big_lwe_size;
344+
inp_offset += chunk_size - 1;
345+
sum_offset += 1;
315346
}
316347
}
317348

@@ -381,9 +412,8 @@ compare_radix_blocks(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
381412

382413
// Subtract
383414
host_subtraction<Torus>(
384-
streams.stream(0), streams.gpu_index(0), (Torus *)lwe_array_out->ptr,
385-
(Torus *)lwe_array_left->ptr, (Torus *)lwe_array_right->ptr,
386-
big_lwe_dimension, num_radix_blocks);
415+
streams.stream(0), streams.gpu_index(0), lwe_array_out, lwe_array_left,
416+
lwe_array_right, big_lwe_dimension, num_radix_blocks);
387417

388418
// Apply LUT to compare to 0
389419
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;

backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ host_integer_compress(CudaStreams streams,
214214

215215
if constexpr (std::is_same_v<Torus, uint64_t>) {
216216
lwe_pksk_input = mem_ptr->tmp_lwe;
217-
host_cleartext_multiplication<Torus>(
217+
host_cleartext_multiplication_unsafe_no_degrees<Torus>(
218218
streams.stream(0), streams.gpu_index(0), lwe_pksk_input, lwe_array_in,
219219
(uint64_t)compression_params.message_modulus);
220220
}

backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -192,16 +192,16 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
192192

193193
host_negation<Torus>(
194194
streams.stream(gpu_index), streams.gpu_index(gpu_index),
195-
(Torus *)out_boolean_block->ptr, (Torus *)out_boolean_block->ptr,
195+
out_boolean_block, out_boolean_block,
196196
radix_params.big_lwe_dimension, 1);
197197

198198
// we calculate encoding because this block works only for
199199
// message_modulus = 4 and carry_modulus = 4.
200200
const Torus encoded_scalar = 1ULL << (sizeof(Torus) * 8 - 5);
201201
host_addition_plaintext_scalar<Torus>(
202202
streams.stream(gpu_index), streams.gpu_index(gpu_index),
203-
(Torus *)out_boolean_block->ptr, (Torus *)out_boolean_block->ptr,
204-
encoded_scalar, radix_params.big_lwe_dimension, 1);
203+
out_boolean_block, out_boolean_block, encoded_scalar,
204+
radix_params.big_lwe_dimension, 1);
205205
}
206206
};
207207

@@ -289,35 +289,32 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
289289
// c3 = !o3
290290
copy_radix_ciphertext_slice_async<Torus>(
291291
streams.stream(0), streams.gpu_index(0), c3, 0, 1, o3, 0, 1);
292-
host_negation<Torus>(streams.stream(0), streams.gpu_index(0),
293-
(Torus *)c3->ptr, (Torus *)c3->ptr,
292+
host_negation<Torus>(streams.stream(0), streams.gpu_index(0), c3, c3,
294293
radix_params.big_lwe_dimension, 1);
295294
const Torus encoded_scalar = 1ULL << (sizeof(Torus) * 8 - 5);
296295
host_addition_plaintext_scalar<Torus>(
297-
streams.stream(0), streams.gpu_index(0), (Torus *)c3->ptr,
298-
(Torus *)c3->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
296+
streams.stream(0), streams.gpu_index(0), c3, c3, encoded_scalar,
297+
radix_params.big_lwe_dimension, 1);
299298

300299
// c2 = !o2 + o3
301300
copy_radix_ciphertext_slice_async<Torus>(
302301
streams.stream(1), streams.gpu_index(1), c2, 0, 1, o2, 0, 1);
303-
host_negation<Torus>(streams.stream(1), streams.gpu_index(1),
304-
(Torus *)c2->ptr, (Torus *)c2->ptr,
302+
host_negation<Torus>(streams.stream(1), streams.gpu_index(1), c2, c2,
305303
radix_params.big_lwe_dimension, 1);
306304
host_addition_plaintext_scalar<Torus>(
307-
streams.stream(1), streams.gpu_index(1), (Torus *)c2->ptr,
308-
(Torus *)c2->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
305+
streams.stream(1), streams.gpu_index(1), c2, c2, encoded_scalar,
306+
radix_params.big_lwe_dimension, 1);
309307
host_addition<Torus>(streams.stream(1), streams.gpu_index(1), c2, c2,
310308
o3_gpu_1, 1, 4, 4);
311309

312310
// c1 = !o1 + o2
313311
copy_radix_ciphertext_slice_async<Torus>(
314312
streams.stream(2), streams.gpu_index(2), c1, 0, 1, o1, 0, 1);
315-
host_negation<Torus>(streams.stream(2), streams.gpu_index(2),
316-
(Torus *)c1->ptr, (Torus *)c1->ptr,
313+
host_negation<Torus>(streams.stream(2), streams.gpu_index(2), c1, c1,
317314
radix_params.big_lwe_dimension, 1);
318315
host_addition_plaintext_scalar<Torus>(
319-
streams.stream(2), streams.gpu_index(2), (Torus *)c1->ptr,
320-
(Torus *)c1->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
316+
streams.stream(2), streams.gpu_index(2), c1, c1, encoded_scalar,
317+
radix_params.big_lwe_dimension, 1);
321318
host_addition<Torus>(streams.stream(2), streams.gpu_index(2), c1, c1,
322319
o2_gpu_2, 1, 4, 4);
323320

@@ -330,10 +327,9 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
330327
CudaRadixCiphertextFFI *cx,
331328
CudaRadixCiphertextFFI *rx,
332329
int_radix_lut<Torus> *lut, Torus factor) {
333-
auto rx_list = to_lwe_ciphertext_list(rx);
334330
host_cleartext_multiplication<Torus>(streams.stream(gpu_index),
335331
streams.gpu_index(gpu_index),
336-
(Torus *)rx->ptr, &rx_list, factor);
332+
rx, rx, factor);
337333
host_add_the_same_block_to_all_blocks<Torus>(streams.stream(gpu_index),
338334
streams.gpu_index(gpu_index),
339335
rx, rx, cx, 4, 4);
@@ -954,7 +950,7 @@ __host__ void host_integer_div_rem(
954950
int_mem_ptr->sub_streams_1.synchronize();
955951
int_mem_ptr->sub_streams_2.synchronize();
956952

957-
host_negation<Torus>(
953+
host_integer_negation<Torus>(
958954
int_mem_ptr->sub_streams_1, int_mem_ptr->negated_quotient, quotient,
959955
radix_params.message_modulus, radix_params.carry_modulus, num_blocks);
960956

@@ -965,7 +961,7 @@ __host__ void host_integer_div_rem(
965961
nullptr, int_mem_ptr->scp_mem_1, bsks,
966962
ksks, requested_flag, uses_carry);
967963

968-
host_negation<Torus>(
964+
host_integer_negation<Torus>(
969965
int_mem_ptr->sub_streams_2, int_mem_ptr->negated_remainder, remainder,
970966
radix_params.message_modulus, radix_params.carry_modulus, num_blocks);
971967

backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2267,14 +2267,13 @@ void host_single_borrow_propagate(CudaStreams streams,
22672267
streams, borrow_states, params, mem->prop_simu_group_carries_mem, bsks,
22682268
ksks, num_radix_blocks, num_groups);
22692269

2270-
auto shifted_blocks =
2271-
(Torus *)mem->shifted_blocks_borrow_state_mem->shifted_blocks->ptr;
22722270
auto prepared_blocks = mem->prop_simu_group_carries_mem->prepared_blocks;
2273-
auto simulators = (Torus *)mem->prop_simu_group_carries_mem->simulators->ptr;
22742271

2275-
host_subtraction<Torus>(streams.stream(0), streams.gpu_index(0),
2276-
(Torus *)prepared_blocks->ptr, shifted_blocks,
2277-
simulators, big_lwe_dimension, num_radix_blocks);
2272+
host_subtraction<Torus>(
2273+
streams.stream(0), streams.gpu_index(0), prepared_blocks,
2274+
mem->shifted_blocks_borrow_state_mem->shifted_blocks,
2275+
mem->prop_simu_group_carries_mem->simulators, big_lwe_dimension,
2276+
num_radix_blocks);
22782277

22792278
host_add_scalar_one_inplace<Torus>(streams, prepared_blocks, message_modulus,
22802279
carry_modulus);
@@ -2318,8 +2317,7 @@ void host_single_borrow_propagate(CudaStreams streams,
23182317

23192318
auto resolved_carries = mem->prop_simu_group_carries_mem->resolved_carries;
23202319
host_negation<Torus>(sub_streams_2.stream(0), sub_streams_2.gpu_index(0),
2321-
(Torus *)resolved_carries->ptr,
2322-
(Torus *)resolved_carries->ptr, big_lwe_dimension,
2320+
resolved_carries, resolved_carries, big_lwe_dimension,
23232321
num_groups);
23242322

23252323
host_radix_sum_in_groups<Torus>(

backends/tfhe-cuda-backend/cuda/src/integer/negation.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ void cuda_negate_ciphertext_64(CudaStreamsFFI streams,
1010
"operations");
1111

1212
auto cuda_streams = CudaStreams(streams);
13-
host_negation<uint64_t>(cuda_streams, lwe_array_out, lwe_array_in,
13+
host_integer_negation<uint64_t>(cuda_streams, lwe_array_out, lwe_array_in,
1414
message_modulus, carry_modulus, num_radix_blocks);
1515
cuda_synchronize_stream(cuda_streams.stream(0), cuda_streams.gpu_index(0));
1616
}

backends/tfhe-cuda-backend/cuda/src/integer/negation.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ __global__ void device_negation(Torus *output, Torus const *input,
4848
}
4949

5050
template <typename Torus>
51-
__host__ void host_negation(CudaStreams streams,
51+
__host__ void host_integer_negation(CudaStreams streams,
5252
CudaRadixCiphertextFFI *lwe_array_out,
5353
CudaRadixCiphertextFFI const *lwe_array_in,
5454
uint64_t message_modulus, uint64_t carry_modulus,

backends/tfhe-cuda-backend/cuda/src/integer/scalar_addition.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ device_scalar_subtraction_inplace(Torus *lwe_array, Torus *scalar_input,
112112

113113
template <typename Torus>
114114
__host__ void host_scalar_subtraction_inplace(
115-
CudaStreams streams, Torus *lwe_array, Torus *scalar_input,
115+
CudaStreams streams, CudaRadixCiphertextFFI *lwe_array, Torus *scalar_input,
116116
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
117117
uint32_t message_modulus, uint32_t carry_modulus) {
118118
cuda_set_device(streams.gpu_index(0));
@@ -130,7 +130,8 @@ __host__ void host_scalar_subtraction_inplace(
130130
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
131131

132132
device_scalar_subtraction_inplace<Torus>
133-
<<<grid, thds, 0, streams.stream(0)>>>(lwe_array, scalar_input,
133+
<<<grid, thds, 0, streams.stream(0)>>>((Torus *)lwe_array->ptr,
134+
scalar_input,
134135
input_lwe_ciphertext_count,
135136
lwe_dimension, delta);
136137
check_cuda_error(cudaGetLastError());

backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ __host__ void scalar_compare_radix_blocks(
6363
// Subtract
6464
// Here we need the true lwe sub, not the one that comes from shortint.
6565
host_scalar_subtraction_inplace<Torus>(
66-
streams, (Torus *)subtracted_blocks->ptr, scalar_blocks,
67-
big_lwe_dimension, num_radix_blocks, message_modulus, carry_modulus);
66+
streams, subtracted_blocks, scalar_blocks, big_lwe_dimension,
67+
num_radix_blocks, message_modulus, carry_modulus);
6868

6969
// Apply LUT to compare to 0
7070
auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;

0 commit comments

Comments
 (0)