Skip to content

Commit 3dcf7f2

Browse files
committed
chore(gpu): reduce throughput integer bench time
1 parent f24fa62 commit 3dcf7f2

File tree

3 files changed, with 34 additions and 6 deletions

.github/workflows/benchmark_gpu_integer_common.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,8 @@ jobs:
236236
--commit-date "${{ env.COMMIT_DATE }}" \
237237
--bench-date "${{ env.BENCH_DATE }}" \
238238
--walk-subdirs \
239-
--name-suffix avx512
239+
--name-suffix avx512 \
240+
--bench-type ${{ matrix.bench_type }}
240241
241242
- name: Upload parsed results artifact
242243
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882

tfhe/benches/integer/bench.rs

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,9 @@ fn bench_server_key_binary_function_clean_inputs<F>(
144144
}
145145
BenchmarkType::Throughput => {
146146
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
147+
bench_group
148+
.sample_size(10)
149+
.measurement_time(std::time::Duration::from_secs(30));
147150
let elements = throughput_num_threads(num_block);
148151
bench_group.throughput(Throughput::Elements(elements));
149152
bench_group.bench_function(&bench_id, |b| {
@@ -292,6 +295,9 @@ fn bench_server_key_unary_function_clean_inputs<F>(
292295
}
293296
BenchmarkType::Throughput => {
294297
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
298+
bench_group
299+
.sample_size(10)
300+
.measurement_time(std::time::Duration::from_secs(30));
295301
let elements = throughput_num_threads(num_block);
296302
bench_group.throughput(Throughput::Elements(elements));
297303
bench_group.bench_function(&bench_id, |b| {
@@ -446,6 +452,9 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
446452
}
447453
BenchmarkType::Throughput => {
448454
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
455+
bench_group
456+
.sample_size(10)
457+
.measurement_time(std::time::Duration::from_secs(30));
449458
let elements = throughput_num_threads(num_block);
450459
bench_group.throughput(Throughput::Elements(elements));
451460
bench_group.bench_function(&bench_id, |b| {
@@ -559,6 +568,9 @@ fn if_then_else_parallelized(c: &mut Criterion) {
559568
}
560569
BenchmarkType::Throughput => {
561570
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
571+
bench_group
572+
.sample_size(10)
573+
.measurement_time(std::time::Duration::from_secs(30));
562574
let elements = throughput_num_threads(num_block);
563575
bench_group.throughput(Throughput::Elements(elements));
564576
bench_group.bench_function(&bench_id, |b| {
@@ -654,6 +666,9 @@ fn ciphertexts_sum_parallelized(c: &mut Criterion) {
654666
bench_id = format!(
655667
"{bench_name}_{len}_ctxts::throughput::{param_name}::{bit_size}_bits"
656668
);
669+
bench_group
670+
.sample_size(10)
671+
.measurement_time(std::time::Duration::from_secs(30));
657672
let elements = throughput_num_threads(num_block);
658673
bench_group.throughput(Throughput::Elements(elements));
659674
bench_group.bench_function(&bench_id, |b| {
@@ -1344,6 +1359,9 @@ mod cuda {
13441359
}
13451360
BenchmarkType::Throughput => {
13461361
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
1362+
bench_group
1363+
.sample_size(10)
1364+
.measurement_time(std::time::Duration::from_secs(30));
13471365
let elements = throughput_num_threads(num_block);
13481366
bench_group.throughput(Throughput::Elements(elements));
13491367
bench_group.bench_function(&bench_id, |b| {
@@ -1440,6 +1458,9 @@ mod cuda {
14401458
}
14411459
BenchmarkType::Throughput => {
14421460
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
1461+
bench_group
1462+
.sample_size(10)
1463+
.measurement_time(std::time::Duration::from_secs(30));
14431464
let elements = throughput_num_threads(num_block);
14441465
bench_group.throughput(Throughput::Elements(elements));
14451466
bench_group.bench_function(&bench_id, |b| {
@@ -1496,9 +1517,6 @@ mod cuda {
14961517
G: Fn(&mut ThreadRng, usize) -> ScalarType,
14971518
{
14981519
let mut bench_group = c.benchmark_group(bench_name);
1499-
bench_group
1500-
.sample_size(15)
1501-
.measurement_time(std::time::Duration::from_secs(30));
15021520
let mut rng = rand::thread_rng();
15031521

15041522
let streams = CudaStreams::new_multi_gpu();
@@ -1516,6 +1534,9 @@ mod cuda {
15161534

15171535
match BENCH_TYPE.get().unwrap() {
15181536
BenchmarkType::Latency => {
1537+
bench_group
1538+
.sample_size(15)
1539+
.measurement_time(std::time::Duration::from_secs(30));
15191540
bench_id =
15201541
format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); // FIXME it makes no sense to duplicate `bit_size`
15211542
bench_group.bench_function(&bench_id, |b| {
@@ -1543,6 +1564,9 @@ mod cuda {
15431564
});
15441565
}
15451566
BenchmarkType::Throughput => {
1567+
bench_group
1568+
.sample_size(10)
1569+
.measurement_time(std::time::Duration::from_secs(30));
15461570
bench_id = format!(
15471571
"{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}"
15481572
);
@@ -1644,6 +1668,9 @@ mod cuda {
16441668
}
16451669
BenchmarkType::Throughput => {
16461670
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
1671+
bench_group
1672+
.sample_size(10)
1673+
.measurement_time(std::time::Duration::from_secs(30));
16471674
let elements = throughput_num_threads(num_block);
16481675
bench_group.throughput(Throughput::Elements(elements));
16491676
bench_group.bench_function(&bench_id, |b| {

tfhe/benches/utilities.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -399,9 +399,9 @@ pub mod integer_utils {
399399
#[cfg(feature = "gpu")]
400400
{
401401
// This value is for Nvidia H100 GPU
402-
let streaming_multiprocessors = 144;
402+
let streaming_multiprocessors = 132;
403403
let num_gpus = unsafe { cuda_get_number_of_gpus() };
404-
((streaming_multiprocessors * 16 * num_gpus) as f64 * block_multiplicator) as u64
404+
((streaming_multiprocessors * num_gpus) as f64 * block_multiplicator) as u64
405405
}
406406
#[cfg(not(feature = "gpu"))]
407407
{

0 commit comments

Comments
 (0)