Skip to content

Commit cfbb862

Browse files
Only tune streaming DeviceSelect versions for 64-bit offsets (#3072)
1 parent c6c032f commit cfbb862

File tree

3 files changed

+12
-3
lines changed

3 files changed

+12
-3
lines changed

cub/benchmarks/bench/select/flagged.cu

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,8 +164,11 @@ void select(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlias>)
164164
}
165165

166166
using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
167+
// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
168+
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the benchmark CI).
169+
using select_offset_types = nvbench::type_list<int64_t>;
167170

168-
NVBENCH_BENCH_TYPES(select, NVBENCH_TYPE_AXES(fundamental_types, offset_types, may_alias))
171+
NVBENCH_BENCH_TYPES(select, NVBENCH_TYPE_AXES(fundamental_types, select_offset_types, may_alias))
169172
.set_name("base")
170173
.set_type_axes_names({"T{ct}", "OffsetT{ct}", "MayAlias{ct}"})
171174
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4))

cub/benchmarks/bench/select/if.cu

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,8 +190,11 @@ void select(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlias>)
190190
}
191191

192192
using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
193+
// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
194+
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the benchmark CI).
195+
using select_offset_types = nvbench::type_list<int64_t>;
193196

194-
NVBENCH_BENCH_TYPES(select, NVBENCH_TYPE_AXES(fundamental_types, offset_types, may_alias))
197+
NVBENCH_BENCH_TYPES(select, NVBENCH_TYPE_AXES(fundamental_types, select_offset_types, may_alias))
195198
.set_name("base")
196199
.set_type_axes_names({"T{ct}", "OffsetT{ct}", "MayAlias{ct}"})
197200
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4))

cub/benchmarks/bench/select/unique.cu

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,8 +142,11 @@ static void unique(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlia
142142
}
143143

144144
using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
145+
// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
146+
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the benchmark CI).
147+
using select_offset_types = nvbench::type_list<int64_t>;
145148

146-
NVBENCH_BENCH_TYPES(unique, NVBENCH_TYPE_AXES(fundamental_types, offset_types, may_alias))
149+
NVBENCH_BENCH_TYPES(unique, NVBENCH_TYPE_AXES(fundamental_types, select_offset_types, may_alias))
147150
.set_name("base")
148151
.set_type_axes_names({"T{ct}", "OffsetT{ct}", "MayAlias{ct}"})
149152
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4))

0 commit comments

Comments
 (0)