Skip to content

Commit b400516

Browse files
authored
simplify device attributes implementation (#3084)
1 parent: 1e5da8d · commit: b400516

File tree

1 file changed: +47 additions, -64 deletions

cudax/include/cuda/experimental/__device/attributes.cuh

Lines changed: 47 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -32,31 +32,8 @@ namespace cuda::experimental
3232
namespace detail
3333
{
3434

35-
_CCCL_NODISCARD inline int __get_attr_impl(::cudaDeviceAttr __attr, int __dev)
36-
{
37-
int __value = 0;
38-
_CCCL_TRY_CUDA_API(::cudaDeviceGetAttribute, "failed to get device attribute", &__value, __attr, __dev);
39-
return __value;
40-
}
41-
42-
template <::cudaDeviceAttr _Attr>
43-
struct __dev_attr
44-
{
45-
using type = int;
46-
47-
_CCCL_NODISCARD constexpr operator ::cudaDeviceAttr() const noexcept
48-
{
49-
return _Attr;
50-
}
51-
52-
_CCCL_NODISCARD type operator()(device_ref __dev_id) const
53-
{
54-
return __get_attr_impl(_Attr, __dev_id.get());
55-
}
56-
};
57-
5835
template <::cudaDeviceAttr _Attr, typename _Type>
59-
struct __dev_attr_with_type
36+
struct __dev_attr_impl
6037
{
6138
using type = _Type;
6239

@@ -67,176 +44,182 @@ struct __dev_attr_with_type
6744

6845
_CCCL_NODISCARD type operator()(device_ref __dev_id) const
6946
{
70-
return static_cast<type>(__get_attr_impl(_Attr, __dev_id.get()));
47+
int __value = 0;
48+
_CCCL_TRY_CUDA_API(::cudaDeviceGetAttribute, "failed to get device attribute", &__value, _Attr, __dev_id.get());
49+
return static_cast<type>(__value);
7150
}
7251
};
7352

53+
template <::cudaDeviceAttr _Attr>
54+
struct __dev_attr : __dev_attr_impl<_Attr, int>
55+
{};
56+
7457
// TODO: give this a strong type for kilohertz
7558
template <>
7659
struct __dev_attr<::cudaDevAttrClockRate> //
77-
: __dev_attr_with_type<::cudaDevAttrClockRate, int>
60+
: __dev_attr_impl<::cudaDevAttrClockRate, int>
7861
{};
7962
template <>
8063
struct __dev_attr<::cudaDevAttrGpuOverlap> //
81-
: __dev_attr_with_type<::cudaDevAttrGpuOverlap, bool>
64+
: __dev_attr_impl<::cudaDevAttrGpuOverlap, bool>
8265
{};
8366
template <>
8467
struct __dev_attr<::cudaDevAttrKernelExecTimeout> //
85-
: __dev_attr_with_type<::cudaDevAttrKernelExecTimeout, bool>
68+
: __dev_attr_impl<::cudaDevAttrKernelExecTimeout, bool>
8669
{};
8770
template <>
8871
struct __dev_attr<::cudaDevAttrIntegrated> //
89-
: __dev_attr_with_type<::cudaDevAttrIntegrated, bool>
72+
: __dev_attr_impl<::cudaDevAttrIntegrated, bool>
9073
{};
9174
template <>
9275
struct __dev_attr<::cudaDevAttrCanMapHostMemory> //
93-
: __dev_attr_with_type<::cudaDevAttrCanMapHostMemory, bool>
76+
: __dev_attr_impl<::cudaDevAttrCanMapHostMemory, bool>
9477
{};
9578
template <>
9679
struct __dev_attr<::cudaDevAttrComputeMode> //
97-
: __dev_attr_with_type<::cudaDevAttrComputeMode, ::cudaComputeMode>
80+
: __dev_attr_impl<::cudaDevAttrComputeMode, ::cudaComputeMode>
9881
{
9982
static constexpr type default_mode = cudaComputeModeDefault;
10083
static constexpr type prohibited_mode = cudaComputeModeProhibited;
10184
static constexpr type exclusive_process_mode = cudaComputeModeExclusiveProcess;
10285
};
10386
template <>
10487
struct __dev_attr<::cudaDevAttrConcurrentKernels> //
105-
: __dev_attr_with_type<::cudaDevAttrConcurrentKernels, bool>
88+
: __dev_attr_impl<::cudaDevAttrConcurrentKernels, bool>
10689
{};
10790
template <>
10891
struct __dev_attr<::cudaDevAttrEccEnabled> //
109-
: __dev_attr_with_type<::cudaDevAttrEccEnabled, bool>
92+
: __dev_attr_impl<::cudaDevAttrEccEnabled, bool>
11093
{};
11194
template <>
11295
struct __dev_attr<::cudaDevAttrTccDriver> //
113-
: __dev_attr_with_type<::cudaDevAttrTccDriver, bool>
96+
: __dev_attr_impl<::cudaDevAttrTccDriver, bool>
11497
{};
11598
// TODO: give this a strong type for kilohertz
11699
template <>
117100
struct __dev_attr<::cudaDevAttrMemoryClockRate> //
118-
: __dev_attr_with_type<::cudaDevAttrMemoryClockRate, int>
101+
: __dev_attr_impl<::cudaDevAttrMemoryClockRate, int>
119102
{};
120103
// TODO: give this a strong type for bits
121104
template <>
122105
struct __dev_attr<::cudaDevAttrGlobalMemoryBusWidth> //
123-
: __dev_attr_with_type<::cudaDevAttrGlobalMemoryBusWidth, int>
106+
: __dev_attr_impl<::cudaDevAttrGlobalMemoryBusWidth, int>
124107
{};
125108
// TODO: give this a strong type for bytes
126109
template <>
127110
struct __dev_attr<::cudaDevAttrL2CacheSize> //
128-
: __dev_attr_with_type<::cudaDevAttrL2CacheSize, int>
111+
: __dev_attr_impl<::cudaDevAttrL2CacheSize, int>
129112
{};
130113
template <>
131114
struct __dev_attr<::cudaDevAttrUnifiedAddressing> //
132-
: __dev_attr_with_type<::cudaDevAttrUnifiedAddressing, bool>
115+
: __dev_attr_impl<::cudaDevAttrUnifiedAddressing, bool>
133116
{};
134117
template <>
135118
struct __dev_attr<::cudaDevAttrStreamPrioritiesSupported> //
136-
: __dev_attr_with_type<::cudaDevAttrStreamPrioritiesSupported, bool>
119+
: __dev_attr_impl<::cudaDevAttrStreamPrioritiesSupported, bool>
137120
{};
138121
template <>
139122
struct __dev_attr<::cudaDevAttrGlobalL1CacheSupported> //
140-
: __dev_attr_with_type<::cudaDevAttrGlobalL1CacheSupported, bool>
123+
: __dev_attr_impl<::cudaDevAttrGlobalL1CacheSupported, bool>
141124
{};
142125
template <>
143126
struct __dev_attr<::cudaDevAttrLocalL1CacheSupported> //
144-
: __dev_attr_with_type<::cudaDevAttrLocalL1CacheSupported, bool>
127+
: __dev_attr_impl<::cudaDevAttrLocalL1CacheSupported, bool>
145128
{};
146129
template <>
147130
struct __dev_attr<::cudaDevAttrManagedMemory> //
148-
: __dev_attr_with_type<::cudaDevAttrManagedMemory, bool>
131+
: __dev_attr_impl<::cudaDevAttrManagedMemory, bool>
149132
{};
150133
template <>
151134
struct __dev_attr<::cudaDevAttrIsMultiGpuBoard> //
152-
: __dev_attr_with_type<::cudaDevAttrIsMultiGpuBoard, bool>
135+
: __dev_attr_impl<::cudaDevAttrIsMultiGpuBoard, bool>
153136
{};
154137
template <>
155138
struct __dev_attr<::cudaDevAttrHostNativeAtomicSupported> //
156-
: __dev_attr_with_type<::cudaDevAttrHostNativeAtomicSupported, bool>
139+
: __dev_attr_impl<::cudaDevAttrHostNativeAtomicSupported, bool>
157140
{};
158141
template <>
159142
struct __dev_attr<::cudaDevAttrPageableMemoryAccess> //
160-
: __dev_attr_with_type<::cudaDevAttrPageableMemoryAccess, bool>
143+
: __dev_attr_impl<::cudaDevAttrPageableMemoryAccess, bool>
161144
{};
162145
template <>
163146
struct __dev_attr<::cudaDevAttrConcurrentManagedAccess> //
164-
: __dev_attr_with_type<::cudaDevAttrConcurrentManagedAccess, bool>
147+
: __dev_attr_impl<::cudaDevAttrConcurrentManagedAccess, bool>
165148
{};
166149
template <>
167150
struct __dev_attr<::cudaDevAttrComputePreemptionSupported> //
168-
: __dev_attr_with_type<::cudaDevAttrComputePreemptionSupported, bool>
151+
: __dev_attr_impl<::cudaDevAttrComputePreemptionSupported, bool>
169152
{};
170153
template <>
171154
struct __dev_attr<::cudaDevAttrCanUseHostPointerForRegisteredMem> //
172-
: __dev_attr_with_type<::cudaDevAttrCanUseHostPointerForRegisteredMem, bool>
155+
: __dev_attr_impl<::cudaDevAttrCanUseHostPointerForRegisteredMem, bool>
173156
{};
174157
template <>
175158
struct __dev_attr<::cudaDevAttrCooperativeLaunch> //
176-
: __dev_attr_with_type<::cudaDevAttrCooperativeLaunch, bool>
159+
: __dev_attr_impl<::cudaDevAttrCooperativeLaunch, bool>
177160
{};
178161
template <>
179162
struct __dev_attr<::cudaDevAttrCooperativeMultiDeviceLaunch> //
180-
: __dev_attr_with_type<::cudaDevAttrCooperativeMultiDeviceLaunch, bool>
163+
: __dev_attr_impl<::cudaDevAttrCooperativeMultiDeviceLaunch, bool>
181164
{};
182165
template <>
183166
struct __dev_attr<::cudaDevAttrCanFlushRemoteWrites> //
184-
: __dev_attr_with_type<::cudaDevAttrCanFlushRemoteWrites, bool>
167+
: __dev_attr_impl<::cudaDevAttrCanFlushRemoteWrites, bool>
185168
{};
186169
template <>
187170
struct __dev_attr<::cudaDevAttrHostRegisterSupported> //
188-
: __dev_attr_with_type<::cudaDevAttrHostRegisterSupported, bool>
171+
: __dev_attr_impl<::cudaDevAttrHostRegisterSupported, bool>
189172
{};
190173
template <>
191174
struct __dev_attr<::cudaDevAttrDirectManagedMemAccessFromHost> //
192-
: __dev_attr_with_type<::cudaDevAttrDirectManagedMemAccessFromHost, bool>
175+
: __dev_attr_impl<::cudaDevAttrDirectManagedMemAccessFromHost, bool>
193176
{};
194177
template <>
195178
struct __dev_attr<::cudaDevAttrSparseCudaArraySupported> //
196-
: __dev_attr_with_type<::cudaDevAttrSparseCudaArraySupported, bool>
179+
: __dev_attr_impl<::cudaDevAttrSparseCudaArraySupported, bool>
197180
{};
198181
template <>
199182
struct __dev_attr<::cudaDevAttrMemoryPoolsSupported> //
200-
: __dev_attr_with_type<::cudaDevAttrMemoryPoolsSupported, bool>
183+
: __dev_attr_impl<::cudaDevAttrMemoryPoolsSupported, bool>
201184
{};
202185
template <>
203186
struct __dev_attr<::cudaDevAttrGPUDirectRDMASupported> //
204-
: __dev_attr_with_type<::cudaDevAttrGPUDirectRDMASupported, bool>
187+
: __dev_attr_impl<::cudaDevAttrGPUDirectRDMASupported, bool>
205188
{};
206189
template <>
207190
struct __dev_attr<::cudaDevAttrDeferredMappingCudaArraySupported> //
208-
: __dev_attr_with_type<::cudaDevAttrDeferredMappingCudaArraySupported, bool>
191+
: __dev_attr_impl<::cudaDevAttrDeferredMappingCudaArraySupported, bool>
209192
{};
210193
template <>
211194
struct __dev_attr<::cudaDevAttrIpcEventSupport> //
212-
: __dev_attr_with_type<::cudaDevAttrIpcEventSupport, bool>
195+
: __dev_attr_impl<::cudaDevAttrIpcEventSupport, bool>
213196
{};
214197
template <>
215198
struct __dev_attr<::cudaDevAttrPageableMemoryAccessUsesHostPageTables>
216-
: __dev_attr_with_type<::cudaDevAttrPageableMemoryAccessUsesHostPageTables, bool>
199+
: __dev_attr_impl<::cudaDevAttrPageableMemoryAccessUsesHostPageTables, bool>
217200
{};
218201
template <>
219202
struct __dev_attr<::cudaDevAttrHostRegisterReadOnlySupported> //
220-
: __dev_attr_with_type<::cudaDevAttrHostRegisterReadOnlySupported, bool>
203+
: __dev_attr_impl<::cudaDevAttrHostRegisterReadOnlySupported, bool>
221204
{};
222205
template <>
223206
struct __dev_attr<::cudaDevAttrGPUDirectRDMAFlushWritesOptions> //
224-
: __dev_attr_with_type<::cudaDevAttrGPUDirectRDMAFlushWritesOptions, ::cudaFlushGPUDirectRDMAWritesOptions>
207+
: __dev_attr_impl<::cudaDevAttrGPUDirectRDMAFlushWritesOptions, ::cudaFlushGPUDirectRDMAWritesOptions>
225208
{
226209
static constexpr type host = ::cudaFlushGPUDirectRDMAWritesOptionHost;
227210
static constexpr type mem_ops = ::cudaFlushGPUDirectRDMAWritesOptionMemOps;
228211
};
229212
template <>
230213
struct __dev_attr<::cudaDevAttrGPUDirectRDMAWritesOrdering> //
231-
: __dev_attr_with_type<::cudaDevAttrGPUDirectRDMAWritesOrdering, ::cudaGPUDirectRDMAWritesOrdering>
214+
: __dev_attr_impl<::cudaDevAttrGPUDirectRDMAWritesOrdering, ::cudaGPUDirectRDMAWritesOrdering>
232215
{
233216
static constexpr type none = ::cudaGPUDirectRDMAWritesOrderingNone;
234217
static constexpr type owner = ::cudaGPUDirectRDMAWritesOrderingOwner;
235218
static constexpr type all_devices = ::cudaGPUDirectRDMAWritesOrderingAllDevices;
236219
};
237220
template <>
238221
struct __dev_attr<::cudaDevAttrMemoryPoolSupportedHandleTypes> //
239-
: __dev_attr_with_type<::cudaDevAttrMemoryPoolSupportedHandleTypes, ::cudaMemAllocationHandleType>
222+
: __dev_attr_impl<::cudaDevAttrMemoryPoolSupportedHandleTypes, ::cudaMemAllocationHandleType>
240223
{
241224
static constexpr type none = ::cudaMemHandleTypeNone;
242225
static constexpr type posix_file_descriptor = ::cudaMemHandleTypePosixFileDescriptor;
@@ -251,7 +234,7 @@ struct __dev_attr<::cudaDevAttrMemoryPoolSupportedHandleTypes> //
251234
#if CUDART_VERSION >= 12020
252235
template <>
253236
struct __dev_attr<::cudaDevAttrNumaConfig> //
254-
: __dev_attr_with_type<::cudaDevAttrNumaConfig, ::cudaDeviceNumaConfig>
237+
: __dev_attr_impl<::cudaDevAttrNumaConfig, ::cudaDeviceNumaConfig>
255238
{
256239
static constexpr type none = ::cudaDeviceNumaConfigNone;
257240
static constexpr type numa_node = ::cudaDeviceNumaConfigNumaNode;

0 commit comments

Comments
 (0)