@@ -32,31 +32,8 @@ namespace cuda::experimental
3232namespace detail
3333{
3434
35- _CCCL_NODISCARD inline int __get_attr_impl (::cudaDeviceAttr __attr, int __dev)
36- {
37- int __value = 0 ;
38- _CCCL_TRY_CUDA_API (::cudaDeviceGetAttribute, " failed to get device attribute" , &__value, __attr, __dev);
39- return __value;
40- }
41-
42- template <::cudaDeviceAttr _Attr>
43- struct __dev_attr
44- {
45- using type = int ;
46-
47- _CCCL_NODISCARD constexpr operator ::cudaDeviceAttr () const noexcept
48- {
49- return _Attr;
50- }
51-
52- _CCCL_NODISCARD type operator ()(device_ref __dev_id) const
53- {
54- return __get_attr_impl (_Attr, __dev_id.get ());
55- }
56- };
57-
5835template <::cudaDeviceAttr _Attr, typename _Type>
59- struct __dev_attr_with_type
36+ struct __dev_attr_impl
6037{
6138 using type = _Type;
6239
@@ -67,176 +44,182 @@ struct __dev_attr_with_type
6744
6845 _CCCL_NODISCARD type operator ()(device_ref __dev_id) const
6946 {
70- return static_cast <type>(__get_attr_impl (_Attr, __dev_id.get ()));
47+ int __value = 0 ;
48+ _CCCL_TRY_CUDA_API (::cudaDeviceGetAttribute, " failed to get device attribute" , &__value, _Attr, __dev_id.get ());
49+ return static_cast <type>(__value);
7150 }
7251};
7352
53+ template <::cudaDeviceAttr _Attr>
54+ struct __dev_attr : __dev_attr_impl<_Attr, int >
55+ {};
56+
7457// TODO: give this a strong type for kilohertz
7558template <>
7659struct __dev_attr <::cudaDevAttrClockRate> //
77- : __dev_attr_with_type <::cudaDevAttrClockRate, int >
60+ : __dev_attr_impl <::cudaDevAttrClockRate, int >
7861{};
7962template <>
8063struct __dev_attr <::cudaDevAttrGpuOverlap> //
81- : __dev_attr_with_type <::cudaDevAttrGpuOverlap, bool >
64+ : __dev_attr_impl <::cudaDevAttrGpuOverlap, bool >
8265{};
8366template <>
8467struct __dev_attr <::cudaDevAttrKernelExecTimeout> //
85- : __dev_attr_with_type <::cudaDevAttrKernelExecTimeout, bool >
68+ : __dev_attr_impl <::cudaDevAttrKernelExecTimeout, bool >
8669{};
8770template <>
8871struct __dev_attr <::cudaDevAttrIntegrated> //
89- : __dev_attr_with_type <::cudaDevAttrIntegrated, bool >
72+ : __dev_attr_impl <::cudaDevAttrIntegrated, bool >
9073{};
9174template <>
9275struct __dev_attr <::cudaDevAttrCanMapHostMemory> //
93- : __dev_attr_with_type <::cudaDevAttrCanMapHostMemory, bool >
76+ : __dev_attr_impl <::cudaDevAttrCanMapHostMemory, bool >
9477{};
9578template <>
9679struct __dev_attr <::cudaDevAttrComputeMode> //
97- : __dev_attr_with_type <::cudaDevAttrComputeMode, ::cudaComputeMode>
80+ : __dev_attr_impl <::cudaDevAttrComputeMode, ::cudaComputeMode>
9881{
9982 static constexpr type default_mode = cudaComputeModeDefault;
10083 static constexpr type prohibited_mode = cudaComputeModeProhibited;
10184 static constexpr type exclusive_process_mode = cudaComputeModeExclusiveProcess;
10285};
10386template <>
10487struct __dev_attr <::cudaDevAttrConcurrentKernels> //
105- : __dev_attr_with_type <::cudaDevAttrConcurrentKernels, bool >
88+ : __dev_attr_impl <::cudaDevAttrConcurrentKernels, bool >
10689{};
10790template <>
10891struct __dev_attr <::cudaDevAttrEccEnabled> //
109- : __dev_attr_with_type <::cudaDevAttrEccEnabled, bool >
92+ : __dev_attr_impl <::cudaDevAttrEccEnabled, bool >
11093{};
11194template <>
11295struct __dev_attr <::cudaDevAttrTccDriver> //
113- : __dev_attr_with_type <::cudaDevAttrTccDriver, bool >
96+ : __dev_attr_impl <::cudaDevAttrTccDriver, bool >
11497{};
11598// TODO: give this a strong type for kilohertz
11699template <>
117100struct __dev_attr <::cudaDevAttrMemoryClockRate> //
118- : __dev_attr_with_type <::cudaDevAttrMemoryClockRate, int >
101+ : __dev_attr_impl <::cudaDevAttrMemoryClockRate, int >
119102{};
120103// TODO: give this a strong type for bits
121104template <>
122105struct __dev_attr <::cudaDevAttrGlobalMemoryBusWidth> //
123- : __dev_attr_with_type <::cudaDevAttrGlobalMemoryBusWidth, int >
106+ : __dev_attr_impl <::cudaDevAttrGlobalMemoryBusWidth, int >
124107{};
125108// TODO: give this a strong type for bytes
126109template <>
127110struct __dev_attr <::cudaDevAttrL2CacheSize> //
128- : __dev_attr_with_type <::cudaDevAttrL2CacheSize, int >
111+ : __dev_attr_impl <::cudaDevAttrL2CacheSize, int >
129112{};
130113template <>
131114struct __dev_attr <::cudaDevAttrUnifiedAddressing> //
132- : __dev_attr_with_type <::cudaDevAttrUnifiedAddressing, bool >
115+ : __dev_attr_impl <::cudaDevAttrUnifiedAddressing, bool >
133116{};
134117template <>
135118struct __dev_attr <::cudaDevAttrStreamPrioritiesSupported> //
136- : __dev_attr_with_type <::cudaDevAttrStreamPrioritiesSupported, bool >
119+ : __dev_attr_impl <::cudaDevAttrStreamPrioritiesSupported, bool >
137120{};
138121template <>
139122struct __dev_attr <::cudaDevAttrGlobalL1CacheSupported> //
140- : __dev_attr_with_type <::cudaDevAttrGlobalL1CacheSupported, bool >
123+ : __dev_attr_impl <::cudaDevAttrGlobalL1CacheSupported, bool >
141124{};
142125template <>
143126struct __dev_attr <::cudaDevAttrLocalL1CacheSupported> //
144- : __dev_attr_with_type <::cudaDevAttrLocalL1CacheSupported, bool >
127+ : __dev_attr_impl <::cudaDevAttrLocalL1CacheSupported, bool >
145128{};
146129template <>
147130struct __dev_attr <::cudaDevAttrManagedMemory> //
148- : __dev_attr_with_type <::cudaDevAttrManagedMemory, bool >
131+ : __dev_attr_impl <::cudaDevAttrManagedMemory, bool >
149132{};
150133template <>
151134struct __dev_attr <::cudaDevAttrIsMultiGpuBoard> //
152- : __dev_attr_with_type <::cudaDevAttrIsMultiGpuBoard, bool >
135+ : __dev_attr_impl <::cudaDevAttrIsMultiGpuBoard, bool >
153136{};
154137template <>
155138struct __dev_attr <::cudaDevAttrHostNativeAtomicSupported> //
156- : __dev_attr_with_type <::cudaDevAttrHostNativeAtomicSupported, bool >
139+ : __dev_attr_impl <::cudaDevAttrHostNativeAtomicSupported, bool >
157140{};
158141template <>
159142struct __dev_attr <::cudaDevAttrPageableMemoryAccess> //
160- : __dev_attr_with_type <::cudaDevAttrPageableMemoryAccess, bool >
143+ : __dev_attr_impl <::cudaDevAttrPageableMemoryAccess, bool >
161144{};
162145template <>
163146struct __dev_attr <::cudaDevAttrConcurrentManagedAccess> //
164- : __dev_attr_with_type <::cudaDevAttrConcurrentManagedAccess, bool >
147+ : __dev_attr_impl <::cudaDevAttrConcurrentManagedAccess, bool >
165148{};
166149template <>
167150struct __dev_attr <::cudaDevAttrComputePreemptionSupported> //
168- : __dev_attr_with_type <::cudaDevAttrComputePreemptionSupported, bool >
151+ : __dev_attr_impl <::cudaDevAttrComputePreemptionSupported, bool >
169152{};
170153template <>
171154struct __dev_attr <::cudaDevAttrCanUseHostPointerForRegisteredMem> //
172- : __dev_attr_with_type <::cudaDevAttrCanUseHostPointerForRegisteredMem, bool >
155+ : __dev_attr_impl <::cudaDevAttrCanUseHostPointerForRegisteredMem, bool >
173156{};
174157template <>
175158struct __dev_attr <::cudaDevAttrCooperativeLaunch> //
176- : __dev_attr_with_type <::cudaDevAttrCooperativeLaunch, bool >
159+ : __dev_attr_impl <::cudaDevAttrCooperativeLaunch, bool >
177160{};
178161template <>
179162struct __dev_attr <::cudaDevAttrCooperativeMultiDeviceLaunch> //
180- : __dev_attr_with_type <::cudaDevAttrCooperativeMultiDeviceLaunch, bool >
163+ : __dev_attr_impl <::cudaDevAttrCooperativeMultiDeviceLaunch, bool >
181164{};
182165template <>
183166struct __dev_attr <::cudaDevAttrCanFlushRemoteWrites> //
184- : __dev_attr_with_type <::cudaDevAttrCanFlushRemoteWrites, bool >
167+ : __dev_attr_impl <::cudaDevAttrCanFlushRemoteWrites, bool >
185168{};
186169template <>
187170struct __dev_attr <::cudaDevAttrHostRegisterSupported> //
188- : __dev_attr_with_type <::cudaDevAttrHostRegisterSupported, bool >
171+ : __dev_attr_impl <::cudaDevAttrHostRegisterSupported, bool >
189172{};
190173template <>
191174struct __dev_attr <::cudaDevAttrDirectManagedMemAccessFromHost> //
192- : __dev_attr_with_type <::cudaDevAttrDirectManagedMemAccessFromHost, bool >
175+ : __dev_attr_impl <::cudaDevAttrDirectManagedMemAccessFromHost, bool >
193176{};
194177template <>
195178struct __dev_attr <::cudaDevAttrSparseCudaArraySupported> //
196- : __dev_attr_with_type <::cudaDevAttrSparseCudaArraySupported, bool >
179+ : __dev_attr_impl <::cudaDevAttrSparseCudaArraySupported, bool >
197180{};
198181template <>
199182struct __dev_attr <::cudaDevAttrMemoryPoolsSupported> //
200- : __dev_attr_with_type <::cudaDevAttrMemoryPoolsSupported, bool >
183+ : __dev_attr_impl <::cudaDevAttrMemoryPoolsSupported, bool >
201184{};
202185template <>
203186struct __dev_attr <::cudaDevAttrGPUDirectRDMASupported> //
204- : __dev_attr_with_type <::cudaDevAttrGPUDirectRDMASupported, bool >
187+ : __dev_attr_impl <::cudaDevAttrGPUDirectRDMASupported, bool >
205188{};
206189template <>
207190struct __dev_attr <::cudaDevAttrDeferredMappingCudaArraySupported> //
208- : __dev_attr_with_type <::cudaDevAttrDeferredMappingCudaArraySupported, bool >
191+ : __dev_attr_impl <::cudaDevAttrDeferredMappingCudaArraySupported, bool >
209192{};
210193template <>
211194struct __dev_attr <::cudaDevAttrIpcEventSupport> //
212- : __dev_attr_with_type <::cudaDevAttrIpcEventSupport, bool >
195+ : __dev_attr_impl <::cudaDevAttrIpcEventSupport, bool >
213196{};
214197template <>
215198struct __dev_attr <::cudaDevAttrPageableMemoryAccessUsesHostPageTables>
216- : __dev_attr_with_type <::cudaDevAttrPageableMemoryAccessUsesHostPageTables, bool >
199+ : __dev_attr_impl <::cudaDevAttrPageableMemoryAccessUsesHostPageTables, bool >
217200{};
218201template <>
219202struct __dev_attr <::cudaDevAttrHostRegisterReadOnlySupported> //
220- : __dev_attr_with_type <::cudaDevAttrHostRegisterReadOnlySupported, bool >
203+ : __dev_attr_impl <::cudaDevAttrHostRegisterReadOnlySupported, bool >
221204{};
222205template <>
223206struct __dev_attr <::cudaDevAttrGPUDirectRDMAFlushWritesOptions> //
224- : __dev_attr_with_type <::cudaDevAttrGPUDirectRDMAFlushWritesOptions, ::cudaFlushGPUDirectRDMAWritesOptions>
207+ : __dev_attr_impl <::cudaDevAttrGPUDirectRDMAFlushWritesOptions, ::cudaFlushGPUDirectRDMAWritesOptions>
225208{
226209 static constexpr type host = ::cudaFlushGPUDirectRDMAWritesOptionHost;
227210 static constexpr type mem_ops = ::cudaFlushGPUDirectRDMAWritesOptionMemOps;
228211};
229212template <>
230213struct __dev_attr <::cudaDevAttrGPUDirectRDMAWritesOrdering> //
231- : __dev_attr_with_type <::cudaDevAttrGPUDirectRDMAWritesOrdering, ::cudaGPUDirectRDMAWritesOrdering>
214+ : __dev_attr_impl <::cudaDevAttrGPUDirectRDMAWritesOrdering, ::cudaGPUDirectRDMAWritesOrdering>
232215{
233216 static constexpr type none = ::cudaGPUDirectRDMAWritesOrderingNone;
234217 static constexpr type owner = ::cudaGPUDirectRDMAWritesOrderingOwner;
235218 static constexpr type all_devices = ::cudaGPUDirectRDMAWritesOrderingAllDevices;
236219};
237220template <>
238221struct __dev_attr <::cudaDevAttrMemoryPoolSupportedHandleTypes> //
239- : __dev_attr_with_type <::cudaDevAttrMemoryPoolSupportedHandleTypes, ::cudaMemAllocationHandleType>
222+ : __dev_attr_impl <::cudaDevAttrMemoryPoolSupportedHandleTypes, ::cudaMemAllocationHandleType>
240223{
241224 static constexpr type none = ::cudaMemHandleTypeNone;
242225 static constexpr type posix_file_descriptor = ::cudaMemHandleTypePosixFileDescriptor;
@@ -251,7 +234,7 @@ struct __dev_attr<::cudaDevAttrMemoryPoolSupportedHandleTypes> //
251234#if CUDART_VERSION >= 12020
252235template <>
253236struct __dev_attr <::cudaDevAttrNumaConfig> //
254- : __dev_attr_with_type <::cudaDevAttrNumaConfig, ::cudaDeviceNumaConfig>
237+ : __dev_attr_impl <::cudaDevAttrNumaConfig, ::cudaDeviceNumaConfig>
255238{
256239 static constexpr type none = ::cudaDeviceNumaConfigNone;
257240 static constexpr type numa_node = ::cudaDeviceNumaConfigNumaNode;
0 commit comments