@@ -108,7 +108,110 @@ private:
108108 }
109109 }
110110
111+ // TODO(bgruber): we want to eventually forward the output tuple to the kernel and optimize writing multiple streams
112+ template <detail::transform::requires_stable_address StableAddress = detail::transform::requires_stable_address::no,
113+ typename ... RandomAccessIteratorsIn,
114+ typename ... RandomAccessIteratorsOut,
115+ typename NumItemsT,
116+ typename Predicate,
117+ typename TransformOp,
118+ typename Env>
119+ CUB_RUNTIME_FUNCTION static cudaError_t TransformInternal (
120+ ::cuda::std::tuple<RandomAccessIteratorsIn...> inputs,
121+ ::cuda::std::tuple<RandomAccessIteratorsOut...> outputs,
122+ NumItemsT num_items,
123+ Predicate predicate,
124+ TransformOp transform_op,
125+ Env env)
126+ {
127+ return TransformInternal<StableAddress>(
128+ ::cuda::std::move (inputs),
129+ ::cuda::make_zip_iterator(::cuda::std::move(outputs)),
130+ num_items,
131+ ::cuda::std::move(predicate),
132+ ::cuda::std::move(transform_op),
133+ ::cuda::std::move(env));
134+ }
135+
111136public:
137+ // ! @rst
138+ // ! Overview
139+ // ! +++++++++++++++++++++++++++++++++++++++++++++
140+ // ! Transforms many input sequences into many output sequence, by applying a transformation operation on corresponding
141+ // ! input elements and writing the tuple result to the corresponding output elements. No guarantee is given on the
142+ // ! identity (i.e. address) of the objects passed to the call operator of the transformation operation.
143+ // !
144+ // ! A Simple Example
145+ // ! +++++++++++++++++++++++++++++++++++++++++++++
146+ // !
147+ // ! .. literalinclude:: ../../../cub/test/catch2_test_device_transform_api.cu
148+ // ! :language: c++
149+ // ! :dedent:
150+ // ! :start-after: example-begin transform-many-many
151+ // ! :end-before: example-end transform-many-many
152+ // !
153+ // ! @endrst
154+ // !
155+ // ! @param inputs A tuple of iterators to the input sequences where num_items elements are read from each. The
156+ // ! iterators' value types must be trivially relocatable.
157+ // ! @param outputs A tuple of iterators to the output sequences where num_items results are written to each. Each
158+ // ! sequence may point to the beginning of one of the input sequences, performing the transformation inplace. Any
159+ // ! output sequence must not overlap with any of the input sequence in any other way.
160+ // ! @param num_items The number of elements in each input and output sequence.
161+ // ! @param transform_op An n-ary function object, where n is the number of input sequences. The input iterators' value
162+ // ! types must be convertible to the parameters of the function object's call operator. The return type of the call
163+ // ! operator must be a tuple where each tuple element is assignable to the corresponding dereferenced output
164+ // ! iterators.
165+ // ! @param env Execution environment, or cudaStream_t. Default is ``cuda::std::execution::env{}``, which will run on
166+ // ! stream\ :sub:`0`
167+ template <typename ... RandomAccessIteratorsIn,
168+ typename ... RandomAccessIteratorsOut,
169+ typename NumItemsT,
170+ typename TransformOp,
171+ typename Env = ::cuda::std::execution::env<>>
172+ CUB_RUNTIME_FUNCTION static cudaError_t Transform (
173+ ::cuda::std::tuple<RandomAccessIteratorsIn...> inputs,
174+ ::cuda::std::tuple<RandomAccessIteratorsOut...> outputs,
175+ NumItemsT num_items,
176+ TransformOp transform_op,
177+ Env env = {})
178+ {
179+ _CCCL_NVTX_RANGE_SCOPE (" cub::DeviceTransform::Transform" );
180+ return TransformInternal (
181+ ::cuda::std::move (inputs),
182+ ::cuda::std::move(outputs),
183+ num_items,
184+ detail::transform::always_true_predicate{},
185+ ::cuda::std::move (transform_op),
186+ ::cuda::std::move(env));
187+ }
188+
189+ #ifndef _CCCL_DOXYGEN_INVOKED // Do not document
190+ // Overload with additional parameters to specify temporary storage. Provided for compatibility with other CUB APIs.
191+ template <typename ... RandomAccessIteratorsIn,
192+ typename ... RandomAccessIteratorsOut,
193+ typename NumItemsT,
194+ typename TransformOp>
195+ CUB_RUNTIME_FUNCTION static cudaError_t Transform (
196+ void * d_temp_storage,
197+ size_t & temp_storage_bytes,
198+ ::cuda::std::tuple<RandomAccessIteratorsIn...> inputs,
199+ ::cuda::std::tuple<RandomAccessIteratorsOut...> outputs,
200+ NumItemsT num_items,
201+ TransformOp transform_op,
202+ cudaStream_t stream = nullptr )
203+ {
204+ if (d_temp_storage == nullptr )
205+ {
206+ temp_storage_bytes = 1 ;
207+ return cudaSuccess;
208+ }
209+
210+ return Transform (
211+ ::cuda::std::move (inputs), ::cuda::std::move(outputs), num_items, ::cuda::std::move(transform_op), stream);
212+ }
213+ #endif // _CCCL_DOXYGEN_INVOKED
214+
112215 // ! @rst
113216 // ! Overview
114217 // ! +++++++++++++++++++++++++++++++++++++++++++++
0 commit comments