
Commit 6cab301

Merge branch 'main' into strides-compact
2 parents c559674 + 1117366 commit 6cab301

1 file changed: +262 −3 lines

include/dlpack/dlpack.h

Lines changed: 262 additions & 3 deletions
@@ -1,5 +1,5 @@
 /*!
- * Copyright (c) 2017 by Contributors
+ * Copyright (c) 2017 - by Contributors
  * \file dlpack.h
  * \brief The common header of DLPack.
  */
@@ -338,7 +338,7 @@ typedef struct DLManagedTensor {
  *
  * \note This is the current standard DLPack exchange data structure.
  */
-struct DLManagedTensorVersioned {
+typedef struct DLManagedTensorVersioned {
   /*!
    * \brief The API and ABI version of the current managed Tensor
    */
@@ -372,7 +372,266 @@ struct DLManagedTensorVersioned {
   uint64_t flags;
   /*! \brief DLTensor which is being memory managed */
   DLTensor dl_tensor;
-};
+} DLManagedTensorVersioned;
+
+//----------------------------------------------------------------------
+// DLPack `__c_dlpack_exchange_api__` fast exchange protocol definitions
+//----------------------------------------------------------------------
+/*!
+ * \brief Request a producer library to create a new tensor.
+ *
+ * Create a new `DLManagedTensorVersioned` within the context of the producer
+ * library. The allocation is defined via the prototype DLTensor.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param prototype The prototype DLTensor. Only the dtype, ndim, shape,
+ *        and device fields are used.
+ * \param out The output DLManagedTensorVersioned.
+ * \param error_ctx Context for `SetError`.
+ * \param SetError The function to set the error.
+ * \return 0 on success, -1 on failure. SetError is called exactly when the
+ *         call fails (the implementor must ensure this).
+ * \note - As a C function, this must not throw C++ exceptions.
+ *       - Errors are propagated via SetError to avoid any direct dependency
+ *         on the Python API. Because of this, `SetError` may have to ensure
+ *         the GIL is held, since it will presumably set a Python error.
+ *
+ * \sa DLPackExchangeAPI
+ */
+typedef int (*DLPackManagedTensorAllocator)( //
+    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx, //
+    void (*SetError)(void* error_ctx, const char* kind, const char* message) //
+);
+
+/*!
+ * \brief Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ *        as the one the `DLPackExchangeAPI` was discovered from.
+ * \param out The output owning DLManagedTensorVersioned*.
+ * \return 0 on success, -1 on failure with a Python exception set. If the
+ *         data cannot be described using DLPack, the exception should be a
+ *         BufferError if possible.
+ * \note - As a C function, this must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+typedef int (*DLPackManagedTensorFromPyObjectNoSync)( //
+    void* py_object, //
+    DLManagedTensorVersioned** out //
+);
+
+/*!
+ * \brief Exports a PyObject* Tensor/NDArray to a provided DLTensor.
+ *
+ * This function provides a faster interface for temporary, non-owning
+ * exchange. The producer (implementor) still owns the memory of the data,
+ * strides, and shape. The liveness of the DLTensor and the data it views is
+ * only guaranteed until control is returned.
+ *
+ * This function currently assumes that the producer (implementor) can fill
+ * in the DLTensor shape and strides without the need for temporary
+ * allocations.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ *        as the one the `DLPackExchangeAPI` was discovered from.
+ * \param out The output DLTensor, whose space is pre-allocated on the stack.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, this must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+typedef int (*DLPackDLTensorFromPyObjectNoSync)( //
+    void* py_object, //
+    DLTensor* out //
+);
+
+/*!
+ * \brief Obtain the current work stream of a device.
+ *
+ * Obtain the current work stream of a device from the producer framework.
+ * For example, it should map to torch.cuda.current_stream in PyTorch.
+ *
+ * When device_type is kDLCPU, the consumer does not have to query the
+ * stream, and the producer can simply return NULL when queried. The consumer
+ * does not have to do anything about stream synchronization or setting, so a
+ * CPU-only framework can just provide a dummy implementation that always
+ * sets out_current_stream[0] to NULL.
+ *
+ * \param device_type The device type.
+ * \param device_id The device id.
+ * \param out_current_stream The output current work stream.
+ *
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, this must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI
+ */
+typedef int (*DLPackCurrentWorkStream)( //
+    DLDeviceType device_type, //
+    int32_t device_id, //
+    void** out_current_stream //
+);
+
+/*!
+ * \brief Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray.
+ *
+ * Convert an owning DLManagedTensorVersioned* to the Python tensor of the
+ * producer (implementor) library with the correct type.
+ *
+ * This function does not perform any stream synchronization.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param tensor The DLManagedTensorVersioned to convert; the ownership of
+ *        the tensor is stolen.
+ * \param out_py_object The output Python object.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ *
+ * \sa DLPackExchangeAPI
+ */
+typedef int (*DLPackManagedTensorToPyObjectNoSync)( //
+    DLManagedTensorVersioned* tensor, //
+    void** out_py_object //
+);
+
+/*!
+ * \brief DLPackExchangeAPI stable header.
+ * \sa DLPackExchangeAPI
+ */
+typedef struct DLPackExchangeAPIHeader {
+  /*!
+   * \brief The provided DLPack version; the consumer must check major
+   *        version compatibility before using this struct.
+   */
+  DLPackVersion version;
+  /*!
+   * \brief Optional pointer to an older DLPackExchangeAPI in the chain.
+   *
+   * It must be NULL if the framework does not support older versions.
+   * If the current major version is larger than the one supported by the
+   * consumer, the consumer may walk this chain to find an earlier supported
+   * version.
+   *
+   * \sa DLPackExchangeAPI
+   */
+  struct DLPackExchangeAPIHeader* prev_api;
+} DLPackExchangeAPIHeader;
+
+/*!
+ * \brief Framework-specific function pointer table for DLPack exchange.
+ *
+ * In addition to `__dlpack__()`, we define a C function table sharable by
+ * Python implementations via `__c_dlpack_exchange_api__`.
+ * This attribute must be set on the type as a Python integer compatible
+ * with `PyLong_FromVoidPtr`/`PyLong_AsVoidPtr`.
+ *
+ * A consumer library may use a pattern such as:
+ *
+ * \code
+ *
+ * PyObject *api_obj = type(tensor_obj).__c_dlpack_exchange_api__; // as C code
+ * MyDLPackExchangeAPI *api = PyLong_AsVoidPtr(api_obj);
+ * if (api == NULL && PyErr_Occurred()) { goto handle_error; }
+ *
+ * \endcode
+ *
+ * Note that this attribute must be defined on the type. The consumer should
+ * look up the attribute on the type and may cache the result for each unique
+ * type.
+ *
+ * The precise API table is given by:
+ * \code
+ * struct MyDLPackExchangeAPI : public DLPackExchangeAPI {
+ *   MyDLPackExchangeAPI() {
+ *     header.version.major = DLPACK_MAJOR_VERSION;
+ *     header.version.minor = DLPACK_MINOR_VERSION;
+ *     header.prev_api = nullptr;
+ *
+ *     managed_tensor_allocator = MyDLPackManagedTensorAllocator;
+ *     managed_tensor_from_py_object_no_sync = MyDLPackManagedTensorFromPyObjectNoSync;
+ *     managed_tensor_to_py_object_no_sync = MyDLPackManagedTensorToPyObjectNoSync;
+ *     dltensor_from_py_object_no_sync = MyDLPackDLTensorFromPyObjectNoSync;
+ *     current_work_stream = MyDLPackCurrentWorkStream;
+ *   }
+ *
+ *   static const DLPackExchangeAPI* Global() {
+ *     static MyDLPackExchangeAPI inst;
+ *     return &inst;
+ *   }
+ * };
+ * \endcode
+ *
+ * Guidelines for leveraging DLPackExchangeAPI:
+ *
+ * There are generally two kinds of consumer needs for DLPack exchange:
+ * - N0: library support, where consumer.kernel(x, y, z) would like to run a
+ *   kernel with the data from x, y, z. The consumer is also expected to run
+ *   the kernel in the same stream context as the producer. For example, when
+ *   x, y, z are torch.Tensor, the consumer should query
+ *   exchange_api->current_work_stream to get the current stream and launch
+ *   the kernel on the same stream. This setup avoids synchronization during
+ *   kernel launch and gives maximum compatibility with CUDA graph capture in
+ *   the producer. This is the desirable behavior for library extension
+ *   support for frameworks like PyTorch.
+ * - N1: data ingestion and retention, where the consumer keeps the data
+ *   alive beyond the call.
+ *
+ * Note that the obj.__dlpack__() API should provide useful ways for N1.
+ * The primary focus of the current DLPackExchangeAPI is to enable the faster
+ * exchange N0, with the support of the function pointer current_work_stream.
+ *
+ * Array/Tensor libraries should statically create and initialize this
+ * structure, then return a pointer to the DLPackExchangeAPI as an int value
+ * on the Tensor/Array type. The DLPackExchangeAPI* must stay alive
+ * throughout the lifetime of the process.
+ *
+ * One simple way to do so is to create a static instance of
+ * DLPackExchangeAPI within the framework and return a pointer to it. The
+ * code above shows an example of doing so in C++. It should also be
+ * reasonably easy to do so in other languages.
+ */
+typedef struct DLPackExchangeAPI {
+  /*!
+   * \brief The header that remains stable across versions.
+   */
+  DLPackExchangeAPIHeader header;
+  /*!
+   * \brief Producer function pointer for DLPackManagedTensorAllocator.
+   *        This function must not be NULL.
+   * \sa DLPackManagedTensorAllocator
+   */
+  DLPackManagedTensorAllocator managed_tensor_allocator;
+  /*!
+   * \brief Producer function pointer for DLPackManagedTensorFromPyObjectNoSync.
+   *        This function must not be NULL.
+   * \sa DLPackManagedTensorFromPyObjectNoSync
+   */
+  DLPackManagedTensorFromPyObjectNoSync managed_tensor_from_py_object_no_sync;
+  /*!
+   * \brief Producer function pointer for DLPackManagedTensorToPyObjectNoSync.
+   *        This function must not be NULL.
+   * \sa DLPackManagedTensorToPyObjectNoSync
+   */
+  DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync;
+  /*!
+   * \brief Producer function pointer for DLPackDLTensorFromPyObjectNoSync.
+   *        This function can be NULL when the producer does not support it.
+   * \sa DLPackDLTensorFromPyObjectNoSync
+   */
+  DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync;
+  /*!
+   * \brief Producer function pointer for DLPackCurrentWorkStream.
+   *        This function must not be NULL.
+   * \sa DLPackCurrentWorkStream
+   */
+  DLPackCurrentWorkStream current_work_stream;
+} DLPackExchangeAPI;
 
 #ifdef __cplusplus
 }  // DLPACK_EXTERN_C
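
The sketches below illustrate the protocol added in this diff; they are not part of the commit. First, the consumer-side N0 flow: discover the table from the tensor's type, take a temporary non-owning DLTensor view, and launch on the producer's current stream rather than synchronizing. ConsumeTensorNoSync and LaunchMyKernel are hypothetical names.

// Editorial sketch (not part of this commit): the N0 consumer flow.
#include <Python.h>
#include <dlpack/dlpack.h>

void LaunchMyKernel(const DLTensor* view, void* stream);  // hypothetical

int ConsumeTensorNoSync(PyObject* tensor_obj) {
  // The attribute lives on the type, not the instance; the result may be
  // cached per unique type.
  PyObject* api_obj = PyObject_GetAttrString(
      reinterpret_cast<PyObject*>(Py_TYPE(tensor_obj)),
      "__c_dlpack_exchange_api__");
  if (api_obj == NULL) return -1;
  DLPackExchangeAPI* api =
      static_cast<DLPackExchangeAPI*>(PyLong_AsVoidPtr(api_obj));
  Py_DECREF(api_obj);
  if (api == NULL) {
    if (!PyErr_Occurred())
      PyErr_SetString(PyExc_BufferError, "null __c_dlpack_exchange_api__");
    return -1;
  }
  // The major-version check is mandatory before using the rest of the table.
  if (api->header.version.major != DLPACK_MAJOR_VERSION) {
    PyErr_SetString(PyExc_BufferError, "incompatible DLPack major version");
    return -1;
  }
  // Fast non-owning export: the view is only valid until control returns.
  if (api->dltensor_from_py_object_no_sync == NULL) {
    PyErr_SetString(PyExc_BufferError, "producer lacks fast DLTensor export");
    return -1;
  }
  DLTensor view;
  if (api->dltensor_from_py_object_no_sync(tensor_obj, &view) != 0) return -1;
  // Query the producer's current work stream (NULL on CPU) and launch on it;
  // the protocol itself performs no synchronization.
  void* stream = NULL;
  if (api->current_work_stream(view.device.device_type, view.device.device_id,
                               &stream) != 0) {
    return -1;
  }
  LaunchMyKernel(&view, stream);
  return 0;
}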
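When the producer's table is newer than the consumer understands, the prev_api chain in the header supports version negotiation. A sketch with the hypothetical helper FindCompatibleAPI; the cast back to DLPackExchangeAPI* relies on the header being the struct's first member.

// Editorial sketch: walking the prev_api chain for version negotiation.
#include <dlpack/dlpack.h>

static const DLPackExchangeAPI* FindCompatibleAPI(
    const DLPackExchangeAPI* latest) {
  const DLPackExchangeAPIHeader* h = &latest->header;
  // Walk toward older tables until the major version is one we support.
  while (h != NULL && h->version.major > DLPACK_MAJOR_VERSION) {
    h = h->prev_api;
  }
  if (h == NULL || h->version.major != DLPACK_MAJOR_VERSION) return NULL;
  // The header is the first member of DLPackExchangeAPI, so this is safe.
  return reinterpret_cast<const DLPackExchangeAPI*>(h);
}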
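On the producer side, the allocator must honor the SetError contract exactly. A minimal CPU-only sketch, using plain malloc purely for illustration; MyManagedTensorAllocator and MyDeleter are hypothetical names, and a real framework would route through its own allocator.

// Editorial sketch: a minimal DLPackManagedTensorAllocator for CPU memory.
#include <cstdlib>
#include <cstring>
#include <dlpack/dlpack.h>

static void MyDeleter(DLManagedTensorVersioned* self) {
  std::free(self->dl_tensor.data);
  std::free(self->dl_tensor.shape);
  std::free(self);
}

static int MyManagedTensorAllocator(
    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,
    void (*SetError)(void* error_ctx, const char* kind, const char* message)) {
  // Size the buffer from the prototype's shape and dtype.
  int64_t numel = 1;
  for (int i = 0; i < prototype->ndim; ++i) numel *= prototype->shape[i];
  size_t nbytes = static_cast<size_t>(numel) *
                  ((prototype->dtype.bits * prototype->dtype.lanes + 7) / 8);
  DLManagedTensorVersioned* ret = static_cast<DLManagedTensorVersioned*>(
      std::calloc(1, sizeof(DLManagedTensorVersioned)));
  int64_t* shape =
      static_cast<int64_t*>(std::malloc(sizeof(int64_t) * prototype->ndim));
  void* data = std::malloc(nbytes);
  if (ret == NULL || shape == NULL || data == NULL) {
    std::free(ret);
    std::free(shape);
    std::free(data);
    // Per the contract, SetError is called exactly when the call fails.
    SetError(error_ctx, "MemoryError", "failed to allocate tensor");
    return -1;
  }
  std::memcpy(shape, prototype->shape, sizeof(int64_t) * prototype->ndim);
  ret->version.major = DLPACK_MAJOR_VERSION;
  ret->version.minor = DLPACK_MINOR_VERSION;
  ret->deleter = MyDeleter;
  ret->dl_tensor.data = data;
  ret->dl_tensor.device = prototype->device;
  ret->dl_tensor.dtype = prototype->dtype;
  ret->dl_tensor.ndim = prototype->ndim;
  ret->dl_tensor.shape = shape;
  ret->dl_tensor.strides = NULL;  // NULL means compact row-major layout
  *out = ret;
  return 0;
}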
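Finally, the two managed-tensor hooks compose into an owning framework-to-framework hand-off. A sketch assuming both tables were discovered and version-checked as above; ConvertNoSync is a hypothetical helper, and since neither hook synchronizes, the caller remains responsible for ordering outstanding work on the producer's stream.

// Editorial sketch: owning hand-off between two frameworks' exchange tables.
#include <Python.h>
#include <dlpack/dlpack.h>

PyObject* ConvertNoSync(const DLPackExchangeAPI* src_api,
                        const DLPackExchangeAPI* dst_api,
                        PyObject* src_tensor) {
  DLManagedTensorVersioned* managed = NULL;
  if (src_api->managed_tensor_from_py_object_no_sync(src_tensor,
                                                     &managed) != 0) {
    return NULL;  // Python exception already set by the source framework.
  }
  // Ownership of `managed` (and thus its deleter) is stolen by the import.
  void* dst_obj = NULL;
  if (dst_api->managed_tensor_to_py_object_no_sync(managed, &dst_obj) != 0) {
    return NULL;
  }
  return static_cast<PyObject*>(dst_obj);
}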
