
Commit d6aaac7

tqchen and seberg committed
Incorporate feedbacks from sberg
Co-authored-by: Sebastian Berg <[email protected]>
1 parent bddb25b commit d6aaac7

1 file changed: include/dlpack/dlpack.h (103 additions, 116 deletions)
@@ -362,98 +362,79 @@ typedef struct DLManagedTensorVersioned {
   DLTensor dl_tensor;
 } DLManagedTensorVersioned;
 
-//--------------------------------------------------------------------
-// DLPack C functions for speed exchange
-//--------------------------------------------------------------------
+//----------------------------------------------------------------------
+// DLPack `__c_dlpack_exchange_api__` fast exchange protocol definitions
+//----------------------------------------------------------------------
 /*!
- * \brief A generic C-style allocator that exposes allocation of a Tensor/Array.
+ * \brief Request a producer library to create a new tensor.
  *
- * This information can then be used to set allocators of a callee to run allocations.
- * This information can then be used to set the callee's allocator to perform allocations.
- * This function can be exposed by the framework through the DLPackExchangeAPI.
+ * Create a new `DLManagedTensorVersioned` within the context of the producer
+ * library. The allocation is defined via the prototype DLTensor.
  *
- * This particular function does not assume a Python environment; as a result,
- * the error handling mechanism is different from Python-related functions.
+ * This function is exposed by the framework through the DLPackExchangeAPI.
  *
- * \param prototype The prototype DLTensor to offer details about the device and shape.
- *        Other field information will be ignored during allocation.
+ * \param prototype The prototype DLTensor. Only the dtype, ndim, shape,
+ *        and device fields are used.
  * \param out The output DLManagedTensorVersioned.
- * \param error_ctx The context to set the error.
+ * \param error_ctx Context for `SetError`.
  * \param SetError The function to set the error.
- * \return 0 on success, -1 on failure.
- *         The callee should call SetError(error_ctx, kind, message) to set the error kind and message.
- * \note Error propagation via SetError.
+ * \return 0 on success, -1 on failure. The implementor must ensure that
+ *         SetError is called exactly when -1 is returned.
+ * \note - As a C function, this must not throw C++ exceptions.
+ *       - Errors propagate via SetError to avoid any direct need for the
+ *         Python API. Because of this, `SetError` may have to ensure the
+ *         GIL is held, since it will presumably set a Python error.
  *
  * \sa DLPackExchangeAPI
  */
 typedef int (*DLPackManagedTensorAllocator)(  //
     DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,  //
     void (*SetError)(void* error_ctx, const char* kind, const char* message)  //
 );
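For illustration, a minimal consumer-side sketch of driving this allocator; the `api` table pointer, `ReportError`, and `AllocFloat32` are hypothetical names, not part of the header:

#include <stdio.h>
#include <dlpack/dlpack.h>

/* Hypothetical error sink passed as `SetError`. */
static void ReportError(void* error_ctx, const char* kind, const char* message) {
  (void)error_ctx;
  fprintf(stderr, "[%s] %s\n", kind, message);
}

static DLManagedTensorVersioned* AllocFloat32(const struct DLPackExchangeAPI* api) {
  int64_t shape[2] = {16, 32};
  DLTensor prototype = {0};
  /* Only dtype, ndim, shape, and device are read from the prototype. */
  prototype.device.device_type = kDLCPU;
  prototype.device.device_id = 0;
  prototype.ndim = 2;
  prototype.shape = shape;
  prototype.dtype.code = kDLFloat;
  prototype.dtype.bits = 32;
  prototype.dtype.lanes = 1;

  DLManagedTensorVersioned* out = NULL;
  if (api->managed_tensor_allocator(&prototype, &out, NULL, ReportError) != 0) {
    return NULL;  /* -1 returned: ReportError was called exactly once */
  }
  return out;
}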
 
 /*!
  * \brief Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
  *
- * This function is a C-style function pointer to quickly convert a PyObject* Tensor/NDArray
- * to a DLManagedTensorVersioned without going through the Python interpreter.
- *
  * This function does not perform any stream synchronization. The consumer should query
  * DLPackCurrentWorkStream to get the current work stream and launch kernels on it.
  *
  * This function is exposed by the framework through the DLPackExchangeAPI.
  *
- * This information can then be picked up by importers and libraries to perform a fast conversion.
- * This function should not throw any exceptions; if it fails, it should return -1 and
- * set the error message via PyErr_SetXXX.
- *
- * \param py_object The Python object to convert; this should be PyObject*.
- *        We use void* to avoid dependency on Python.h.
- *
- * \param out The output DLManagedTensorVersioned.
- * \return 0 on success, -1 on failure. PyError should be set if -1 is returned.
- * \note We use void* to avoid dependency on Python.h, so this specific type is
- *       not dependent on Python.h and can be copied to dlpack.h.
+ * \param py_object The Python object to convert. Must have the same type
+ *        as the one the `DLPackExchangeAPI` was discovered from.
+ * \param out The output owning DLManagedTensorVersioned.
+ * \return 0 on success, -1 on failure with a Python exception set. If the
+ *         data cannot be described using DLPack, the error should be a
+ *         BufferError if possible.
+ * \note - As a C function, this must not throw C++ exceptions.
  *
  * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
  */
-typedef int (*DLPackManagedTensorFromPyObjectNoSync)(  //
-    void* py_object,                                   //
-    DLManagedTensorVersioned** out                     //
+typedef int (*DLPackManagedTensorFromPyObjectNoSync)(  //
+    void* py_object, DLManagedTensorVersioned** out    //
 );
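A hedged sketch of the owning export path, continuing the assumptions of the allocator sketch above (`ConsumeOwning` is a hypothetical consumer helper):

#include <Python.h>
#include <dlpack/dlpack.h>

/* Export, use, then release through the tensor's own deleter. */
static PyObject* ConsumeOwning(const struct DLPackExchangeAPI* api,
                               PyObject* tensor_obj) {
  DLManagedTensorVersioned* exported = NULL;
  if (api->managed_tensor_from_py_object_no_sync((void*)tensor_obj,
                                                 &exported) != 0) {
    return NULL;  /* -1: a Python exception is set; propagate it */
  }
  /* ... launch kernels on exported->dl_tensor, on the stream obtained
   *     from current_work_stream (see below) ... */
  if (exported->deleter != NULL) {
    exported->deleter(exported);  /* release the producer's resources */
  }
  Py_RETURN_NONE;
}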
 
 /*!
- * \brief Exports a PyObject* Tensor/NDArray to a DLTensor whose space is pre-allocated on stack.
+ * \brief Exports a PyObject* Tensor/NDArray to a provided DLTensor.
  *
- * This function is a C-style function pointer to quickly convert a PyObject* Tensor/NDArray
- * to a DLTensor whose space is pre-allocated on stack without going through the Python interpreter.
+ * This function provides a faster interface for temporary, non-owning exchange.
+ * The producer (implementor) still owns the memory of data, strides, shape.
+ * The liveness of the DLTensor and the data it views is only guaranteed until
+ * control is returned.
  *
- * This is an non-owning conversion, the producer still owns the memory of data, strides, shape.
- * The liveness of DLTensor is only guaranteed until the consumer returns control to the caller.
- *
- * In the context of this function, we expect the producer to allocated space for data, strides and shape.
+ * This function currently assumes that the producer (implementor) can fill
+ * in the DLTensor shape and strides without the need for temporary allocations.
  *
  * This function does not perform any stream synchronization. The consumer should query
  * DLPackCurrentWorkStream to get the current work stream and launch kernels on it.
  *
- * This function is useful when the consumer do not need to retain the tensor memory.
- * It generally can provide about 2x faster conversion than DLPackManagedTensorFromPyObjectNoSync.
- *
- * For cases where consumer may needs to reorganize the tensor memory via temporary managed copy,
- * DLPackManagedTensorFromPyObjectNoSync should be used.
- *
  * This function is exposed by the framework through the DLPackExchangeAPI.
  *
- * This information can then be picked up by importers and libraries to perform a fast conversion.
- * This function should not throw any exceptions; if it fails, it should return -1 and
- * set the error message via PyErr_SetXXX.
- *
- * \param py_object The Python object to convert; this should be PyObject*.
- *        We use void* to avoid dependency on Python.h.
- *
+ * \param py_object The Python object to convert. Must have the same type
+ *        as the one the `DLPackExchangeAPI` was discovered from.
  * \param out The output DLTensor, whose space is pre-allocated on stack.
- * \return 0 on success, -1 on failure. PyError should be set if -1 is returned.
- * \note We use void* to avoid dependency on Python.h, so this specific type is
- *       not dependent on Python.h and can be copied to dlpack.h.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, this must not throw C++ exceptions.
  *
  * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
  */
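The matching typedef sits in the unchanged hunk context below; judging from the parameter docs it takes the `void*` PyObject and a consumer-provided `DLTensor*`, which is an assumption here. A hedged sketch of the non-owning fast path, continuing the earlier assumptions:

/* Hypothetical consumer helper for the non-owning fast path. */
static int ConsumeView(const struct DLPackExchangeAPI* api,
                       PyObject* tensor_obj) {
  DLTensor view;  /* stack storage supplied by the consumer */
  if (api->dltensor_from_py_object_no_sync == NULL) {
    return -1;  /* optional entry: producer does not support this path */
  }
  if (api->dltensor_from_py_object_no_sync((void*)tensor_obj, &view) != 0) {
    return -1;  /* -1: a Python exception is set */
  }
  /* `view`, including the shape/strides arrays it points to, is only
   * valid until control returns to the producer: use it immediately,
   * do not retain it. */
  return 0;
}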
@@ -465,21 +446,18 @@ typedef int (*DLPackDLTensorFromPyObjectNoSync)( //
 /*!
  * \brief Obtain the current work stream of a device.
  *
- * This function is a C-style function pointer to obtain the current work stream
- * of a device for frameworks that rely on a context manager to manage the stream.
+ * Obtain the current work stream of a device from the producer framework.
  * For example, it should map to torch.cuda.current_stream in PyTorch.
  *
- * This function can be set to NULL if the framework does not rely on a context manager
- * to manage the stream. However, we encourage frameworks to provide this function
- * if possible.
- *
- * As if this field is not set, likely consumer cannot safely do stream based
- * exchange based on the
- *
  * \param device_type The device type.
  * \param device_id The device id.
  * \param out_current_stream The output current work stream.
- * \return 0 on success, -1 on failure.
+ *        The producer can return reinterpret_cast<void*>(-1) to indicate
+ *        that no stream is available; the consumer should not perform
+ *        stream synchronization in that case.
+ *
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, this must not throw C++ exceptions.
  *
  * \sa DLPackExchangeAPI
  */
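A hedged sketch of the sentinel handling described above; the device type and id are placeholders, and the parameter types follow the documented params since the typedef itself is outside this diff's changed lines:

/* Hypothetical consumer helper: query the producer's work stream. */
static int QueryStream(const struct DLPackExchangeAPI* api, void** stream) {
  if (api->current_work_stream(kDLCUDA, /*device_id=*/0, stream) != 0) {
    return -1;  /* -1: a Python exception is set */
  }
  if (*stream == (void*)-1) {
    /* Producer reports no stream: skip stream synchronization. */
    *stream = NULL;
  }
  /* Otherwise launch kernels on *stream to stay ordered with the producer. */
  return 0;
}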
@@ -492,54 +470,43 @@ typedef int (*DLPackCurrentWorkStream)( //
 /*!
  * \brief Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray.
  *
- * This function is a C-style function pointer to quickly convert a DLManagedTensorVersioned
- * to a PyObject* without going through the Python Interpreter.
+ * Convert an owning DLManagedTensorVersioned* to the Python tensor of the
+ * producer (implementor) library with the correct type.
  *
  * This function does not perform any stream synchronization.
  *
  * This function is exposed by the framework through the DLPackExchangeAPI.
  *
- * \param tensor The DLManagedTensorVersioned to convert.
+ * \param tensor The DLManagedTensorVersioned to convert; ownership of
+ *        the data is stolen.
  * \param out_py_object The output Python object.
- * \return 0 on success, -1 on failure. PyError should be set if -1 is returned.
- * \note We use void* to avoid dependency on Python.h, so this specific type is
- *       not dependent on Python.h and can be copied to dlpack.h.
+ * \return 0 on success, -1 on failure with a Python exception set.
  *
  * \sa DLPackExchangeAPI
  */
-typedef int (*DLPackManagedTensorToPyObjectNoSync)(  //
-    DLManagedTensorVersioned* tensor, void** out_py_object  //
+typedef int (*DLPackManagedTensorToPyObjectNoSync)(  //
+    DLManagedTensorVersioned* tensor,  //
+    void** out_py_object  //
 );
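For symmetry, a hedged import sketch under the same assumptions; on success the producer steals ownership of `tensor`:

/* Hypothetical consumer helper: hand an owning tensor to the producer. */
static PyObject* WrapResult(const struct DLPackExchangeAPI* api,
                            DLManagedTensorVersioned* tensor) {
  void* py_result = NULL;
  if (api->managed_tensor_to_py_object_no_sync(tensor, &py_result) != 0) {
    return NULL;  /* -1: a Python exception is set */
  }
  return (PyObject*)py_result;  /* an instance of the producer's tensor type */
}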
 
 /*!
  * \brief Framework-specific function pointers table for DLPack exchange.
  *
- * Guidelines for leveraging DLPackExchangeAPI:
+ * In addition to `__dlpack__()`, we define a C function table sharable by
+ * Python implementations via `__c_dlpack_exchange_api__`.
+ * This attribute must be set on the type as a Python integer compatible
+ * with `PyLong_FromVoidPtr`/`PyLong_AsVoidPtr`.
  *
- * There are generally two kinds of consumer needs for DLPack exchange:
- * - N0: library support, where consumer.kernel(x, y, z) would like to run a kernel
- *   with the data from x, y, z. The consumer is also expected to run the kernel with the same
- *   stream context as the producer. For example, when x, y, z is torch.Tensor,
- *   consumer should query exchange_api->current_work_stream to get the
- *   current stream and launch the kernel with the same stream.
- *   This setup is necessary for no synchronization in kernel launch and maximum compatibility
- *   with CUDA graph capture in the producer.
- *   This is the desirable behavior for library extension support for frameworks like PyTorch.
- * - N1: data ingestion and retention
- *
- * Note that obj.__dlpack__() API should provide useful ways for N1.
- * The primary focus of the current DLPackExchangeAPI is to enable faster exchange N0
- * with the support of the function pointer current_work_stream.
- *
- * Array/Tensor libraries should statically create and initialize this structure
- * then return a pointer to DLPackExchangeAPI as an int value in Tensor/Array.
- * The DLPackExchangeAPI* should stay alive throughout the lifetime of the process.
- *
- * One simple way to do so is to create a static instance of DLPackExchangeAPI
- * within the framework and return a pointer to it. The following code
- * shows an example to do so in C++. It should also be reasonably easy
- * to do so in other languages.
+ * A consumer library may use a pattern such as:
+ * \code
+ * PyObject *api_obj = type(tensor_obj).__c_dlpack_exchange_api__;  // as C code
+ * MyDLPackExchangeAPI *api = (MyDLPackExchangeAPI *)PyLong_AsVoidPtr(api_obj);
+ * if (api == NULL && PyErr_Occurred()) { goto handle_error; }
+ * \endcode
+ * Note that this must be defined on the type. The consumer should look up the
+ * attribute on the type and may cache the result for each unique type.
  *
+ * The precise API table is given by:
  * \code
  * struct MyDLPackExchangeAPI : public DLPackExchangeAPI {
  *   MyDLPackExchangeAPI() {
@@ -560,55 +527,75 @@ typedef int (*DLPackManagedTensorToPyObjectNoSync)( //
  * };
  * \endcode
  *
- * Each framework should attach a dunder `__c_dlpack_exchange_api__` integer
- * to point to the DLPackExchangeAPI* pointer.
+ * Guidelines for leveraging DLPackExchangeAPI:
  *
- * Importantly, the attribute should be attached to the class of the Tensor, not the instance.
+ * There are generally two kinds of consumer needs for DLPack exchange:
+ * - N0: library support, where consumer.kernel(x, y, z) would like to run a kernel
+ *   with the data from x, y, z. The consumer is also expected to run the kernel with the same
+ *   stream context as the producer. For example, when x, y, z is torch.Tensor,
+ *   the consumer should query exchange_api->current_work_stream to get the
+ *   current stream and launch the kernel with the same stream.
+ *   This setup is necessary for no synchronization in kernel launch and maximum compatibility
+ *   with CUDA graph capture in the producer.
+ *   This is the desirable behavior for library extension support for frameworks like PyTorch.
+ * - N1: data ingestion and retention
  *
- * mypackage.Tensor.__c_dlpack_exchange_api__ = MyPackageDLPackExchangeAPI
+ * Note that the obj.__dlpack__() API should provide useful ways for N1.
+ * The primary focus of the current DLPackExchangeAPI is to enable faster exchange (N0)
+ * with the support of the function pointer current_work_stream.
  *
- * or equivalently:
+ * Array/Tensor libraries should statically create and initialize this structure,
+ * then return a pointer to DLPackExchangeAPI as an int value in the Tensor/Array.
+ * The DLPackExchangeAPI* must stay alive throughout the lifetime of the process.
  *
- * type(tensor_obj).__c_dlpack_exchange_api__ = MyPackageDLPackExchangeAPI
+ * One simple way to do so is to create a static instance of DLPackExchangeAPI
+ * within the framework and return a pointer to it. The preceding code
+ * shows an example of doing so in C++. It should also be reasonably easy
+ * to do so in other languages.
  */
 struct DLPackExchangeAPI {
   /*!
-   * \brief The current DLPack version.
+   * \brief The provided DLPack version. The consumer must check major
+   *        version compatibility before using this struct.
    */
   DLPackVersion version;
   /*!
    * \brief Optional pointer to an older DLPackExchangeAPI in the chain.
    *
-   * It should be set to NULL if the framework does not support older versions.
+   * It must be NULL if the framework does not support older versions.
+   * If the current major version is larger than the one supported by the
+   * consumer, the consumer may walk this to find an earlier supported version.
    *
    * \sa DLPackExchangeAPI
    */
   struct DLPackExchangeAPI* prev_version_api;
   /*!
-   * \brief Framework-specific function pointer for DLPackManagedTensorAllocator
+   * \brief Producer function pointer for DLPackManagedTensorAllocator.
+   *        This function must not be NULL.
    * \sa DLPackManagedTensorAllocator
    */
   DLPackManagedTensorAllocator managed_tensor_allocator;
   /*!
-   * \brief Framework-specific function pointer for DLPackManagedTensorFromPyObjectNoSync
+   * \brief Producer function pointer for DLPackManagedTensorFromPyObjectNoSync.
+   *        This function must not be NULL.
    * \sa DLPackManagedTensorFromPyObjectNoSync
    */
   DLPackManagedTensorFromPyObjectNoSync managed_tensor_from_py_object_no_sync;
   /*!
-   * \brief Framework-specific function pointer for DLPackManagedTensorToPyObjectNoSync
+   * \brief Producer function pointer for DLPackManagedTensorToPyObjectNoSync.
+   *        This function must not be NULL.
    * \sa DLPackManagedTensorToPyObjectNoSync
    */
   DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync;
   /*!
-   * \brief Framework-specific function pointer for DLPackDLTensorFromPyObjectNoSync
+   * \brief Producer function pointer for DLPackDLTensorFromPyObjectNoSync.
+   *        This function can be NULL when the producer does not support it.
    * \sa DLPackDLTensorFromPyObjectNoSync
    */
   DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync;
   /*!
-   * \brief Framework-specific function pointer for DLPackCurrentWorkStream
-   *
-   * This function can be set to NULL if the framework does not rely on context manager to manage the stream.
-   *
+   * \brief Producer function pointer for DLPackCurrentWorkStream.
+   *        This function must not be NULL.
    * \sa DLPackCurrentWorkStream
    */
   DLPackCurrentWorkStream current_work_stream;
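Putting the discovery rules together, a consumer could cache the table per type roughly as follows; a sketch assuming CPython, where only the names taken from this header are real and `LookupExchangeAPI` is hypothetical:

#include <Python.h>
#include <dlpack/dlpack.h>

/* Look up `__c_dlpack_exchange_api__` on the *type*, as required above. */
static struct DLPackExchangeAPI* LookupExchangeAPI(PyObject* tensor_obj) {
  PyObject* api_obj = PyObject_GetAttrString(
      (PyObject*)Py_TYPE(tensor_obj), "__c_dlpack_exchange_api__");
  if (api_obj == NULL) return NULL;  /* AttributeError: protocol unsupported */
  struct DLPackExchangeAPI* api =
      (struct DLPackExchangeAPI*)PyLong_AsVoidPtr(api_obj);
  Py_DECREF(api_obj);
  if (api == NULL && PyErr_Occurred()) return NULL;
  /* Walk prev_version_api until we find a major version we can consume. */
  while (api != NULL && api->version.major > DLPACK_MAJOR_VERSION) {
    api = api->prev_version_api;
  }
  return api;  /* may be NULL: no compatible version in the chain */
}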
