/*!
 *  Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
  8. #ifdef __cplusplus
  9. #define DLPACK_EXTERN_C extern "C"
  10. #else
  11. #define DLPACK_EXTERN_C
  12. #endif
  13. /*! \brief The current version of dlpack */
  14. #define DLPACK_VERSION 60
  15. /*! \brief DLPACK_DLL prefix for windows */
  16. #ifdef _WIN32
  17. #ifdef DLPACK_EXPORTS
  18. #define DLPACK_DLL __declspec(dllexport)
  19. #else
  20. #define DLPACK_DLL __declspec(dllimport)
  21. #endif
  22. #else
  23. #define DLPACK_DLL
  24. #endif
#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
  30. /*!
  31. * \brief The device type in DLDevice.
  32. */
  33. typedef enum {
  34. /*! \brief CPU device */
  35. kDLCPU = 1,
  36. /*! \brief CUDA GPU device */
  37. kDLCUDA = 2,
  38. /*!
  39. * \brief Pinned CUDA CPU memory by cudaMallocHost
  40. */
  41. kDLCUDAHost = 3,
  42. /*! \brief OpenCL devices. */
  43. kDLOpenCL = 4,
  44. /*! \brief Vulkan buffer for next generation graphics. */
  45. kDLVulkan = 7,
  46. /*! \brief Metal for Apple GPU. */
  47. kDLMetal = 8,
  48. /*! \brief Verilog simulator buffer */
  49. kDLVPI = 9,
  50. /*! \brief ROCm GPUs for AMD GPUs */
  51. kDLROCM = 10,
  52. /*!
  53. * \brief Pinned ROCm CPU memory allocated by hipMallocHost
  54. */
  55. kDLROCMHost = 11,
  56. /*!
  57. * \brief Reserved extension device type,
  58. * used for quickly test extension device
  59. * The semantics can differ depending on the implementation.
  60. */
  61. kDLExtDev = 12,
  62. /*!
  63. * \brief CUDA managed/unified memory allocated by cudaMallocManaged
  64. */
  65. kDLCUDAManaged = 13,
  66. } DLDeviceType;
  67. /*!
  68. * \brief A Device for Tensor and operator.
  69. */
  70. typedef struct {
  71. /*! \brief The device type used in the device. */
  72. DLDeviceType device_type;
  73. /*!
  74. * \brief The device index.
  75. * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
  76. */
  77. int device_id;
  78. } DLDevice;
  79. /*!
  80. * \brief The type code options DLDataType.
  81. */
  82. typedef enum {
  83. /*! \brief signed integer */
  84. kDLInt = 0U,
  85. /*! \brief unsigned integer */
  86. kDLUInt = 1U,
  87. /*! \brief IEEE floating point */
  88. kDLFloat = 2U,
  89. /*!
  90. * \brief Opaque handle type, reserved for testing purposes.
  91. * Frameworks need to agree on the handle data type for the exchange to be well-defined.
  92. */
  93. kDLOpaqueHandle = 3U,
  94. /*! \brief bfloat16 */
  95. kDLBfloat = 4U,
  96. /*!
  97. * \brief complex number
  98. * (C/C++/Python layout: compact struct per complex number)
  99. */
  100. kDLComplex = 5U,
  101. } DLDataTypeCode;
  102. /*!
  103. * \brief The data type the tensor can hold.
  104. *
  105. * Examples
  106. * - float: type_code = 2, bits = 32, lanes=1
  107. * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
  108. * - int8: type_code = 0, bits = 8, lanes=1
  109. * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  110. */
  111. typedef struct {
  112. /*!
  113. * \brief Type code of base types.
  114. * We keep it uint8_t instead of DLDataTypeCode for minimal memory
  115. * footprint, but the value should be one of DLDataTypeCode enum values.
  116. * */
  117. uint8_t code;
  118. /*!
  119. * \brief Number of bits, common choices are 8, 16, 32.
  120. */
  121. uint8_t bits;
  122. /*! \brief Number of lanes in the type, used for vector types. */
  123. uint16_t lanes;
  124. } DLDataType;
  125. /*!
  126. * \brief Plain C Tensor object, does not manage memory.
  127. */
  128. typedef struct {
  129. /*!
  130. * \brief The opaque data pointer points to the allocated data. This will be
  131. * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
  132. * aligned to 256 bytes as in CUDA.
  133. *
  134. * For given DLTensor, the size of memory required to store the contents of
  135. * data is calculated as follows:
  136. *
  137. * \code{.c}
  138. * static inline size_t GetDataSize(const DLTensor* t) {
  139. * size_t size = 1;
  140. * for (tvm_index_t i = 0; i < t->ndim; ++i) {
  141. * size *= t->shape[i];
  142. * }
  143. * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
  144. * return size;
  145. * }
  146. * \endcode
  147. */
  148. void* data;
  149. /*! \brief The device of the tensor */
  150. DLDevice device;
  151. /*! \brief Number of dimensions */
  152. int ndim;
  153. /*! \brief The data type of the pointer*/
  154. DLDataType dtype;
  155. /*! \brief The shape of the tensor */
  156. int64_t* shape;
  157. /*!
  158. * \brief strides of the tensor (in number of elements, not bytes)
  159. * can be NULL, indicating tensor is compact and row-majored.
  160. */
  161. int64_t* strides;
  162. /*! \brief The offset in bytes to the beginning pointer to data */
  163. uint64_t byte_offset;
  164. } DLTensor;
  165. /*!
  166. * \brief C Tensor object, manage memory of DLTensor. This data structure is
  167. * intended to facilitate the borrowing of DLTensor by another framework. It is
  168. * not meant to transfer the tensor. When the borrowing framework doesn't need
  169. * the tensor, it should call the deleter to notify the host that the resource
  170. * is no longer needed.
  171. */
  172. typedef struct DLManagedTensor {
  173. /*! \brief DLTensor which is being memory managed */
  174. DLTensor dl_tensor;
  175. /*! \brief the context of the original host framework of DLManagedTensor in
  176. * which DLManagedTensor is used in the framework. It can also be NULL.
  177. */
  178. void * manager_ctx;
  179. /*! \brief Destructor signature void (*)(void*) - this should be called
  180. * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
  181. * if there is no way for the caller to provide a reasonable destructor.
  182. * The destructors deletes the argument self as well.
  183. */
  184. void (*deleter)(struct DLManagedTensor * self);
  185. } DLManagedTensor;
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_