cuda.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * Copyright © 2010-2023 Inria. All rights reserved.
  3. * Copyright © 2010-2011 Université Bordeaux
  4. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
  5. * See COPYING in top-level directory.
  6. */
  7. /** \file
  8. * \brief Macros to help interaction between hwloc and the CUDA Driver API.
  9. *
  10. * Applications that use both hwloc and the CUDA Driver API may want to
  11. * include this file so as to get topology information for CUDA devices.
  12. *
  13. */
  14. #ifndef HWLOC_CUDA_H
  15. #define HWLOC_CUDA_H
  16. #include "hwloc.h"
  17. #include "hwloc/autogen/config.h"
  18. #include "hwloc/helper.h"
  19. #ifdef HWLOC_LINUX_SYS
  20. #include "hwloc/linux.h"
  21. #endif
  22. #include <cuda.h>
  23. #ifdef __cplusplus
  24. extern "C" {
  25. #endif
  26. /** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
  27. *
  28. * This interface offers ways to retrieve topology information about
  29. * CUDA devices when using the CUDA Driver API.
  30. *
  31. * @{
  32. */
  33. /** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
  34. *
  35. * Device \p cudevice must match the local machine.
  36. *
  37. * \return 0 on success.
  38. * \return -1 on error, for instance if device information could not be found.
  39. */
  40. static __hwloc_inline int
  41. hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
  42. CUdevice cudevice, int *domain, int *bus, int *dev)
  43. {
  44. CUresult cres;
  45. #if CUDA_VERSION >= 4000
  46. cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
  47. if (cres != CUDA_SUCCESS) {
  48. errno = ENOSYS;
  49. return -1;
  50. }
  51. #else
  52. *domain = 0;
  53. #endif
  54. cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
  55. if (cres != CUDA_SUCCESS) {
  56. errno = ENOSYS;
  57. return -1;
  58. }
  59. cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
  60. if (cres != CUDA_SUCCESS) {
  61. errno = ENOSYS;
  62. return -1;
  63. }
  64. return 0;
  65. }
  66. /** \brief Get the CPU set of processors that are physically
  67. * close to device \p cudevice.
  68. *
  69. * Store in \p set the CPU-set describing the locality of the CUDA device \p cudevice.
  70. *
  71. * Topology \p topology and device \p cudevice must match the local machine.
  72. * I/O devices detection and the CUDA component are not needed in the topology.
  73. *
  74. * The function only returns the locality of the device.
  75. * If more information about the device is needed, OS objects should
  76. * be used instead, see hwloc_cuda_get_device_osdev()
  77. * and hwloc_cuda_get_device_osdev_by_index().
  78. *
  79. * This function is currently only implemented in a meaningful way for
  80. * Linux; other systems will simply get a full cpuset.
  81. *
  82. * \return 0 on success.
  83. * \return -1 on error, for instance if device information could not be found.
  84. */
  85. static __hwloc_inline int
  86. hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
  87. CUdevice cudevice, hwloc_cpuset_t set)
  88. {
  89. #ifdef HWLOC_LINUX_SYS
  90. /* If we're on Linux, use the sysfs mechanism to get the local cpus */
  91. #define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
  92. char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
  93. int domainid, busid, deviceid;
  94. if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
  95. return -1;
  96. if (!hwloc_topology_is_thissystem(topology)) {
  97. errno = EINVAL;
  98. return -1;
  99. }
  100. sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid);
  101. if (hwloc_linux_read_path_as_cpumask(path, set) < 0
  102. || hwloc_bitmap_iszero(set))
  103. hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  104. #else
  105. /* Non-Linux systems simply get a full cpuset */
  106. hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  107. #endif
  108. return 0;
  109. }
  110. /** \brief Get the hwloc PCI device object corresponding to the
  111. * CUDA device \p cudevice.
  112. *
  113. * \return The hwloc PCI device object describing the CUDA device \p cudevice.
  114. * \return \c NULL if none could be found.
  115. *
  116. * Topology \p topology and device \p cudevice must match the local machine.
  117. * I/O devices detection must be enabled in topology \p topology.
  118. * The CUDA component is not needed in the topology.
  119. */
  120. static __hwloc_inline hwloc_obj_t
  121. hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
  122. {
  123. int domain, bus, dev;
  124. if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
  125. return NULL;
  126. return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
  127. }
  128. /** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
  129. *
  130. * \return The hwloc OS device object that describes the given CUDA device \p cudevice.
  131. * \return \c NULL if none could be found.
  132. *
  133. * Topology \p topology and device \p cudevice must match the local machine.
  134. * I/O devices detection and the CUDA component must be enabled in the topology.
  135. * If not, the locality of the object may still be found using
  136. * hwloc_cuda_get_device_cpuset().
  137. *
  138. * \note This function cannot work if PCI devices are filtered out.
  139. *
  140. * \note The corresponding hwloc PCI device may be found by looking
  141. * at the result parent pointer (unless PCI devices are filtered out).
  142. */
  143. static __hwloc_inline hwloc_obj_t
  144. hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
  145. {
  146. hwloc_obj_t osdev = NULL;
  147. int domain, bus, dev;
  148. if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
  149. return NULL;
  150. osdev = NULL;
  151. while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
  152. hwloc_obj_t pcidev = osdev->parent;
  153. if (strncmp(osdev->name, "cuda", 4))
  154. continue;
  155. if (pcidev
  156. && pcidev->type == HWLOC_OBJ_PCI_DEVICE
  157. && (int) pcidev->attr->pcidev.domain == domain
  158. && (int) pcidev->attr->pcidev.bus == bus
  159. && (int) pcidev->attr->pcidev.dev == dev
  160. && pcidev->attr->pcidev.func == 0)
  161. return osdev;
  162. /* if PCI are filtered out, we need a info attr to match on */
  163. }
  164. return NULL;
  165. }
  166. /** \brief Get the hwloc OS device object corresponding to the
  167. * CUDA device whose index is \p idx.
  168. *
  169. * \return The hwloc OS device object describing the CUDA device whose index is \p idx.
  170. * \return \c NULL if none could be found.
  171. *
  172. * The topology \p topology does not necessarily have to match the current
  173. * machine. For instance the topology may be an XML import of a remote host.
  174. * I/O devices detection and the CUDA component must be enabled in the topology.
  175. *
  176. * \note The corresponding PCI device object can be obtained by looking
  177. * at the OS device parent object (unless PCI devices are filtered out).
  178. *
  179. * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
  180. */
  181. static __hwloc_inline hwloc_obj_t
  182. hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
  183. {
  184. hwloc_obj_t osdev = NULL;
  185. while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
  186. if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
  187. && osdev->name
  188. && !strncmp("cuda", osdev->name, 4)
  189. && atoi(osdev->name + 4) == (int) idx)
  190. return osdev;
  191. }
  192. return NULL;
  193. }
  194. /** @} */
  195. #ifdef __cplusplus
  196. } /* extern "C" */
  197. #endif
  198. #endif /* HWLOC_CUDA_H */