cudart.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. /*
  2. * Copyright © 2010-2023 Inria. All rights reserved.
  3. * Copyright © 2010-2011 Université Bordeaux
  4. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
  5. * See COPYING in top-level directory.
  6. */
  7. /** \file
  8. * \brief Macros to help interaction between hwloc and the CUDA Runtime API.
  9. *
  10. * Applications that use both hwloc and the CUDA Runtime API may want to
  11. * include this file so as to get topology information for CUDA devices.
  12. *
  13. */
  14. #ifndef HWLOC_CUDART_H
  15. #define HWLOC_CUDART_H
  16. #include "hwloc.h"
  17. #include "hwloc/autogen/config.h"
  18. #include "hwloc/helper.h"
  19. #ifdef HWLOC_LINUX_SYS
  20. #include "hwloc/linux.h"
  21. #endif
  22. #include <cuda.h> /* for CUDA_VERSION */
  23. #include <cuda_runtime_api.h>
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif
  27. /** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API
  28. *
  29. * This interface offers ways to retrieve topology information about
  30. * CUDA devices when using the CUDA Runtime API.
  31. *
  32. * @{
  33. */
  34. /** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
  35. *
  36. * Device index \p idx must match the local machine.
  37. *
  38. * \return 0 on success.
  39. * \return -1 on error, for instance if device information could not be found.
  40. */
  41. static __hwloc_inline int
  42. hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
  43. int idx, int *domain, int *bus, int *dev)
  44. {
  45. cudaError_t cerr;
  46. struct cudaDeviceProp prop;
  47. cerr = cudaGetDeviceProperties(&prop, idx);
  48. if (cerr) {
  49. errno = ENOSYS;
  50. return -1;
  51. }
  52. #if CUDA_VERSION >= 4000
  53. *domain = prop.pciDomainID;
  54. #else
  55. *domain = 0;
  56. #endif
  57. *bus = prop.pciBusID;
  58. *dev = prop.pciDeviceID;
  59. return 0;
  60. }
  61. /** \brief Get the CPU set of processors that are physically
  62. * close to device \p idx.
  63. *
  64. * Store in \p set the CPU-set describing the locality of the CUDA device
  65. * whose index is \p idx.
  66. *
  67. * Topology \p topology and device \p idx must match the local machine.
  68. * I/O devices detection and the CUDA component are not needed in the topology.
  69. *
  70. * The function only returns the locality of the device.
  71. * If more information about the device is needed, OS objects should
  72. * be used instead, see hwloc_cudart_get_device_osdev_by_index().
  73. *
  74. * This function is currently only implemented in a meaningful way for
  75. * Linux; other systems will simply get a full cpuset.
  76. *
  77. * \return 0 on success.
  78. * \return -1 on error, for instance if device information could not be found.
  79. */
  80. static __hwloc_inline int
  81. hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
  82. int idx, hwloc_cpuset_t set)
  83. {
  84. #ifdef HWLOC_LINUX_SYS
  85. /* If we're on Linux, use the sysfs mechanism to get the local cpus */
  86. #define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128
  87. char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX];
  88. int domain, bus, dev;
  89. if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
  90. return -1;
  91. if (!hwloc_topology_is_thissystem(topology)) {
  92. errno = EINVAL;
  93. return -1;
  94. }
  95. sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) domain, (unsigned) bus, (unsigned) dev);
  96. if (hwloc_linux_read_path_as_cpumask(path, set) < 0
  97. || hwloc_bitmap_iszero(set))
  98. hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  99. #else
  100. /* Non-Linux systems simply get a full cpuset */
  101. hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  102. #endif
  103. return 0;
  104. }
  105. /** \brief Get the hwloc PCI device object corresponding to the
  106. * CUDA device whose index is \p idx.
  107. *
  108. * \return The hwloc PCI device object describing the CUDA device whose index is \p idx.
  109. * \return \c NULL if none could be found.
  110. *
  111. * Topology \p topology and device \p idx must match the local machine.
  112. * I/O devices detection must be enabled in topology \p topology.
  113. * The CUDA component is not needed in the topology.
  114. */
  115. static __hwloc_inline hwloc_obj_t
  116. hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
  117. {
  118. int domain, bus, dev;
  119. if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
  120. return NULL;
  121. return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
  122. }
  123. /** \brief Get the hwloc OS device object corresponding to the
  124. * CUDA device whose index is \p idx.
  125. *
  126. * \return The hwloc OS device object describing the CUDA device whose index is \p idx.
  127. * \return \c NULL if none could be found.
  128. *
  129. * The topology \p topology does not necessarily have to match the current
  130. * machine. For instance the topology may be an XML import of a remote host.
  131. * I/O devices detection and the CUDA component must be enabled in the topology.
  132. * If not, the locality of the object may still be found using
  133. * hwloc_cudart_get_device_cpuset().
  134. *
  135. * \note The corresponding PCI device object can be obtained by looking
  136. * at the OS device parent object (unless PCI devices are filtered out).
  137. *
  138. * \note This function is identical to hwloc_cuda_get_device_osdev_by_index().
  139. */
  140. static __hwloc_inline hwloc_obj_t
  141. hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
  142. {
  143. hwloc_obj_t osdev = NULL;
  144. while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
  145. if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
  146. && osdev->name
  147. && !strncmp("cuda", osdev->name, 4)
  148. && atoi(osdev->name + 4) == (int) idx)
  149. return osdev;
  150. }
  151. return NULL;
  152. }
  153. /** @} */
  154. #ifdef __cplusplus
  155. } /* extern "C" */
  156. #endif
  157. #endif /* HWLOC_CUDART_H */