distances.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /*
  2. * Copyright © 2010-2023 Inria. All rights reserved.
  3. * See COPYING in top-level directory.
  4. */
  5. /** \file
  6. * \brief Object distances.
  7. */
  8. #ifndef HWLOC_DISTANCES_H
  9. #define HWLOC_DISTANCES_H
  10. #ifndef HWLOC_H
  11. #error Please include the main hwloc.h instead
  12. #endif
  13. #ifdef __cplusplus
  14. extern "C" {
  15. #elif 0
  16. }
  17. #endif
/** \defgroup hwlocality_distances_get Retrieve distances between objects
 * @{
 */
  21. /** \brief Matrix of distances between a set of objects.
  22. *
  23. * This matrix often contains latencies between NUMA nodes
  24. * (as reported in the System Locality Distance Information Table (SLIT)
  25. * in the ACPI specification), which may or may not be physically accurate.
  26. * It corresponds to the latency for accessing the memory of one node
  27. * from a core in another node.
  28. * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
  29. * The name of this distances structure is "NUMALatency".
  30. * Others distance structures include and "XGMIBandwidth", "XGMIHops",
  31. * "XeLinkBandwidth" and "NVLinkBandwidth".
  32. *
  33. * The matrix may also contain bandwidths between random sets of objects,
  34. * possibly provided by the user, as specified in the \p kind attribute.
  35. *
  36. * Pointers \p objs and \p values should not be replaced, reallocated, freed, etc.
  37. * However callers are allowed to modify \p kind as well as the contents
  38. * of \p objs and \p values arrays.
  39. * For instance, if there is a single NUMA node per Package,
  40. * hwloc_get_obj_with_same_locality() may be used to convert between them
  41. * and replace NUMA nodes in the \p objs array with the corresponding Packages.
  42. * See also hwloc_distances_transform() for applying some transformations
  43. * to the structure.
  44. */
  45. struct hwloc_distances_s {
  46. unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */
  47. hwloc_obj_t *objs; /**< \brief Array of objects described by the distance matrix.
  48. * These objects are not in any particular order,
  49. * see hwloc_distances_obj_index() and hwloc_distances_obj_pair_values()
  50. * for easy ways to find objects in this array and their corresponding values.
  51. */
  52. unsigned long kind; /**< \brief OR'ed set of ::hwloc_distances_kind_e. */
  53. hwloc_uint64_t *values; /**< \brief Matrix of distances between objects, stored as a one-dimension array.
  54. *
  55. * Distance from i-th to j-th object is stored in slot i*nbobjs+j.
  56. * The meaning of the value depends on the \p kind attribute.
  57. */
  58. };
  59. /** \brief Kinds of distance matrices.
  60. *
  61. * The \p kind attribute of struct hwloc_distances_s is a OR'ed set
  62. * of kinds.
  63. *
  64. * A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the
  65. * distance information comes from, if known.
  66. *
  67. * A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether
  68. * values are latencies or bandwidths, if applicable.
  69. */
  70. enum hwloc_distances_kind_e {
  71. /** \brief These distances were obtained from the operating system or hardware.
  72. * \hideinitializer
  73. */
  74. HWLOC_DISTANCES_KIND_FROM_OS = (1UL<<0),
  75. /** \brief These distances were provided by the user.
  76. * \hideinitializer
  77. */
  78. HWLOC_DISTANCES_KIND_FROM_USER = (1UL<<1),
  79. /** \brief Distance values are similar to latencies between objects.
  80. * Values are smaller for closer objects, hence minimal on the diagonal
  81. * of the matrix (distance between an object and itself).
  82. * It could also be the number of network hops between objects, etc.
  83. * \hideinitializer
  84. */
  85. HWLOC_DISTANCES_KIND_MEANS_LATENCY = (1UL<<2),
  86. /** \brief Distance values are similar to bandwidths between objects.
  87. * Values are higher for closer objects, hence maximal on the diagonal
  88. * of the matrix (distance between an object and itself).
  89. * Such values are currently ignored for distance-based grouping.
  90. * \hideinitializer
  91. */
  92. HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3),
  93. /** \brief This distances structure covers objects of different types.
  94. * This may apply to the "NVLinkBandwidth" structure in presence
  95. * of a NVSwitch or POWER processor NVLink port.
  96. * \hideinitializer
  97. */
  98. HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4)
  99. };
  100. /** \brief Retrieve distance matrices.
  101. *
  102. * Retrieve distance matrices from the topology into the \p distances array.
  103. *
  104. * \p flags is currently unused, should be \c 0.
  105. *
  106. * \p kind serves as a filter. If \c 0, all distance matrices are returned.
  107. * If it contains some HWLOC_DISTANCES_KIND_FROM_*, only distance matrices
  108. * whose kind matches one of these are returned.
  109. * If it contains some HWLOC_DISTANCES_KIND_MEANS_*, only distance matrices
  110. * whose kind matches one of these are returned.
  111. *
  112. * On input, \p nr points to the number of distance matrices that may be stored
  113. * in \p distances.
  114. * On output, \p nr points to the number of distance matrices that were actually
  115. * found, even if some of them couldn't be stored in \p distances.
  116. * Distance matrices that couldn't be stored are ignored, but the function still
  117. * returns success (\c 0). The caller may find out by comparing the value pointed
  118. * by \p nr before and after the function call.
  119. *
  120. * Each distance matrix returned in the \p distances array should be released
  121. * by the caller using hwloc_distances_release().
  122. *
  123. * \return 0 on success, -1 on error.
  124. */
  125. HWLOC_DECLSPEC int
  126. hwloc_distances_get(hwloc_topology_t topology,
  127. unsigned *nr, struct hwloc_distances_s **distances,
  128. unsigned long kind, unsigned long flags);
  129. /** \brief Retrieve distance matrices for object at a specific depth in the topology.
  130. *
  131. * Identical to hwloc_distances_get() with the additional \p depth filter.
  132. *
  133. * \return 0 on success, -1 on error.
  134. */
  135. HWLOC_DECLSPEC int
  136. hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
  137. unsigned *nr, struct hwloc_distances_s **distances,
  138. unsigned long kind, unsigned long flags);
  139. /** \brief Retrieve distance matrices for object of a specific type.
  140. *
  141. * Identical to hwloc_distances_get() with the additional \p type filter.
  142. *
  143. * \return 0 on success, -1 on error.
  144. */
  145. HWLOC_DECLSPEC int
  146. hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
  147. unsigned *nr, struct hwloc_distances_s **distances,
  148. unsigned long kind, unsigned long flags);
  149. /** \brief Retrieve a distance matrix with the given name.
  150. *
  151. * Usually only one distances structure may match a given name.
  152. *
  153. * The name of the most common structure is "NUMALatency".
  154. * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
  155. * and "NVLinkBandwidth".
  156. *
  157. * \return 0 on success, -1 on error.
  158. */
  159. HWLOC_DECLSPEC int
  160. hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
  161. unsigned *nr, struct hwloc_distances_s **distances,
  162. unsigned long flags);
  163. /** \brief Get a description of what a distances structure contains.
  164. *
  165. * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
  166. * or \c NULL if unknown.
  167. *
  168. * \return the constant string with the name of the distance structure.
  169. *
  170. * \note The returned name should not be freed by the caller,
  171. * it belongs to the hwloc library.
  172. */
  173. HWLOC_DECLSPEC const char *
  174. hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
  175. /** \brief Release a distance matrix structure previously returned by hwloc_distances_get().
  176. *
  177. * \note This function is not required if the structure is removed with hwloc_distances_release_remove().
  178. */
  179. HWLOC_DECLSPEC void
  180. hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
  181. /** \brief Transformations of distances structures. */
  182. enum hwloc_distances_transform_e {
  183. /** \brief Remove \c NULL objects from the distances structure.
  184. *
  185. * Every object that was replaced with \c NULL in the \p objs array
  186. * is removed and the \p values array is updated accordingly.
  187. *
  188. * At least \c 2 objects must remain, otherwise hwloc_distances_transform()
  189. * will return \c -1 with \p errno set to \c EINVAL.
  190. *
  191. * \p kind will be updated with or without ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES
  192. * according to the remaining objects.
  193. *
  194. * \hideinitializer
  195. */
  196. HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0,
  197. /** \brief Replace bandwidth values with a number of links.
  198. *
  199. * Usually all values will be either \c 0 (no link) or \c 1 (one link).
  200. * However some matrices could get larger values if some pairs of
  201. * peers are connected by different numbers of links.
  202. *
  203. * Values on the diagonal are set to \c 0.
  204. *
  205. * This transformation only applies to bandwidth matrices.
  206. *
  207. * \hideinitializer
  208. */
  209. HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
  210. /** \brief Merge switches with multiple ports into a single object.
  211. * This currently only applies to NVSwitches where GPUs seem connected to different
  212. * separate switch ports in the NVLinkBandwidth matrix. This transformation will
  213. * replace all of them with the same port connected to all GPUs.
  214. * Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
  215. * \hideinitializer
  216. */
  217. HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
  218. /** \brief Apply a transitive closure to the matrix to connect objects across switches.
  219. * This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix.
  220. * All pairs of GPUs will be reported as directly connected.
  221. * \hideinitializer
  222. */
  223. HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
  224. };
  225. /** \brief Apply a transformation to a distances structure.
  226. *
  227. * Modify a distances structure that was previously obtained with
  228. * hwloc_distances_get() or one of its variants.
  229. *
  230. * This modifies the local copy of the distances structures but does
  231. * not modify the distances information stored inside the topology
  232. * (retrieved by another call to hwloc_distances_get() or exported to XML).
  233. * To do so, one should add a new distances structure with same
  234. * name, kind, objects and values (see \ref hwlocality_distances_add)
  235. * and then remove this old one with hwloc_distances_release_remove().
  236. *
  237. * \p transform must be one of the transformations listed
  238. * in ::hwloc_distances_transform_e.
  239. *
  240. * These transformations may modify the contents of the \p objs or \p values arrays.
  241. *
  242. * \p transform_attr must be \c NULL for now.
  243. *
  244. * \p flags must be \c 0 for now.
  245. *
  246. * \return 0 on success, -1 on error for instance if flags are invalid.
  247. *
  248. * \note Objects in distances array \p objs may be directly modified
  249. * in place without using hwloc_distances_transform().
  250. * One may use hwloc_get_obj_with_same_locality() to easily convert
  251. * between similar objects of different types.
  252. */
  253. HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances,
  254. enum hwloc_distances_transform_e transform,
  255. void *transform_attr,
  256. unsigned long flags);
/** @} */



/** \defgroup hwlocality_distances_consult Helpers for consulting distance matrices
 * @{
 */
  261. /** \brief Find the index of an object in a distances structure.
  262. *
  263. * \return the index of the object in the distances structure if any.
  264. * \return -1 if object \p obj is not involved in structure \p distances.
  265. */
  266. static __hwloc_inline int
  267. hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
  268. {
  269. unsigned i;
  270. for(i=0; i<distances->nbobjs; i++)
  271. if (distances->objs[i] == obj)
  272. return (int)i;
  273. return -1;
  274. }
  275. /** \brief Find the values between two objects in a distance matrices.
  276. *
  277. * The distance from \p obj1 to \p obj2 is stored in the value pointed by
  278. * \p value1to2 and reciprocally.
  279. *
  280. * \return 0 on success.
  281. * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
  282. */
  283. static __hwloc_inline int
  284. hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances,
  285. hwloc_obj_t obj1, hwloc_obj_t obj2,
  286. hwloc_uint64_t *value1to2, hwloc_uint64_t *value2to1)
  287. {
  288. int i1 = hwloc_distances_obj_index(distances, obj1);
  289. int i2 = hwloc_distances_obj_index(distances, obj2);
  290. if (i1 < 0 || i2 < 0)
  291. return -1;
  292. *value1to2 = distances->values[i1 * distances->nbobjs + i2];
  293. *value2to1 = distances->values[i2 * distances->nbobjs + i1];
  294. return 0;
  295. }
/** @} */



/** \defgroup hwlocality_distances_add Add distances between objects
 *
 * The usual way to add distances is:
 * \code
 * hwloc_distances_add_handle_t handle;
 * int err = -1;
 * handle = hwloc_distances_add_create(topology, "name", kind, 0);
 * if (handle) {
 *   err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0);
 *   if (!err)
 *     err = hwloc_distances_add_commit(topology, handle, flags);
 * }
 * \endcode
 * If \p err is \c 0 at the end, then addition was successful.
 *
 * @{
 */
  314. /** \brief Handle to a new distances structure during its addition to the topology. */
  315. typedef void * hwloc_distances_add_handle_t;
  316. /** \brief Create a new empty distances structure.
  317. *
  318. * Create an empty distances structure
  319. * to be filled with hwloc_distances_add_values()
  320. * and then committed with hwloc_distances_add_commit().
  321. *
  322. * Parameter \p name is optional, it may be \c NULL.
  323. * Otherwise, it will be copied internally and may later be freed by the caller.
  324. *
  325. * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
  326. * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set
  327. * according to objects having different types in hwloc_distances_add_values().
  328. *
  329. * \p flags must be \c 0 for now.
  330. *
  331. * \return A hwloc_distances_add_handle_t that should then be passed
  332. * to hwloc_distances_add_values() and hwloc_distances_add_commit().
  333. *
  334. * \return \c NULL on error.
  335. */
  336. HWLOC_DECLSPEC hwloc_distances_add_handle_t
  337. hwloc_distances_add_create(hwloc_topology_t topology,
  338. const char *name, unsigned long kind,
  339. unsigned long flags);
  340. /** \brief Specify the objects and values in a new empty distances structure.
  341. *
  342. * Specify the objects and values for a new distances structure
  343. * that was returned as a handle by hwloc_distances_add_create().
  344. * The structure must then be committed with hwloc_distances_add_commit().
  345. *
  346. * The number of objects is \p nbobjs and the array of objects is \p objs.
  347. * Distance values are stored as a one-dimension array in \p values.
  348. * The distance from object i to object j is in slot i*nbobjs+j.
  349. *
  350. * \p nbobjs must be at least 2.
  351. *
  352. * Arrays \p objs and \p values will be copied internally,
  353. * they may later be freed by the caller.
  354. *
  355. * On error, the temporary distances structure and its content are destroyed.
  356. *
  357. * \p flags must be \c 0 for now.
  358. *
  359. * \return 0 on success.
  360. * \return -1 on error.
  361. */
  362. HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
  363. hwloc_distances_add_handle_t handle,
  364. unsigned nbobjs, hwloc_obj_t *objs,
  365. hwloc_uint64_t *values,
  366. unsigned long flags);
  367. /** \brief Flags for adding a new distances to a topology. */
  368. enum hwloc_distances_add_flag_e {
  369. /** \brief Try to group objects based on the newly provided distance information.
  370. * This is ignored for distances between objects of different types.
  371. * \hideinitializer
  372. */
  373. HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0),
  374. /** \brief If grouping, consider the distance values as inaccurate and relax the
  375. * comparisons during the grouping algorithms. The actual accuracy may be modified
  376. * through the HWLOC_GROUPING_ACCURACY environment variable (see \ref envvar).
  377. * \hideinitializer
  378. */
  379. HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1)
  380. };
  381. /** \brief Commit a new distances structure.
  382. *
  383. * This function finalizes the distances structure and inserts in it the topology.
  384. *
  385. * Parameter \p handle was previously returned by hwloc_distances_add_create().
  386. * Then objects and values were specified with hwloc_distances_add_values().
  387. *
  388. * \p flags configures the behavior of the function using an optional OR'ed set of
  389. * ::hwloc_distances_add_flag_e.
  390. * It may be used to request the grouping of existing objects based on distances.
  391. *
  392. * On error, the temporary distances structure and its content are destroyed.
  393. *
  394. * \return 0 on success.
  395. * \return -1 on error.
  396. */
  397. HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
  398. hwloc_distances_add_handle_t handle,
  399. unsigned long flags);
/** @} */



/** \defgroup hwlocality_distances_remove Remove distances between objects
 * @{
 */
  404. /** \brief Remove all distance matrices from a topology.
  405. *
  406. * Remove all distance matrices, either provided by the user or
  407. * gathered through the OS.
  408. *
  409. * If these distances were used to group objects, these additional
  410. * Group objects are not removed from the topology.
  411. *
  412. * \return 0 on success, -1 on error.
  413. */
  414. HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
  415. /** \brief Remove distance matrices for objects at a specific depth in the topology.
  416. *
  417. * Identical to hwloc_distances_remove() but only applies to one level of the topology.
  418. *
  419. * \return 0 on success, -1 on error.
  420. */
  421. HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);
  422. /** \brief Remove distance matrices for objects of a specific type in the topology.
  423. *
  424. * Identical to hwloc_distances_remove() but only applies to one level of the topology.
  425. *
  426. * \return 0 on success, -1 on error.
  427. */
  428. static __hwloc_inline int
  429. hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
  430. {
  431. int depth = hwloc_get_type_depth(topology, type);
  432. if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
  433. return 0;
  434. return hwloc_distances_remove_by_depth(topology, depth);
  435. }
  436. /** \brief Release and remove the given distance matrice from the topology.
  437. *
  438. * This function includes a call to hwloc_distances_release().
  439. *
  440. * \return 0 on success, -1 on error.
  441. */
  442. HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
  443. /** @} */
  444. #ifdef __cplusplus
  445. } /* extern "C" */
  446. #endif
  447. #endif /* HWLOC_DISTANCES_H */