NEWS.txt 98 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073
  1. Copyright © 2009 CNRS
  2. Copyright © 2009-2023 Inria. All rights reserved.
  3. Copyright © 2009-2013 Université Bordeaux
  4. Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
  5. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
  6. $COPYRIGHT$
  7. Additional copyrights may follow
  8. $HEADER$
  9. ===========================================================================
  10. This file contains the main features as well as overviews of specific
  11. bug fixes (and other actions) for each version of hwloc since version
  12. 0.9.
  13. Version 2.9.3
  14. -------------
  15. * Handle Linux glibc allocation errors in binding routines (CVE-2022-47022).
  16. * Fix hwloc-calc when searching objects on heterogeneous memory platforms,
  17. thanks to Antoine Morvan for the report.
  18. * Fix hwloc_get_next_child() when there are some memory-side caches.
  19. * Don't crash if the topology is empty because Linux cgroups are wrong.
  20. * Improve some hwloc-bind warnings in case of command-line parsing errors.
  21. * Many documentation improvements all over the place, including:
  22. + hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder
  23. children, causing the logical indexes of objects to change.
  24. Version 2.9.2
  25. -------------
  26. * Don't forget L3i when defining filters for multiple levels of caches
  27. with hwloc_topology_set_cache/icache_types_filter().
  28. * Fix object total_memory after hwloc_topology_insert_group_object().
  29. * Fix the (non-yet) exporting in synthetic description for complex memory
  30. hierarchies with memory-side caches, etc.
  31. * Fix some default size attributes when building synthetic topologies.
  32. * Fix size units in hwloc-annotate.
  33. * Improve bitmap reallocation error management in many functions.
  34. * Documentation improvements:
  35. + Better document return values of functions.
  36. + Add "Error reporting" section (in hwloc.h and in the doxygen doc).
  37. + Add FAQ entry "What may I disable to make hwloc faster?"
  38. + Improve FAQ entries "Why is lstopo slow?" and
  39. "I only need ..., why should I use hwloc?"
  40. + Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind
  41. manpages.
  42. Version 2.9.1
  43. -------------
  44. * Don't forget to apply object type filters to "perflevel" caches detected
  45. on recent Mac OS X releases, thanks to Michel Lesoinne for the report.
  46. * Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes
  47. are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are.
  48. Thanks to Mark Grondona for reporting the issue.
  49. * Mark HPE Cray Slingshot NICs with subtype "Slingshot".
  50. Version 2.9.0
  51. -------------
  52. * Backends
  53. + Expose the memory size of CXL memory devices (Type 3) on Linux.
  54. + The LevelZero backend now reports the "XeLinkBandwidth" distance
  55. matrix between L0 devices (and subdevices) when available.
  56. + Add support for CUDA compute capability up to 9.0.
  57. * Tools
  58. + lstopo now switches to console mode when its output is redirected.
  59. Graphical window mode may be forced back with --of window.
  60. + hwloc-calc now accepts "numa" in -H, and I/O subtypes such as "gpu"
  61. in -I and -N.
  62. Version 2.8.0
  63. -------------
  64. * API
  65. + Add HWLOC_TOPOLOGY_FLAG_NO_DISTANCES, _NO_MEMATTRS and _NO_CPUKINDS
  66. to reduce the overhead when unneeded.
  67. + Add separate Read/Write Bandwidth/Latency memory attributes and
  68. implement them on Linux.
  69. * Backends
  70. + NUMA nodes may now have a subtype such as DRAM, HBM, SPM, or NVM
  71. on heterogeneous memory platforms on Linux.
  72. - Add DAXType and DAXParent attributes on Linux to tell where a
  73. DAX device or its corresponding NUMA node come from (SPM for
  74. Specific-Purpose or NVM for Non-Volatile Memory).
  75. + Detect heterogeneous caches in hybrid CPUs on MacOS X,
  76. thanks to Paul Bone for the help.
  77. + Max frequencies are not ignored in Linux cpukinds anymore (they were
  78. ignored in hwloc 2.7.0), but they may be slightly adjusted to avoid
  79. reporting hybrid CPUs because of Intel Turbo Boost Max 3.0.
  80. - See the documentation of environment variable HWLOC_CPUKINDS_MAXFREQ.
  81. + Hardwire the PCI locality of HPE Cray EX235a nodes.
  82. * Tools
  83. + lstopo and other tools may now load Linux and x86 cpuid topology files
  84. from a tarball.
  85. + lstopo may now replace the P# and L# index prefixes with custom strings
  86. thanks to --os-index-prefix and --logical-index-prefix options.
  87. * Misc
  88. + Add --disable-readme to avoid regenerating the top-level hwloc README
  89. file from the documentation.
  90. Version 2.7.2
  91. -------------
  92. * Fix a crash when LevelZero devices have multiple subdevices,
  93. e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton.
  94. * Fix a leak when importing cpukinds from XML,
  95. thanks to Hui Zhou.
  96. Version 2.7.1
  97. -------------
  98. * Workaround crashes when virtual machines report incoherent x86 CPUID
  99. information about numbers of cores and threads.
  100. Thanks to Peter Bense for the report.
  101. * Use setenv() instead of putenv() when trying to force enable oneAPI L0
  102. support, to avoid issues with applications that touch the environment,
  103. thanks to Josh Hursey for the patch.
  104. * Add some warnings at the end of configure when GPU libraries are
  105. missing on the system or their path is missing in the environment.
  106. Version 2.7.0
  107. -------------
  108. * Backends
  109. + Add support for NUMA nodes and caches with more than 64 PUs across
  110. multiple processor groups on Windows 11 and Windows Server 2022.
  111. + Group objects are not created for Windows processor groups anymore,
  112. except if HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS=1 in the environment.
  113. + Expose "Cluster" group objects on Linux kernel 5.16+ for CPUs
  114. that share some internal cache or bus. This can be equivalent
  115. to the L2 Cache level on some platforms (e.g. x86) or a specific
  116. level between L2 and L3 on others (e.g. ARM Kunpeng 920).
  117. Thanks to Jonathan Cameron for the help.
  118. - HWLOC_DONT_MERGE_CLUSTER_GROUPS=1 may be set in the environment
  119. to prevent these groups from being merged with identical caches, etc.
  120. + Improve the oneAPI LevelZero backend:
  121. - Expose subdevices such as "ze0.1" inside root OS devices ("ze0")
  122. when the hardware contains multiple subdevices.
  123. - Add many new attributes to describe device type, and the
  124. numbers of slices, subslices, execution units and threads.
  125. - Expose the memory information as LevelZeroHBM/DDR/MemorySize infos.
  126. + Ignore the max frequencies of cores in Linux cpukinds when the
  127. base frequencies are available (to avoid exposing hybrid CPUs
  128. when Intel Turbo Boost Max 3.0 gives slightly different max
  129. frequencies to CPU cores).
  130. - May be reverted by setting HWLOC_CPUKINDS_MAXFREQ=1 in the environment.
  131. * Tools
  132. + Add --grey and --palette options to switch lstopo to greyscale or
  133. white-background-only graphics, or to tune individual colors.
  134. * Build
  135. + Windows CMake builds now support non-MSVC compilers, detect several
  136. features at build time, can build/run tests, etc.
  137. Thanks to Michael Hirsch and Alexander Neumann.
  138. Version 2.6.0
  139. -------------
  140. * Backends
  141. + Expose two cpukinds for energy-efficient cores (icestorm) and
  142. high-performance cores (firestorm) on Apple M1 on Mac OS X.
  143. + Use sysfs CPU "capacity" to rank hybrid cores by efficiency
  144. on Linux when available (mostly on recent ARM platforms for now).
  145. + Improve HWLOC_MEMBIND_BIND (without the STRICT flag) on Linux kernel
  146. >= 5.15: If more than one node is given, the kernel may now use all
  147. of them instead of only the first one before falling back to others.
  148. + Expose cache os_index when available on Linux, it may be needed
  149. when using resctrl to configure cache partitioning, memory bandwidth
  150. monitoring, etc.
  151. + Add a "XGMIHops" distances matrix in the RSMI backend for AMD GPUs
  152. interconnected through XGMI links.
  153. + Expose AMD GPU memory information (VRAM and GTT) in the RSMI backend.
  154. + Add OS devices such as "bxi0" for Atos/Bull BXI HCAs on Linux.
  155. * Tools
  156. + lstopo has a better placement algorithm with respect to I/O
  157. objects, see --children-order in the manpage for details.
  158. + hwloc-annotate may now change object subtypes and cache or memory
  159. sizes.
  160. * Build
  161. + Allow to specify the ROCm installation for building the RSMI backend:
  162. - Use a custom installation path if specified with --with-rocm=<dir>.
  163. - Use /opt/rocm-<version> if specified with --with-rocm-version=<version>
  164. or the ROCM_VERSION environment variable.
  165. - Try /opt/rocm if it exists.
  166. - See "How do I enable ROCm SMI and select which version to use?"
  167. in the FAQ for details.
  168. + Add a CMakeLists for Windows under contrib/windows-cmake/ .
  169. * Documentation
  170. + Add FAQ entry "How do I create a custom heterogeneous and
  171. asymmetric topology?"
  172. Version 2.5.0
  173. -------------
  174. * API
  175. + Add hwloc/windows.h to query Windows processor groups.
  176. + Add hwloc_get_obj_with_same_locality() to convert between objects
  177. with same locality, for instance NUMA nodes and Packages,
  178. or OS devices within a PCI device.
  179. + Add hwloc_distances_transform() to modify distances structures.
  180. - hwloc-annotate and lstopo have new distances-transform options.
  181. + hwloc_distances_add() is replaced with _add_create() followed by
  182. _add_values() and _add_commit(). See hwloc/distances.h for details.
  183. + Add topology flags to mitigate binding modifications during
  184. hwloc discovery, especially on Windows:
  185. - HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING and _MEMBINDING
  186. restrict discovery to PUs and NUMA nodes inside the binding.
  187. - HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING prevents hwloc from ever
  188. changing the binding during discovery.
  189. * Backends
  190. + Add a levelzero backend for oneAPI L0 devices, exposed as OS devices
  191. of subtype "LevelZero" and name such as "ze0".
  192. - Add hwloc/levelzero.h for interoperability, i.e. converting
  193. between L0 API devices and hwloc cpusets or OS devices.
  194. + Expose NEC Vector Engine cards on Linux as OS devices of subtype
  195. "VectorEngine" and name "ve0", etc.
  196. Thanks to Anara Kozhokanova, Tim Cramer and Erich Focht for the help.
  197. + Add a NVLinkBandwidth distances structure between NVIDIA GPUs
  198. (and POWER processor or NVSwitches) in the NVML backend,
  199. and a XGMIBandwidth distances structure between AMD GPUs
  200. in the RSMI backend.
  201. - See "Topology Attributes: Distances, Memory Attributes and CPU Kinds"
  202. in the documentation for details about these new distances.
  203. + Add support for NUMA node 0 being offline in Linux, thanks to Jirka Hladky.
  204. * Build
  205. + Add --with-cuda-version=<version> or look at the CUDA_VERSION
  206. environment variable to find the appropriate CUDA pkg-config files.
  207. Thanks to Stephen Herbein for the suggestion.
  208. - Also add --with-cuda=<dir> to specify the CUDA installation path
  209. manually (and its NVML and OpenCL components).
  210. Thanks to Andrea Bocci for the suggestion.
  211. - See "How do I enable CUDA and select which CUDA version to use?"
  212. in the FAQ for details.
  213. * Tools
  214. + lstopo now has a --windows-processor-groups option on Windows.
  215. + hwloc-ps now has a --short-name option to avoid long/truncated
  216. command path.
  217. + hwloc-ps now has a --single-ancestor option to return a single
  218. (possibly too large) object where a process is bound.
  219. + hwloc-ps --pid-cmd may now query environment variables,
  220. including MPI-specific variables to find out process ranks.
  221. Version 2.4.1
  222. -------------
  223. * Fix AMD OpenCL device locality when PCI bus or device number >= 128.
  224. Thanks to Edgar Leon for reporting the issue.
  225. + Applications using any of the following inline functions must
  226. be recompiled to get the fix: hwloc_opencl_get_device_pci_busid()
  227. hwloc_opencl_get_device_cpuset(), hwloc_opencl_get_device_osdev().
  228. * Fix the ranking of cpukinds on non-Windows systems,
  229. thanks to Ivan Kochin for the report.
  230. * Fix the insertion of custom Groups after loading the topology,
  231. thanks to Scott Hicks.
  232. * Add support for CPU0 being offline in Linux, thanks to Garrett Clay.
  233. * Fix missing x86 Package and Core objects on FreeBSD/NetBSD.
  234. Thanks to Thibault Payet and Yuri Victorovich for the report.
  235. * Fix the import of very large distances with heterogeneous object types.
  236. * Fix a memory leak in the Linux backend,
  237. thanks to Perceval Anichini.
  238. Version 2.4.0
  239. -------------
  240. * API
  241. + Add hwloc/cpukinds.h for reporting information about hybrid CPUs.
  242. - Use Linux cpufreq frequencies to rank cores by efficiency.
  243. - Use x86 CPUID hybrid leaf and future Linux kernels sysfs CPU type
  244. files to identify Intel Atom and Core cores.
  245. - Use the Windows native EfficiencyClass to separate kinds.
  246. * Backends
  247. + Properly handle Linux kernel 5.10+ exposing ACPI HMAT information
  248. with knowledge of Generic Initiators.
  249. * Tools
  250. + lstopo has new --cpukinds and --no-cpukinds options for showing
  251. CPU kinds or not in textual and graphical modes respectively.
  252. + hwloc-calc has a new --cpukind option for filtering PUs by kind.
  253. + hwloc-annotate has a new cpukind command for modifying CPU kinds.
  254. * Misc
  255. + Fix hwloc_bitmap_nr_ulongs(), thanks to Norbert Eicker.
  256. + Add a documentation section about
  257. "Topology Attributes: Distances, Memory Attributes and CPU Kinds".
  258. + Silence some spurious warnings in the OpenCL backend and when showing
  259. process binding with lstopo --ps.
  260. Version 2.3.0
  261. -------------
  262. * API
  263. + Add hwloc/memattrs.h for exposing latency/bandwidth information
  264. between initiators (CPU sets for now) and target NUMA nodes,
  265. typically on heterogeneous platforms.
  266. - When available, bandwidths and latencies are read from the ACPI HMAT
  267. table exposed by Linux kernel 5.2+.
  268. - Attributes may also be customized to expose user-defined performance
  269. information.
  270. + Add hwloc_get_local_numanode_objs() for listing NUMA nodes that are
  271. local to some locality.
  272. + The new topology flag HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT causes
  273. support arrays to be loaded from XML exported with hwloc 2.3+.
  274. - hwloc_topology_get_support() now returns an additional "misc"
  275. array with feature "imported_support" set when support was imported.
  276. + Add hwloc_topology_refresh() to refresh internal caches after modifying
  277. the topology and before consulting the topology in a multithread context.
  278. * Backends
  279. + Add a ROCm SMI backend and a hwloc/rsmi.h helper file for getting
  280. the locality of AMD GPUs, now exposed as "rsmi" OS devices.
  281. Thanks to Mike Li.
  282. + Remove POWER device-tree-based topology on Linux,
  283. (it was disabled by default since 2.1).
  284. * Tools
  285. + Command-line options for specifying flags now understand comma-separated
  286. lists of flag names (substrings).
  287. + hwloc-info and hwloc-calc have new --local-memory --local-memory-flags
  288. and --best-memattr options for reporting local memory nodes and filtering
  289. by memory attributes.
  290. + hwloc-bind has a new --best-memattr option for filtering by memory attributes
  291. among the memory binding set.
  292. + Tools that have a --restrict option may now receive a nodeset or
  293. some custom flags for restricting the topology.
  294. + lstopo now has a --thickness option for changing line thickness in the
  295. graphical output.
  296. + Fix lstopo drawing when autoresizing on Windows 10.
  297. + Pressing the F5 key in lstopo X11 and Windows graphical/interactive outputs
  298. now refreshes the display according to the current topology and binding.
  299. + Add a tikz lstopo graphical backend to generate pictures easily included into
  300. LaTeX documents. Thanks to Clement Foyer.
  301. * Misc
  302. + The default installation path of the Bash completion file has changed to
  303. ${datadir}/bash-completion/completions/hwloc. Thanks to Tomasz Kłoczko.
  304. Version 2.2.0
  305. -------------
  306. * API
  307. + Add hwloc_bitmap_singlify_by_core() to remove SMT from a given cpuset,
  308. thanks to Florian Reynier for the suggestion.
  309. + Add --enable-32bits-pci-domain to stop ignoring PCI devices with domain
  310. >16bits (e.g. 10000:02:03.4). Enabling this option breaks the library ABI.
  311. Thanks to Dylan Simon for the help.
  312. * Backends
  313. + Add support for Linux cgroups v2.
  314. + Add NUMA support for FreeBSD.
  315. + Add get_last_cpu_location support for FreeBSD.
  316. + Remove support for Intel Xeon Phi (MIC, Knights Corner) co-processors.
  317. * Tools
  318. + Add --uid to filter the hwloc-ps output by uid on Linux.
  319. + Add a GRAPHICAL OUTPUT section in the manpage of lstopo.
  320. * Misc
  321. + Use the native dlopen instead of libltdl,
  322. unless --disable-plugin-dlopen is passed at configure time.
  323. Version 2.1.0
  324. -------------
  325. * API
  326. + Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors
  327. with multiple dies per package, in the x86 and Linux backends.
  328. + Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches.
  329. - They are filtered-out by default, except in command-line tools.
  330. - They are only available on very recent platforms running Linux 5.2+
  331. and up-to-date ACPI tables.
  332. - The KNL MCDRAM in cache mode is still exposed as a L3 unless
  333. HWLOC_KNL_MSCACHE_L3=0 in the environment.
  334. + Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting
  335. topologies based on some memory nodes.
  336. + Add hwloc_topology_set_components() for blacklisting some components
  337. from being enabled in a topology.
  338. + Add hwloc_bitmap_nr_ulongs() and hwloc_bitmap_from/to_ulongs(),
  339. thanks to Junchao Zhang for the suggestion.
  340. + Improve the API for dealing with disallowed resources
  341. - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED
  342. and --whole-system command-line options with --disallowed.
  343. . Former names are still accepted for backward compatibility.
  344. - Add hwloc_topology_allow() for changing allowed sets after load().
  345. - Add the HWLOC_ALLOW=all environment variable to totally ignore
  346. administrative restrictions such as Linux Cgroups.
  347. - Add disallowed_pu and disallowed_numa bits to the discovery support
  348. structure.
  349. + Group objects have a new "dont_merge" attribute to prevent them from
  350. being automatically merged with identical parent or children.
  351. + Add more distances-related features:
  352. - Add hwloc_distances_get_name() to retrieve a string describing
  353. what a distances structure contains.
  354. - Add hwloc_distances_get_by_name() to retrieve distances structures
  355. based on their name.
  356. - Add hwloc_distances_release_remove()
  357. - Distances may now cover objects of different types with new kind
  358. HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES.
  359. * Backends
  360. + Add support for Linux 5.3 new sysfs cpu topology files with Die information.
  361. + Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
  362. + Improve memory locality on Linux by using HMAT initiators (exposed
  363. since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes.
  364. + The x86 backend now properly handles offline CPUs.
  365. + Detect the locality of NVIDIA GPU OpenCL devices.
  366. + Ignore NUMA nodes that correspond to NVIDIA GPUs by default.
  367. - They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment.
  368. - Fix their CPU locality and add info attributes to identify them.
  369. Thanks to Max Katz and Edgar Leon for the help.
  370. + Add support for IBM S/390 drawers.
  371. + Rework the heuristics for discovering KNL Cluster and Memory modes
  372. to stop assuming all CPUs are online (required for mOS support).
  373. Thanks to Sharath K Bhat for testing patches.
  374. + Ignore NUMA node information from AMD topoext in the x86 backend,
  375. unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
  376. + Expose Linux DAX devices as hwloc Block OS devices.
  377. + Remove support for /proc/cpuinfo-only topology discovery in Linux
  378. kernel prior to 2.6.16.
  379. + Disable POWER device-tree-based topology on Linux by default.
  380. - It may be reenabled by setting HWLOC_USE_DT=1 in the environment.
  381. + Discovery components are now divided in phases that may be individually
  382. blacklisted.
  383. - The linuxio component has been merged back into the linux component.
  384. * Tools
  385. + lstopo
  386. - lstopo factorizes objects by default in the graphical output when
  387. there are more than 4 identical children.
  388. . New options --no-factorize and --factorize may be used to configure this.
  389. . Hit the 'f' key to disable factorizing in interactive outputs.
  390. - Both logical and OS/physical indexes are now displayed by default
  391. for PU and NUMA nodes.
  392. - The X11 and Windows interactive outputs support many keyboard
  393. shortcuts to dynamically customize the attributes, legend, etc.
  394. - Add --linespacing and change default margins and linespacing.
  395. - Add --allow for changing allowed sets.
  396. - Add a native SVG backend. Its graphical output may be slightly less
  397. pretty than Cairo (still used by default if available) but the SVG
  398. code provides attributes to manipulate objects from HTML/JS.
  399. See dynamic_SVG_example.html for an example.
  400. + Add --nodeset options to hwloc-calc for converting between cpusets and
  401. nodesets.
  402. + Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple
  403. PUs in SMT cores.
  404. + hwloc-annotate may annotate multiple locations at once.
  405. + Add a HTML/JS version of hwloc-ps. See contrib/hwloc-ps.www/README.
  406. + Add bash completions.
  407. * Misc
  408. + Add several FAQ entries in "Compatibility between hwloc versions"
  409. about API version, ABI, XML, Synthetic strings, and shmem topologies.
  410. Version 2.0.4 (also included in 1.11.13 when appropriate)
  411. -------------
  412. * Add support for Linux 5.3 new sysfs cpu topology files with Die information.
  413. * Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
  414. * Tiles, Modules and Dies are exposed as Groups for now.
  415. + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent
  416. Die groups from being automatically merged with identical parent or children.
  417. * Ignore NUMA node information from AMD topoext in the x86 backend,
  418. unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
  419. * Group objects have a new "dont_merge" attribute to prevent them from
  420. being automatically merged with identical parent or children.
  421. Version 2.0.3 (also included in 1.11.12 when appropriate)
  422. -------------
  423. * Fix build on Cygwin, thanks to Marco Atzeri for the patches.
  424. * Fix a corner case of hwloc_topology_restrict() where children would
  425. become out-of-order.
  426. * Fix the return length of export_xmlbuffer() functions to always
  427. include the ending \0.
  428. * Fix lstopo --children-order argument parsing.
  429. Version 2.0.2 (also included in 1.11.11 when appropriate)
  430. -------------
  431. * Add support for Hygon Dhyana processors in the x86 backend,
  432. thanks to Pu Wen for the patch.
  433. * Fix symbol renaming to also rename internal components,
  434. thanks to Evan Ramos for the patch.
  435. * Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues.
  436. * Detect PCI link speed without being root on Linux >= 4.13.
  437. * Add HWLOC_VERSION* macros to the public headers,
  438. thanks to Gilles Gouaillardet for the suggestion.
  439. Version 2.0.1 (also included in 1.11.10 when relevant)
  440. -------------
  441. * Bump the library soname to 15:0:0 to avoid conflicts with hwloc 1.11.x
  442. releases. The hwloc 2.0.0 soname was buggy (12:0:0), applications will
  443. have to be recompiled.
  444. * Serialize pciaccess discovery to fix concurrent topology loads in
  445. multiple threads.
  446. * Fix hwloc-dump-hwdata to only process SMBIOS information that corresponds
  447. to the KNL and KNM configuration.
  448. * Add a heuristic for guessing KNL/KNM memory and cluster modes when
  449. hwloc-dump-hwdata could not run as root earlier.
  450. * Add --no-text lstopo option to remove text from some boxes in the
  451. graphical output. Mostly useful for removing Group labels.
  452. * Some minor fixes to memory binding.
  453. Version 2.0.0
  454. -------------
  455. *** The ABI of the library has changed. ***
  456. For instance some hwloc_obj fields were reordered, added or removed, see below.
  457. + HWLOC_API_VERSION and hwloc_get_api_version() now give 0x00020000.
  458. + See "How do I handle ABI breaks and API upgrades ?" in the FAQ
  459. and "Upgrading to hwloc 2.0 API" in the documentation.
  460. * Major API changes
  461. + Memory, I/O and Misc objects are now stored in dedicated children lists,
  462. not in the usual children list that is now only used for CPU-side objects.
  463. - hwloc_get_next_child() may still be used to iterate over these 4 lists
  464. of children at once.
  465. - hwloc_obj_type_is_normal(), _memory() and _io() may be used to check
  466. the kind of a given object type.
  467. + Topologies always have at least one NUMA object. On non-NUMA machines,
  468. a single NUMA object is added to describe the entire machine memory.
  469. The NUMA level cannot be ignored anymore.
  470. + The NUMA level is special since NUMA nodes are not in the main hierarchy
  471. of objects anymore. Its depth is a fake negative depth that should not be
  472. compared with normal levels.
  473. - If all memory objects are attached to parents at the same depth,
  474. it may be retrieved with hwloc_get_memory_parents_depth().
  475. + The HWLOC_OBJ_CACHE type is replaced with 8 types HWLOC_OBJ_L[1-5]CACHE
  476. and HWLOC_OBJ_L[1-3]ICACHE that remove the need to disambiguate levels
  477. when looking for caches with _by_type() functions.
  478. - New hwloc_obj_type_is_{,d,i}cache() functions may be used to check whether
  479. a given type is a cache.
  480. + Reworked ignoring/filtering API
  481. - Replace hwloc_topology_ignore*() functions with hwloc_topology_set_type_filter()
  482. and hwloc_topology_set_all_types_filter().
  483. . Contrary to hwloc_topology_ignore_{type,all}_keep_structure() which
  484. removed individual objects, HWLOC_TYPE_FILTER_KEEP_STRUCTURE only removes
  485. entire levels (so that topologies do not become too asymmetric).
  486. - Remove HWLOC_TOPOLOGY_FLAG_ICACHES in favor of hwloc_topology_set_icache_types_filter()
  487. with HWLOC_TYPE_FILTER_KEEP_ALL.
  488. - Remove HWLOC_TOPOLOGY_FLAG_IO_DEVICES, _IO_BRIDGES and _WHOLE_IO in favor of
  489. hwloc_topology_set_io_types_filter() with HWLOC_TYPE_FILTER_KEEP_ALL or
  490. HWLOC_TYPE_FILTER_KEEP_IMPORTANT.
  491. + The distance API has been completely reworked. It is now described
  492. in hwloc/distances.h.
  493. + Return values
  494. - Most functions in hwloc/bitmap.h now return an int that may be negative
  495. in case of failure to realloc/extend the internal storage of a bitmap.
  496. - hwloc_obj_add_info() also returns an int in case allocations fail.
  497. * Minor API changes
  498. + Object attributes
  499. - obj->memory is removed.
  500. . local_memory and page_types attributes are now in obj->attr->numanode
  501. . total_memory moves to obj->total_memory.
  502. - Objects do not have allowed_cpuset and allowed_nodeset anymore.
  503. They are only available for the entire topology using
  504. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset().
  505. - Objects now have a "subtype" field that supersedes former "Type" and
  506. "CoProcType" info attributes.
  507. + Object and level depths are now signed ints.
  508. + Object string printing and parsing
  509. - hwloc_type_sscanf() deprecates the old hwloc_obj_type_sscanf().
  510. - hwloc_type_sscanf_as_depth() is added to convert a type name into
  511. a level depth.
  512. - hwloc_obj_cpuset_snprintf() is deprecated in favor of hwloc_bitmap_snprintf().
  513. + Misc objects
  514. - Replace hwloc_topology_insert_misc_object_by_cpuset() with
  515. hwloc_topology_insert_group_object() to precisely specify the location
  516. of an additional hierarchy level in the topology.
  517. - Misc objects have their own level and depth to iterate over all of them.
  518. - Misc objects may now only be inserted as a leaf object with
  519. hwloc_topology_insert_misc_object() which deprecates
  520. hwloc_topology_insert_misc_object_by_parent().
  521. + hwloc_topology_restrict() doesn't remove objects that contain memory
  522. by default anymore.
  523. - The list of existing restrict flags was modified.
  524. + The discovery support array now contains some NUMA specific bits.
  525. + XML export functions take an additional flags argument,
  526. for instance for exporting XMLs that are compatible with hwloc 1.x.
  527. + Functions diff_load_xml*(), diff_export_xml*() and diff_destroy() in
  528. hwloc/diff.h do not need a topology as first parameter anymore.
  529. + hwloc_parse_cpumap_file() is superseded by hwloc_linux_read_path_as_cpumask()
  530. in hwloc/linux.h.
  531. + HWLOC_MEMBIND_DEFAULT and HWLOC_MEMBIND_FIRSTTOUCH were clarified.
  532. * New APIs and Features
  533. + Add hwloc/shmem.h for sharing topologies between processes running on
  534. the same machine (for reducing the memory footprint).
  535. + Add the experimental netloc subproject. It is disabled by default
  536. and can be enabled with --enable-netloc.
  537. It currently brings command-line tools to gather and visualize the
  538. topology of InfiniBand fabrics, and an API to convert such topologies
  539. into Scotch architectures for process mapping.
  540. See the documentation for details.
  541. * Removed APIs and features
  542. + Remove the online_cpuset from struct hwloc_obj. Offline PUs get unknown
  543. topologies on Linux nowadays, and wrong topology on Solaris. Other OS
  544. do not support them. And one cannot do much about them anyway. Just keep
  545. them in complete_cpuset.
  546. + Remove the now-unused "System" object type HWLOC_OBJ_SYSTEM,
  547. defined to MACHINE for backward compatibility.
  548. + The almost-unused "os_level" attribute has been removed from the
  549. hwloc_obj structure.
  550. + Remove the custom interface for assembling the topologies of different
  551. nodes as well as the hwloc-assembler tools.
  552. + hwloc_topology_set_fsroot() is removed, the environment variable
  553. HWLOC_FSROOT may be used for the same remote testing/debugging purpose.
  554. + Remove the deprecated hwloc_obj_snprintf(), hwloc_obj_type_of_string(),
  555. hwloc_distribute[v]().
  556. + Remove Myrinet Express interoperability (hwloc/myriexpress.h).
  557. + Remove Kerrighed support from the Linux backend.
  558. + Remove Tru64 (OSF/1) support.
  559. - Remove HWLOC_MEMBIND_REPLICATE which wasn't available anywhere else.
  560. * Backend improvements
  561. + Linux
  562. - OS devices do not have to be attached through PCI anymore,
  563. for instance enabling the discovery of NVDIMM block devices.
  564. - Remove the dependency on libnuma.
  565. - Add a SectorSize attribute to block OS devices.
  566. + Mac OS X
  567. - Fix detection of cores and hyperthreads.
  568. - Add CPUVendor, Model, ... attributes.
  569. + Windows
  570. - Add get_area_memlocation().
  571. * Tools
  572. + lstopo and hwloc-info have a new --filter option matching the new filtering API.
  573. + lstopo can be given --children-order=plain to force a basic displaying
  574. of memory and normal children together below their parent.
  575. + hwloc-distances was removed and replaced with lstopo --distances.
  576. * Misc
  577. + Exports
  578. - Exporting to synthetic now ignores I/O and Misc objects.
  579. + PCI discovery
  580. - Separate OS device discovery from PCI discovery. Only the latter is disabled
  581. with --disable-pci at configure time. Both may be disabled with --disable-io.
  582. - The `linuxpci' component is now renamed into `linuxio'.
  583. - The old `libpci' component name from hwloc 1.6 is not supported anymore,
  584. only the `pci' name from hwloc 1.7 is now recognized.
  585. - The HWLOC_PCI_<domain>_<bus>_LOCALCPUS environment variables are superseded
  586. with a single HWLOC_PCI_LOCALITY where bus ranges may be specified.
  587. - Do not set PCI devices and bridges name automatically. Vendor and device
  588. names are already in info attributes.
  589. + Components and discovery
  590. - Add HWLOC_SYNTHETIC environment variable to enforce a synthetic topology
  591. as if hwloc_topology_set_synthetic() had been called.
  592. - HWLOC_COMPONENTS doesn't support xml or synthetic component attributes
  593. anymore, they should be passed in HWLOC_XMLFILE or HWLOC_SYNTHETIC instead.
  594. - HWLOC_COMPONENTS takes precedence over other environment variables
  595. for selecting components.
  596. + hwloc now requires a C99 compliant compiler.
  597. Version 1.11.13 (also included in 2.0.4)
  598. ---------------
  599. * Add support for Linux 5.3 new sysfs cpu topology files with Die information.
  600. * Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
  601. * Tiles, Modules and Dies are exposed as Groups for now.
  602. + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent
  603. Die groups from being automatically merged with identical parent or children.
  604. * Ignore NUMA node information from AMD topoext in the x86 backend,
  605. unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
  606. * Group objects have a new "dont_merge" attribute to prevent them from
  607. being automatically merged with identical parent or children.
  608. Version 1.11.12 (also included in 2.0.3)
  609. ---------------
  610. * Fix a corner case of hwloc_topology_restrict() where children would
  611. become out-of-order.
  612. * Fix the return length of export_xmlbuffer() functions to always
  613. include the ending \0.
  614. Version 1.11.11 (also included in 2.0.2)
  615. ---------------
  616. * Add support for Hygon Dhyana processors in the x86 backend,
  617. thanks to Pu Wen for the patch.
  618. * Fix symbol renaming to also rename internal components,
  619. thanks to Evan Ramos for the patch.
  620. * Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues.
  621. * Detect PCI link speed without being root on Linux >= 4.13.
  622. Version 1.11.10 (also included in 2.0.1)
  623. ---------------
  624. * Fix detection of cores and hyperthreads on Mac OS X.
  625. * Serialize pciaccess discovery to fix concurrent topology loads in
  626. multiple threads.
  627. * Fix first touch area memory binding on Linux when thread memory
  628. binding is different.
  629. * Some minor fixes to memory binding.
  630. * Fix hwloc-dump-hwdata to only process SMBIOS information that corresponds
  631. to the KNL and KNM configuration.
  632. * Add a heuristic for guessing KNL/KNM memory and cluster modes when
  633. hwloc-dump-hwdata could not run as root earlier.
  634. * Fix discovery of NVMe OS devices on Linux >= 4.0.
  635. * Add get_area_memlocation() on Windows.
  636. * Add CPUVendor, Model, ... attributes on Mac OS X.
  637. Version 1.11.9
  638. --------------
  639. * Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend,
  640. thanks to Jeff Zhao for the patch.
  641. * Fix AMD Epyc 24-core L3 cache locality in the x86 backend.
  642. * Don't crash in the x86 backend when the CPUID vendor string is unknown.
  643. * Fix the missing pu discovery support bit on some OS.
  644. * Fix the management of the lstopoStyle info attribute for custom colors.
  645. * Add verbose warnings when failing to load hwloc v2.0+ XMLs.
  646. Version 1.11.8
  647. --------------
  648. * Multiple Solaris improvements, thanks to Maureen Chew for the help:
  649. + Detect caches on Sparc.
  650. + Properly detect allowed/disallowed PUs and NUMA nodes with processor sets.
  651. + Add hwloc_get_last_cpu_location() support for the current thread.
  652. * Add support for CUDA compute capability 7.0 and fix support for 6.[12].
  653. * Tools improvements
  654. + Fix search for objects by physical index in command-line tools.
  655. + Add missing "cpubind:get_thisthread_last_cpu_location" in the output
  656. of hwloc-info --support.
  657. + Add --pid and --name to specify target processes in hwloc-ps.
  658. + Display thread names in lstopo and hwloc-ps on Linux.
  659. * Doc improvements
  660. + Add a FAQ entry about building on Windows.
  661. + Install missing sub-manpage for hwloc_obj_add_info() and
  662. hwloc_obj_get_info_by_name().
  663. Version 1.11.7
  664. --------------
  665. * Fix hwloc-bind --membind for CPU-less NUMA nodes (again).
  666. Thanks to Gilles Gouaillardet for reporting the issue.
  667. * Fix a memory leak on IBM S/390 platforms running Linux.
  668. * Fix a memory leak when forcing the x86 backend first on amd64/topoext
  669. platforms running Linux.
  670. * Command-line tools now support "hbm" instead of "numanode" for filtering
  671. only high-bandwidth memory nodes when selecting locations.
  672. + hwloc-bind also supports --hbm and --no-hbm for filtering only or
  673. no HBM nodes.
  674. Thanks to Nicolas Denoyelle for the suggestion.
  675. * Add --children and --descendants to hwloc-info for listing object
  676. children or object descendants of a specific type.
  677. * Add --no-index, --index, --no-attrs, --attrs to disable/enable display
  678. of index numbers or attributes in the graphical lstopo output.
  679. * Try to gather hwloc-dump-hwdata output from all possible locations
  680. in hwloc-gather-topology.
  681. * Updates to the documentation of locations in hwloc(7) and
  682. command-line tools manpages.
  683. Version 1.11.6
  684. --------------
  685. * Make the Linux discovery about twice as fast, especially on the CPU side,
  686. by trying to avoid sysfs file accesses as much as possible.
  687. * Add support for AMD Family 17h processors (Zen) SMT cores in the Linux
  688. and x86 backends.
  689. * Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the
  690. HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the
  691. set of allowed resources from the local operating system even if the
  692. topology was loaded from XML or synthetic.
  693. * Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not
  694. overlap currently defined ranges in the bitmap.
  695. * Don't reset the lstopo zoom scale when moving the X11 window.
  696. * lstopo now has --flags for manually setting topology flags.
  697. * hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects.
  698. Version 1.11.5
  699. --------------
  700. * Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch.
  701. * Reenable distance gathering on Solaris, disabled by mistake since v1.0.
  702. Thanks to TU Wien for the help.
  703. * Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with
  704. empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM.
  705. Thanks to Nicolas Denoyelle for the report.
  706. * Fix XML import of multiple distance matrices.
  707. * Add a FAQ entry about "hwloc is only a structural model, it ignores
  708. performance models, memory bandwidth, etc.?"
  709. Version 1.11.4
  710. --------------
  711. * Add MemoryMode and ClusterMode attributes in the Machine object on KNL.
  712. Add doc/examples/get-knl-modes.c for an example of retrieving them.
  713. Thanks to Grzegorz Andrejczuk.
  714. * Fix Linux build with -m32 with respect to libudev.
  715. Thanks to Paul Hargrove for reporting the issue.
  716. * Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting
  717. the issue and providing the patch.
  718. * Don't forget to display OS device children in the graphical lstopo.
  719. * Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch.
  720. * Properly handle realloc() failures, thanks to Bryon Gloden for reporting
  721. the issue.
  722. * Fix lstopo crash in ascii/fig/windows outputs when some objects have a
  723. lstopoStyle info attribute.
  724. Version 1.11.3
  725. --------------
  726. * Bug fixes
  727. + Fix a memory leak on Linux S/390 hosts with books.
  728. + Fix /proc/mounts parsing on Linux by using mntent.h.
  729. Thanks to Nathan Hjelm for reporting the issue.
  730. + Fix an x86 infinite loop on VMware due to the x2APIC feature being
  731. advertised without actually being fully supported.
  732. Thanks to Jianjun Wen for reporting the problem and testing the patch.
  733. + Fix the return value of hwloc_alloc() on mmap() failure.
  734. Thanks to Hugo Brunie for reporting the issue.
  735. + Fix the return value of command-line tools in some error cases.
  736. + Do not break individual thread bindings during x86 backend discovery in a
  737. multithreaded process. Thanks to Farouk Mansouri for the report.
  738. + Fix hwloc-bind --membind for CPU-less NUMA nodes.
  739. + Fix some corner cases in the XML export/import of application userdata.
  740. * API Improvements
  741. + Add HWLOC_MEMBIND_BYNODESET flag so that membind() functions accept
  742. either cpusets or nodesets.
  743. + Add hwloc_get_area_memlocation() to check where pages are actually
  744. allocated. Only implemented on Linux for now.
  745. - There's no _nodeset() variant, but the new flag HWLOC_MEMBIND_BYNODESET
  746. is supported.
  747. + Make hwloc_obj_type_sscanf() parse back everything that may be outputted
  748. by hwloc_obj_type_snprintf().
  749. * Detection Improvements
  750. + Allow the x86 backend to add missing cache levels, so that it completes
  751. what the Solaris backend lacks.
  752. Thanks to Ryan Zezeski for reporting the issue.
  753. + Do not filter-out FibreChannel PCI adapters by default anymore.
  754. Thanks to Matt Muggeridge for the report.
  755. + Add support for CUDA compute capability 6.x.
  756. * Tools
  757. + Add --support to hwloc-info to list supported features, just like with
  758. hwloc_topology_get_support().
  759. - Also add --objects and --topology to explicitly switch between the
  760. default modes.
  761. + Add --tid to let hwloc-bind operate on individual threads on Linux.
  762. + Add --nodeset to let hwloc-bind report memory binding as NUMA node sets.
  763. + hwloc-annotate and lstopo don't drop application userdata from XMLs anymore.
  764. - Add --cu to hwloc-annotate to drop these application userdata.
  765. + Make the hwloc-dump-hwdata dump directory configurable through configure
  766. options such as --runstatedir or --localstatedir.
  767. * Misc Improvements
  768. + Add systemd service template contrib/systemd/hwloc-dump-hwdata.service
  769. for launching hwloc-dump-hwdata at boot on Linux.
  770. Thanks to Grzegorz Andrejczuk.
  771. + Add HWLOC_PLUGINS_BLACKLIST environment variable to prevent some plugins
  772. from being loaded. Thanks to Alexandre Denis for the suggestion.
  773. + Small improvements for various Windows build systems,
  774. thanks to Jonathan L Peyton and Marco Atzeri.
  775. Version 1.11.2
  776. --------------
  777. * Improve support for Intel Knights Landing Xeon Phi on Linux:
  778. + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory
  779. (MCDRAM) together through "Cluster" groups so that the local MCDRAM is
  780. easy to find.
  781. - See "How do I find the local MCDRAM NUMA node on Intel Knights
  782. Landing Xeon Phi?" in the documentation.
  783. - For uniformity across all KNL configurations, always have a NUMA node
  784. object even if the host is UMA.
  785. + Fix the detection of the memory-side cache:
  786. - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information
  787. into /var/run/hwloc/ as root during boot, and load this dumped
  788. information from the hwloc library at runtime.
  789. - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights
  790. Landing Xeon Phi?" in the documentation.
  791. Thanks to Grzegorz Andrejczuk for the patches and for the help.
  792. * The x86 and linux backends may now be combined for discovering CPUs
  793. through x86 CPUID and memory from the Linux kernel.
  794. This is useful for working around buggy CPU information reported by Linux
  795. (for instance the AMD Bulldozer/Piledriver bug below).
  796. Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment.
  797. * Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer)
  798. in the x86 backend. Thanks to many users who helped.
  799. * Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1
  800. for AMD Opteron 61xx (Magny-Cours) processors.
  801. * The x86 backend may now add the info attribute Inclusive=0 or 1 to caches
  802. it discovers, or to caches discovered by other backends earlier.
  803. Thanks to Guillaume Beauchamp for the patch.
  804. * Fix the management of alloc_membind() allocation failures on AIX, HP-UX
  805. and OSF/Tru64.
  806. * Fix spurious failures to load with ENOMEM on AIX in case of Misc objects
  807. below PUs.
  808. * lstopo improvements in X11 and Windows graphical mode:
  809. + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale,
  810. or fit the entire window.
  811. + Display all keyboard shortcuts in the console.
  812. * Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0
  813. in the environment when --enable-debug was passed to configure.
  814. * Add a FAQ entry "What are these Group objects in my topology?".
  815. Version 1.11.1
  816. --------------
  817. * Detection fixes
  818. + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to
  819. workaround buggy Linux kernels.
  820. Thanks to Takahiro Kawashima and Gilles Gouaillardet.
  821. + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors
  822. in the x86 backend. Thanks to Guillaume Beauchamp for the patch.
  823. + Detect block devices directly attached to PCI without a controller,
  824. for instance NVMe disks. Thanks to Barry M. Tannenbaum.
  825. + Add the PCISlot attribute to all PCI functions instead of only the
  826. first one.
  827. * Miscellaneous internal fixes
  828. + Ignore PCI bridges that could fail assertions by reporting buggy
  829. secondary-subordinate bus numbers.
  830. Thanks to George Bosilca for reporting the issue.
  831. + Fix an overzealous assertion when inserting an intermediate Group object
  832. while Groups are totally ignored.
  833. + Fix a memory leak on Linux on AMD processors with dual-core compute units.
  834. Thanks to Bob Benner.
  835. + Fix a memory leak on failure to load an XML diff file.
  836. + Fix some segfaults when inputting an invalid synthetic description.
  837. + Fix a segfault when plugins fail to find core symbols.
  838. Thanks to Guy Streeter.
  839. * Many fixes and improvements in the Windows backend:
  840. + Fix the discovery of more than 32 processors and multiple processor
  841. groups. Thanks to Barry M. Tannenbaum for the help.
  842. + Add thread binding set support in case of multiple processor groups.
  843. + Add thread binding get support.
  844. + Add get_last_cpu_location() support for the current thread.
  845. + Disable the unsupported process binding in case of multiple processor
  846. groups.
  847. + Fix/update the Visual Studio support under contrib/windows.
  848. Thanks to Eloi Gaudry for the help.
  849. * Tools fixes
  850. + Fix a segfault when displaying logical indexes in the graphical lstopo.
  851. Thanks to Guillaume Mercier for reporting the issue.
  852. + Fix lstopo linking with X11 libraries, for instance on Mac OS X.
  853. Thanks to Scott Atchley and Pierre Ramet for reporting the issue.
  854. + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable
  855. resources from the output anymore and those may be annotated as well.
  856. + Command-line tools may now import XML from the standard input with -i -.xml
  857. + Add missing documentation for the hwloc-info --no-icaches option.
  858. Version 1.11.0
  859. --------------
  860. * API
  861. + Socket objects are renamed into Package to align with the terminology
  862. used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket"
  863. name are still supported for backward compatibility.
  864. + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification.
  865. HWLOC_OBJ_NODE is still supported for backward compatibility.
  866. "Node" and "NUMANode" strings are supported as in earlier releases.
  867. * Detection improvements
  868. + Add support for Intel Knights Landing Xeon Phi.
  869. Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski.
  870. + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID
  871. info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe
  872. for the help.
  873. - Add --disable-libudev to avoid dependency on the libudev library.
  874. + Add "MemoryModule" Misc objects with information about DIMMs, on Linux
  875. when privileged and when I/O is enabled.
  876. Thanks to Vineet Pedaballe for the help.
  877. + Add a PCISlot attribute to PCI devices on Linux when supported to
  878. identify the physical PCI slot where the board is plugged.
  879. + Add CPUStepping info attribute on x86 processors,
  880. thanks to Thomas Röhl for the suggestion.
  881. + Ignore the device-tree on non-Power architectures to avoid buggy
  882. detection on ARM. Thanks to Orion Poplawski for reporting the issue.
  883. + Work-around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity
  884. for the PCI links on the second processor.
  885. + Add support for CUDA compute capability 5.x, thanks to Benjamin Worpitz.
  886. + Many fixes to the x86 backend
  887. - Add L1i and fix L2/L3 type on old AMD processors without topoext support.
  888. - Fix Intel CPU family and model numbers when basic family isn't 6 or 15.
  889. - Fix package IDs on recent AMD processors.
  890. - Fix misc issues due to incomplete APIC IDs on x2APIC processors.
  891. - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs.
  892. + Gather total machine memory on NetBSD.
  893. * Tools
  894. + lstopo
  895. - Collapse identical PCI devices unless --no-collapse is given.
  896. This avoids gigantic outputs when a PCI device contains dozens of
  897. identical virtual functions.
  898. - The ASCII art output is now called "ascii", for instance in
  899. "lstopo -.ascii".
  900. The former "txt" extension is retained for backward compatibility.
  901. - Automatically scale graphical box width to the inner text in Cairo,
  902. ASCII and Windows outputs.
  903. - Add --rect to lstopo to force rectangular layout even for NUMA nodes.
  904. - Add --restrict-flags to configure the behavior of --restrict.
  905. - Objects may have a "Type" info attribute to specify a better type name
  906. and display it in lstopo.
  907. - Really export all verbose information to the given output file.
  908. + hwloc-annotate
  909. - May now operate on all types of objects, including I/O.
  910. - May now insert Misc objects in the topology.
  911. - Do not drop instruction caches and I/O devices from the output anymore.
  912. + Fix lstopo path in hwloc-gather-topology after install.
  913. * Misc
  914. + Fix hwloc/cudart.h for machines with multiple PCI domains,
  915. thanks to Imre Kerr for reporting the problem.
  916. + Fix PCI Bridge-specific depth attribute.
  917. + Fix hwloc_bitmap_intersect() for two infinite bitmaps.
  918. + Fix some corner cases in the building of levels on large NUMA machines
  919. with non-uniform NUMA groups and I/Os.
  920. + Improve the performance of object insertion by cpuset for large
  921. topologies.
  922. + Prefix verbose XML import errors with the source name.
  923. + Improve pkg-config checks and error messages.
  924. + Fix excluding after a component with an argument in the HWLOC_COMPONENTS
  925. environment variable.
  926. * Documentation
  927. + Fix the recommended way in documentation and examples to allocate memory
  928. on some node, it should use HWLOC_MEMBIND_BIND.
  929. Thanks to Nicolas Bouzat for reporting the issue.
  930. + Add a "Miscellaneous objects" section in the documentation.
  931. + Add a FAQ entry "What happens to my topology if I disable symmetric
  932. multithreading, hyper-threading, etc. ?" to the documentation.
  933. Version 1.10.1
  934. --------------
  935. * Actually remove disallowed NUMA nodes from nodesets when the whole-system
  936. flag isn't enabled.
  937. * Fix the gathering of PCI domains. Thanks to James Custer for reporting
  938. the issue and providing a patch.
  939. * Fix the merging of identical parent and child in presence of Misc objects.
  940. Thanks to Dave Love for reporting the issue.
  941. * Fix some misordering of children when merging with ignore_keep_structure()
  942. in partially allowed topologies.
  943. * Fix an overzealous assertion in the debug code when running on a single-PU
  944. host with I/O. Thanks to Thomas Van Doren for reporting the issue.
  945. * Don't forget to setup NUMA node object nodesets in x86 backend (for BSDs)
  946. and OSF/Tru64 backend.
  947. * Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren
  948. for reporting the issue.
  949. * Fix support for future very large caches in the x86 backend.
  950. * Fix vendor/device names for SR-IOV PCI devices on Linux.
  951. * Fix an unlikely crash in case of buggy hierarchical distance matrix.
  952. * Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and
  953. Erik Schnetter for helping debugging.
  954. * Fix hwloc_bitmap_isincluded() in case of infinite sets.
  955. * Change hwloc-ls.desktop into a lstopo.desktop and only install it if
  956. lstopo is built with Cairo/X11 support. It cannot work with a non-graphical
  957. lstopo or hwloc-ls.
  958. * Add support for the renaming of Socket into Package in future releases.
  959. * Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE
  960. in future releases.
  961. * Clarify the documentation of distance matrices in hwloc.h and in the manpage
  962. of hwloc-distances. Thanks to Dave Love for the suggestion.
  963. * Improve some error messages by displaying more information about the
  964. hwloc library in use.
  965. * Document how to deal with the ABI break when upgrading to the upcoming 2.0.
  966. See "How do I handle ABI breaks and API upgrades ?" in the FAQ.
  967. Version 1.10.0
  968. --------------
  969. * API
  970. + Add hwloc_topology_export_synthetic() to export a topology to a
  971. synthetic string without using lstopo. See the Synthetic topologies
  972. section in the documentation.
  973. + Add hwloc_topology_set/get_userdata() to let the application save
  974. a private pointer in the topology whenever it needs a way to find
  975. its own object corresponding to a topology.
  976. + Add hwloc_get_numanode_obj_by_os_index() and document that this function
  977. as well as hwloc_get_pu_obj_by_os_index() are good at converting
  978. nodesets and cpusets into objects.
  979. + hwloc_distrib() does not ignore any objects anymore when there are
  980. too many of them. They get merged with others instead.
  981. Thanks to Tim Creech for reporting the issue.
  982. * Tools
  983. + hwloc-bind --get <command-line> now executes the command after displaying
  984. the binding instead of ignoring the command entirely.
  985. Thanks to John Donners for the suggestion.
  986. + Clarify that memory sizes shown in lstopo are local by default
  987. unless specified (total memory added in the root object).
  988. * Synthetic topologies
  989. + Synthetic topology descriptions may now specify attributes such as
  990. memory sizes and OS indexes. See the Synthetic topologies section
  991. in the documentation.
  992. + lstopo now exports in this fully-detailed format by default.
  993. The new option --export-synthetic-flags may be used to revert
  994. back to the old format.
  995. * Documentation
  996. + Add the doc/examples/ subdirectory with several real-life examples,
  997. including the already existing hwloc-hello.c for basics.
  998. Thanks to Rob Aulwes for the suggestion.
  999. + Improve the documentation of CPU and memory binding in the API.
  1000. + Add a FAQ entry about operating system errors, especially on AMD
  1001. platforms with buggy cache information.
  1002. + Add a FAQ entry about loading many topologies in a single program.
  1003. * Misc
  1004. + Work around buggy Linux kernels reporting 2 sockets instead
  1005. of 1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor.
  1006. + pciutils/libpci support is now removed since libpciaccess works
  1007. well and there's also a Linux-specific PCI backend. For the record,
  1008. pciutils was GPL and therefore disabled by default since v1.6.2.
  1009. + Add --disable-cpuid configure flag to work around buggy processor
  1010. simulators reporting invalid CPUID information.
  1011. Thanks to Andrew Friedley for reporting the issue.
  1012. + Fix a racy use of libltdl when manipulating multiple topologies in
  1013. different threads.
  1014. Thanks to Andra Hugo for reporting the issue and testing patches.
  1015. + Fix some build failures in private/misc.h.
  1016. Thanks to Pavan Balaji and Ralph Castain for the reports.
  1017. + Fix failures to detect X11/Xutil.h on some Solaris platforms.
  1018. Thanks to Siegmar Gross for reporting the failure.
  1019. + The plugin ABI has changed, this release will not load plugins
  1020. built against previous hwloc releases.
  1021. Version 1.9.1
  1022. -------------
  1023. * Fix a crash when the PCI locality is invalid. Attach to the root object
  1024. instead. Thanks to Nicolas Denoyelle for reporting the issue.
  1025. * Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue.
  1026. * Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly
  1027. available. Thanks to Nick Papior Andersen for reporting the problem.
  1028. * Mark Linux file descriptors as close-on-exec to avoid leaks on exec.
  1029. * Fix some minor memory leaks.
  1030. Version 1.9.0
  1031. -------------
  1032. * API
  1033. + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with
  1034. type-specific attributes such as Cache/Group depth and Cache type.
  1035. hwloc_obj_type_of_string() is moved to hwloc/deprecated.h.
  1036. + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the
  1037. last CPU where a Linux thread given by TID ran.
  1038. + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions.
  1039. hwloc_distribute[v]() is moved to hwloc/deprecated.h.
  1040. + Don't mix total and local memory when displaying verbose object attributes
  1041. with hwloc_obj_attr_snprintf() or in lstopo.
  1042. * Backends
  1043. + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for
  1044. x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific
  1045. support added in v1.8.1. Requested by Ralph Castain.
  1046. + Add many CPU- and Platform-related info attributes on ARM and POWER
  1047. platforms, in the Machine and Socket objects.
  1048. + Add CUDA info attributes describing the number of multiprocessors and
  1049. cores and the size of the global, shared and L2 cache memories in CUDA
  1050. OS devices.
  1051. + Add OpenCL info attributes describing the number of compute units and
  1052. the global memory size in OpenCL OS devices.
  1053. + The synthetic backend now accepts extended types such as L2Cache, L1i or
  1054. Group3. lstopo also exports synthetic strings using these extended types.
  1055. * Tools
  1056. + lstopo
  1057. - Do not overwrite output files by default anymore.
  1058. Pass -f or --force to enforce it.
  1059. - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes
  1060. in the graphical output.
  1061. - Fix export to stdout when specifying a Cairo-based output type
  1062. with --of.
  1063. + hwloc-ps
  1064. - Add -e or --get-last-cpu-location to report where processes/threads
  1065. run instead of where they are bound.
  1066. - Report locations as likely-more-useful objects such as Cores or Sockets
  1067. instead of Caches when possible.
  1068. + hwloc-bind
  1069. - Fix failure on Windows when not using --pid.
  1070. - Add -e as a synonym to --get-last-cpu-location.
  1071. + hwloc-distrib
  1072. - Add --reverse to distribute using last objects first and singlify
  1073. into last bits first. Thanks to Jirka Hladky for the suggestion.
  1074. + hwloc-info
  1075. - Report unified caches when looking for data or instruction cache
  1076. ancestor objects.
  1077. * Misc
  1078. + Add experimental Visual Studio support under contrib/windows.
  1079. Thanks to Eloi Gaudry for his help and for providing the first draft.
  1080. + Fix some overzealous assertions and warnings about the ordering of
  1081. objects on a level with respect to cpusets. The ordering is only
  1082. guaranteed for complete cpusets (based on the first bit in sets).
  1083. + Fix some memory leaks when importing xml diffs and when exporting a
  1084. "too complex" entry.
  1085. Version 1.8.1
  1086. -------------
  1087. * Fix the cpuid code on Windows 64bits so that the x86 backend gets
  1088. enabled as expected and can populate CPU information.
  1089. Thanks to Robin Scher for reporting the problem.
  1090. * Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running
  1091. on x86 architecture. Thanks to Ralph Castain for the suggestion.
  1092. * Work around buggy BIOS reporting duplicate NUMA nodes on Linux.
  1093. Thanks to Jeff Becker for reporting the problem and testing the patch.
  1094. * Add a name to the lstopo graphical window. Thanks to Michael Prokop
  1095. for reporting the issue.
  1096. Version 1.8.0
  1097. -------------
  1098. * New components
  1099. + Add the "linuxpci" component that always works on Linux even when
  1100. libpciaccess and libpci aren't available (and even with a modified
  1101. file-system root). By default the old "pci" component runs first
  1102. because "linuxpci" lacks device names (obj->name is always NULL).
  1103. * API
  1104. + Add the topology difference API in hwloc/diff.h for manipulating
  1105. many similar topologies.
  1106. + Add hwloc_topology_dup() for duplicating an entire topology.
  1107. + hwloc.h and hwloc/helper.h have been reorganized to clarify the
  1108. documentation sections. The actual inline code has moved out of hwloc.h
  1109. into the new hwloc/inlines.h.
  1110. + Deprecated functions are now in hwloc/deprecated.h, and not in the
  1111. official documentation anymore.
  1112. * Tools
  1113. + Add hwloc-diff and hwloc-patch tools together with the new diff API.
  1114. + Add hwloc-compress-dir to (de)compress an entire directory of XML files
  1115. using hwloc-diff and hwloc-patch.
  1116. + Object colors in the graphical output of lstopo may be changed by adding
  1117. a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage
  1118. for details. Thanks to Jirka Hladky for discussing the idea.
  1119. + hwloc-gather-topology may now gather I/O-related files on Linux when
  1120. --io is given. Only the linuxpci component supports discovering I/O
  1121. objects from these extended tarballs.
  1122. + hwloc-annotate now supports --ri to remove/replace info attributes with
  1123. a given name.
  1124. + hwloc-info supports "root" and "all" special locations for dumping
  1125. information about the root object.
  1126. + lstopo now supports --append-legend to append custom lines of text
  1127. to the legend in the graphical output. Thanks to Jirka Hladky for
  1128. discussing the idea.
  1129. + hwloc-calc and friends have a more robust parsing of locations given
  1130. on the command-line and they report useful error messages about it.
  1131. + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and
  1132. hwloc-distrib, and add --restrict to hwloc-bind for uniformity among
  1133. tools.
  1134. * Misc
  1135. + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already
  1136. loaded topology now returns an error (deprecated since release 1.6.1).
  1137. + Fix the initialisation of cpusets and nodesets in Group objects added
  1138. when inserting PCI hostbridges.
  1139. + Never merge Group objects that were added explicitly by the user with
  1140. hwloc_custom_insert_group_object_by_parent().
  1141. + Add a sanity check during dynamic plugin loading to prevent some
  1142. crashes when hwloc is dynamically loaded by another plugin mechanism.
  1143. + Add --with-hwloc-plugins-path to specify the install/load directories
  1144. of plugins.
  1145. + Add the MICSerialNumber info attribute to the root object when running
  1146. hwloc inside a Xeon Phi to match the same attribute in the MIC OS device
  1147. when running in the host.
  1148. Version 1.7.2
  1149. -------------
  1150. * Do not create invalid block OS devices on very old Linux kernels such
  1151. as RHEL4 2.6.9.
  1152. * Fix PCI subvendor/device IDs.
  1153. * Fix the management of Misc objects inserted by parent.
  1154. Thanks to Jirka Hladky for reporting the problem.
  1155. * Add a Port<n>State info attribute to OpenFabrics OS devices.
  1156. * Add a MICSerialNumber info attribute to Xeon Phi/MIC OS devices.
  1157. * Improve verbose error messages when failing to load from XML.
  1158. Version 1.7.1
  1159. -------------
  1160. * Fix a failed assertion in the distance grouping code when loading an XML
  1161. file that already contains some groups.
  1162. Thanks to Laercio Lima Pilla for reporting the problem.
  1163. * Remove unexpected Group objects when loading XML topologies with I/O
  1164. objects and NUMA distances.
  1165. Thanks to Elena Elkina for reporting the problem and testing patches.
  1166. * Fix PCI link speed discovery when using libpciaccess.
  1167. * Fix invalid libpciaccess virtual function device/vendor IDs when using
  1168. SR-IOV PCI devices on Linux.
  1169. * Fix GL component build with old NVCtrl releases.
  1170. Thanks to Jirka Hladky for reporting the problem.
  1171. * Fix embedding breakage caused by libltdl.
  1172. Thanks to Pavan Balaji for reporting the problem.
  1173. * Always use the system-wide libltdl instead of shipping one inside hwloc.
  1174. * Document issues when enabling plugins while embedding hwloc in another
  1175. project, in the documentation section Embedding hwloc in Other Software.
  1176. * Add a FAQ entry "How to get useful topology information on NetBSD?"
  1177. in the documentation.
  1178. * Some fixes in the renaming code for embedding.
  1179. * Miscellaneous minor build fixes.
  1180. Version 1.7.0
  1181. -------------
  1182. * New operating system backends
  1183. + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the
  1184. documentation for details. Thanks to Jeff Hammond, Christopher Samuel
  1185. and Erik Schnetter for their help.
  1186. + Add NetBSD support, thanks to Aleksej Saushev.
  1187. * New I/O device discovery
  1188. + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC)
  1189. on Linux. Thanks to Jerome Vienne for helping.
  1190. + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs.
  1191. + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices
  1192. on the AMD OpenCL implementation.
  1193. + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays.
  1194. + Add GPU OS devices such as "nvml0" for NVIDIA GPUs.
  1195. Thanks to Marwan Abdellah and Stefan Eilemann for helping.
  1196. These new OS devices have some string info attributes such as CoProcType,
  1197. GPUModel, etc. to better identify them.
  1198. See the I/O Devices and Attributes documentation sections for details.
  1199. * New components
  1200. + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device
  1201. discovery.
  1202. + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed.
  1203. All of these new components may be built as plugins. They may also be
  1204. disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure.
  1205. See the I/O Devices, Components and Plugins, and FAQ documentation
  1206. sections for details.
  1207. * API
  1208. + Add hwloc_topology_get_flags().
  1209. + Add hwloc/plugins.h for building external plugins.
  1210. See the Adding new discovery components and plugins section.
  1211. * Interoperability
  1212. + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h
  1213. to retrieve the locality of OS devices that correspond to AMD OpenCL
  1214. GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11
  1215. displays, or to Intel Xeon Phi (MIC) device indexes.
  1216. + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert
  1217. between CUDA devices or indexes and hwloc OS devices.
  1218. + Add hwloc_ibv_get_device_osdev() and clarify the requirements
  1219. of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h.
  1220. * Tools
  1221. + hwloc-info is not only a synonym of lstopo -s anymore, it also
  1222. dumps information about objects given on the command-line.
  1223. * Documentation
  1224. + Add a section "Existing components and plugins".
  1225. + Add a list of common OS devices in section "Software devices".
  1226. + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness
  1227. issues because of GPUs.
  1228. + Clarify the documentation of inline helpers in hwloc/myriexpress.h
  1229. and hwloc/openfabrics-verbs.h.
  1230. * Misc
  1231. + Improve cache detection on AIX.
  1232. + The HWLOC_COMPONENTS variable now excludes the components whose
  1233. names are prefixed with '-'.
  1234. + lstopo --ignore PU now works when displaying the topology in
  1235. graphical and textual mode (not when exporting to XML).
  1236. + Make sure I/O options always appear in lstopo usage, not only when
  1237. using pciutils/libpci.
  1238. + Remove some unneeded Linux specific includes from some interoperability
  1239. headers.
  1240. + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote
  1241. manpages. Thanks to Guy Streeter for the report.
  1242. + Fix a memory leak on AIX when getting memory binding.
  1243. + Fix many small memory leaks on Linux.
  1244. + The `libpci' component is now called `pci' but the old name is still
  1245. accepted in the HWLOC_COMPONENTS variable for backward compatibility.
  1246. Version 1.6.2
  1247. -------------
  1248. * Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
  1249. pciutils/libpci is only used if --enable-libpci is given to configure
  1250. because its GPL license may taint hwloc. See the Installation section
  1251. in the documentation for details.
  1252. * Fix get_cpubind on Solaris when bound to a single PU with
  1253. processor_bind(). Thanks to Eugene Loh for reporting the problem
  1254. and providing a patch.
  1255. Version 1.6.1
  1256. -------------
  1257. * Fix some crash or buggy detection in the x86 backend when Linux
  1258. cgroups/cpusets restrict the available CPUs.
  1259. * Fix the pkg-config output with --libs --static.
  1260. Thanks to Erik Schnetter for reporting one of the problems.
  1261. * Fix the output of hwloc-calc -H --hierarchical when using logical
  1262. indexes in the output.
  1263. * Calling hwloc_topology_load() multiple times on the same topology
  1264. is officially deprecated. hwloc will warn in such cases.
  1265. * Add some documentation about existing plugins/components, package
  1266. dependencies, and I/O devices specification on the command-line.
  1267. Version 1.6.0
  1268. -------------
  1269. * Major changes
  1270. + Reorganize the backend infrastructure to support dynamic selection
  1271. of components and dynamic loading of plugins. For details, see the
  1272. new documentation section Components and plugins.
  1273. - The HWLOC_COMPONENTS variable lets one replace the default discovery
  1274. components.
  1275. - Dynamic loading of plugins may be enabled with --enable-plugins
  1276. (except on AIX and Windows). It will build libxml2 and libpci
  1277. support as separate modules. This helps reduce the dependencies
  1278. of the core hwloc library when distributed as a binary package.
  1279. * Backends
  1280. + Add CPUModel detection on Darwin and x86/FreeBSD.
  1281. Thanks to Robin Scher for providing ways to implement this.
  1282. + The x86 backend now adds CPUModel info attributes to socket objects
  1283. created by other backends that do not natively support this attribute.
  1284. + Fix detection on FreeBSD in case of cpuset restriction. Thanks to
  1285. Sebastian Kuzminsky for reporting the problem.
  1286. * XML
  1287. + Add hwloc_topology_set_userdata_import/export_callback(),
  1288. hwloc_export_obj_userdata() and _userdata_base64() to let
  1289. applications specify how to save/restore the custom data they placed
  1290. in the userdata private pointer field of hwloc objects.
  1291. * Tools
  1292. + Add hwloc-annotate program to add string info attributes to XML
  1293. topologies.
  1294. + Add --pid-cmd to hwloc-ps to append the output of a command to each
  1295. PID line. May be used for showing Open MPI process ranks, see the
  1296. hwloc-ps(1) manpage for details.
  1297. + hwloc-bind now exits with an error if binding fails; the executable
  1298. is not launched unless binding succeeded or --force was given.
  1299. + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error
  1300. messages.
  1301. + Fix command-line pid support in windows tools.
  1302. + All programs accept --verbose as a synonym to -v.
  1303. * Misc
  1304. + Fix some DIR descriptor leaks on Linux.
  1305. + Fix I/O device lists when some were filtered out after an XML import.
  1306. + Fix the removal of I/O objects when importing an I/O-enabled XML topology
  1307. without any I/O topology flag.
  1308. + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or
  1309. lstopo --merge, compare object types before deciding which one of two
  1310. identical objects to remove (e.g. keep sockets in favor of caches).
  1311. + Add some GUID- and LID-related info attributes to OpenFabrics
  1312. OS devices.
  1313. + Only add CPUType socket attributes on Solaris/Sparc. Other cases
  1314. don't report reliable information (Solaris/x86), and a replacement
  1315. is available as the Architecture string info in the Machine object.
  1316. + Add missing Backend string info on Solaris in most cases.
  1317. + Document object attributes and string infos in a new Attributes
  1318. section in the documentation.
  1319. + Add a section about Synthetic topologies in the documentation.
  1320. Version 1.5.2 (some of these changes are in 1.6.2 but not in 1.6)
  1321. -------------
  1322. * Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
  1323. pciutils/libpci is only used if --enable-libpci is given to configure
  1324. because its GPL license may taint hwloc. See the Installation section
  1325. in the documentation for details.
  1326. * Fix get_cpubind on Solaris when bound to a single PU with
  1327. processor_bind(). Thanks to Eugene Loh for reporting the problem
  1328. and providing a patch.
  1329. * Fix some DIR descriptor leaks on Linux.
  1330. * Fix I/O device lists when some were filtered out after an XML import.
  1331. * Add missing Backend string info on Solaris in most cases.
  1332. * Fix the removal of I/O objects when importing an I/O-enabled XML topology
  1333. without any I/O topology flag.
  1334. * Fix the output of hwloc-calc -H --hierarchical when using logical
  1335. indexes in the output.
  1336. * Fix the pkg-config output with --libs --static.
  1337. Thanks to Erik Schnetter for reporting one of the problems.
  1338. Version 1.5.1
  1339. -------------
  1340. * Fix block OS device detection on Linux kernel 3.3 and later.
  1341. Thanks to Guy Streeter for reporting the problem and testing the fix.
  1342. * Fix the cpuid code in the x86 backend (for FreeBSD). Thanks to
  1343. Sebastian Kuzminsky for reporting problems and testing patches.
  1344. * Fix 64bit detection on FreeBSD.
  1345. * Fix some corner cases in the management of the thissystem flag with
  1346. respect to topology flags and environment variables.
  1347. * Fix some corner cases in command-line parsing checks in hwloc-distrib
  1348. and hwloc-distances.
  1349. * Make sure we do not miss some block OS devices on old Linux kernels
  1350. when a single PCI device has multiple IDE hosts/devices behind it.
  1351. * Do not disable I/O devices or instruction caches in hwloc-assembler output.
  1352. Version 1.5.0
  1353. -------------
  1354. * Backends
  1355. + Do not limit the number of processors to 1024 on Solaris anymore.
  1356. + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt.
  1357. + XML topology files do not depend on the locale anymore. Float numbers
  1358. such as NUMA distances or PCI link speeds now always use a dot as a
  1359. decimal separator.
  1360. + Add instruction caches detection on Linux, AIX, Windows and Darwin.
  1361. + Add get_last_cpu_location() support for the current thread on AIX.
  1362. + Support binding on AIX when threads or processes were bound with
  1363. bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue
  1364. and testing patches, and to Farid Parpia for explaining the binding
  1365. interfaces.
  1366. + Improve AMD topology detection in the x86 backend (for FreeBSD) using
  1367. the topoext feature.
  1368. * API
  1369. + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be
  1370. detected at build-time.
  1371. + Add a cache type attribute describing Data, Instruction and Unified
  1372. caches. Caches with different types but same depth (for instance L1d
  1373. and L1i) are placed on different levels.
  1374. + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth of
  1375. the given cache depth and type, for instance L1i or L2.
  1376. It helps disambiguate the case where hwloc_get_type_depth() returns
  1377. HWLOC_TYPE_DEPTH_MULTIPLE.
  1378. + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is
  1379. passed to hwloc_topology_set_flags() before load.
  1380. + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in
  1381. openfabrics-verbs.h to find the hwloc OS device object corresponding to
  1382. an OpenFabrics device.
  1383. * Tools
  1384. + Add lstopo-no-graphics, a lstopo built without graphical support to
  1385. avoid dependencies on external libraries such as Cairo and X11. When
  1386. supported, graphical outputs are only available in the original lstopo
  1387. program.
  1388. - Packagers splitting lstopo and lstopo-no-graphics into different
  1389. packages are advised to use the alternatives system so that lstopo
  1390. points to the best available binary.
  1391. + Instruction caches are enabled in lstopo by default. Use --no-icaches
  1392. to disable them.
  1393. + Add -t/--threads to show threads in hwloc-ps.
  1394. * Removal of obsolete components
  1395. + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and
  1396. superseded by the bitmap API (hwloc/bitmap.h) since v1.1.
  1397. hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_*
  1398. compatibility wrappers are now gone.
  1399. + Remove Linux libnuma conversion helpers for the deprecated and
  1400. broken nodemask_t interface.
  1401. + Remove support for "Proc" type name, it was superseded by "PU" in v1.0.
  1402. + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0.
  1403. * Misc
  1404. + Fix PCIe 3.0 link speed computation.
  1405. + Non-printable characters are dropped from strings during XML export.
  1406. + Fix importing of escaped characters with the minimalistic XML backend.
  1407. + Assert hwloc_is_thissystem() in several I/O related helpers.
  1408. + Fix some memory leaks in the x86 backend for FreeBSD.
  1409. + Minor fixes to ease native builds on Windows.
  1410. + Limit the number of retries when operating on all threads within a
  1411. process on Linux if the list of threads is heavily getting modified.
  1412. Version 1.4.3
  1413. -------------
  1414. * This release is only meant to fix the pciutils license issue when upgrading
  1415. to hwloc v1.5 or later is not possible. It contains several other minor
  1416. fixes but ignores many of them that are only in v1.5 or later.
  1417. * Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
  1418. pciutils/libpci is only used if --enable-libpci is given to configure
  1419. because its GPL license may taint hwloc. See the Installation section
  1420. in the documentation for details.
  1421. * Fix PCIe 3.0 link speed computation.
  1422. * Fix importing of escaped characters with the minimalistic XML backend.
  1423. * Fix a memory leak in the x86 backend.
  1424. Version 1.4.2
  1425. -------------
  1426. * Fix build on Solaris 9 and earlier when fabsf() is not a compiler
  1427. built-in. Thanks to Igor Galić for reporting the problem.
  1428. * Fix support for more than 32 processors on Windows. Thanks to Hartmut
  1429. Kaiser for reporting the problem.
  1430. * Fix process-wide binding and cpulocation routines on Linux when some
  1431. threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting
  1432. the issue.
  1433. * Make installed scripts executable. Thanks to Jirka Hladky for reporting
  1434. the problem.
  1435. * Fix libtool revision management when building for Windows. This fix was
  1436. also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser
  1437. for reporting the problem.
  1438. * Fix the __hwloc_inline keyword in public headers when compiling with a
  1439. C++ compiler.
  1440. * Add Port info attribute to network OS devices inside OpenFabrics PCI
  1441. devices so as to identify which interface corresponds to which port.
  1442. * Document requirements for interoperability helpers: I/O devices discovery
  1443. is required for some of them; the topology must match the current host
  1444. for most of them.
  1445. Version 1.4.1 (contains all 1.3.2 changes)
  1446. -------------
  1447. * Fix hwloc_alloc_membind, thanks to Karl Napf for reporting the issue.
  1448. * Fix memory leaks in some get_membind() functions.
  1449. * Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h)
  1450. in case of out-of-order NUMA node ids.
  1451. * Fix some overzealous assertions in the distance grouping code.
  1452. * Work around BIOS reporting empty I/O locality in CUDA and OpenFabrics
  1453. helpers on Linux. Thanks to Albert Solernou for reporting the problem.
  1454. * Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ).
  1455. * Fix memory binding documentation. Thanks to Karl Napf for reporting the
  1456. issues.
  1457. Version 1.4.0 (does not contain all 1.3.2 changes)
  1458. -------------
  1459. * Major features
  1460. + Add "custom" interface and "assembler" tools to build multi-node
  1461. topology. See the Multi-node Topologies section in the documentation
  1462. for details.
  1463. * Interface improvements
  1464. + Add symmetric_subtree object attribute to ease assumptions when consulting
  1465. regular symmetric topologies.
  1466. + Add a CPUModel and CPUType info attribute to Socket objects on Linux
  1467. and Solaris.
  1468. + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index
  1469. of an object within a subtree of the topology.
  1470. + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects
  1471. corresponding to CUDA devices.
  1472. * Discovery improvements
  1473. + Add a group object above partial distance matrices to make sure
  1474. the matrices are available in the final topology, except when this
  1475. new object would contradict the existing hierarchy.
  1476. + Grouping by distances now also works when loading from XML.
  1477. + Fix some corner cases in object insertion, for instance when dealing
  1478. with NUMA nodes without any CPU.
  1479. * Backends
  1480. + Implement hwloc_get_area_membind() on Linux.
  1481. + Honor I/O topology flags when importing from XML.
  1482. + Further improve XML-related error checking and reporting.
  1483. + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1.
  1484. * Tools
  1485. + Add synthetic exporting of symmetric topologies to lstopo.
  1486. + lstopo --horiz and --vert can now be applied to some specific object types.
  1487. + lstopo -v -p now displays distance matrices with physical indexes.
  1488. + Add hwloc-distances utility to list distances.
  1489. * Documentation
  1490. + Fix and/or document the behavior of most inline functions in hwloc/helper.h
  1491. when the topology contains some I/O or Misc objects.
  1492. + Backend documentation enhancements.
  1493. * Bug fixes
  1494. + Fix missing last bit in hwloc_linux_get_thread_cpubind().
  1495. Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue.
  1496. + Fix FreeBSD build without cpuid support.
  1497. + Fix several Windows build issues.
  1498. + Fix inline keyword definition in public headers.
  1499. + Fix dependencies in the embedded library.
  1500. + Improve visibility support detection. Thanks to Dave Love for providing
  1501. the patch.
  1502. + Remove references to internal symbols in the tools.
  1503. Version 1.3.3
  1504. -------------
  1505. * This release is only meant to fix the pciutils license issue when upgrading
  1506. to hwloc v1.4 or later is not possible. It contains several other minor
  1507. fixes but ignores many of them that are only in v1.4 or later.
  1508. * Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
  1509. pciutils/libpci is only used if --enable-libpci is given to configure
  1510. because its GPL license may taint hwloc. See the Installation section
  1511. in the documentation for details.
  1512. Version 1.3.2
  1513. -------------
  1514. * Fix missing last bit in hwloc_linux_get_thread_cpubind().
  1515. Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue.
  1516. * Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting
  1517. the issue.
  1518. * Fix build with Solaris Studio 12 compiler when XML is disabled.
  1519. Thanks to Paul H. Hargrove for reporting the problem.
  1520. * Fix installation with old GNU sed, for instance on Red Hat 8.
  1521. Thanks to Paul H. Hargrove for reporting the problem.
  1522. * Fix PCI locality when Linux cgroups restrict the available CPUs.
  1523. * Fix floating point issue when grouping by distance on mips64 architecture.
  1524. Thanks to Paul H. Hargrove for reporting the problem.
  1525. * Fix conversion from/to Linux libnuma when some NUMA nodes have no memory.
  1526. * Fix support for gccfss compilers with broken ffs() support. Thanks to
  1527. Paul H. Hargrove for reporting the problem and providing a patch.
  1528. * Fix FreeBSD build without cpuid support.
  1529. * Fix several Windows build issues.
  1530. * Fix inline keyword definition in public headers.
  1531. * Fix dependencies in the embedded library.
  1532. * Detect when a compiler such as xlc may not report compile errors
  1533. properly, causing some configure checks to be wrong. Thanks to
  1534. Paul H. Hargrove for reporting the problem and providing a patch.
  1535. * Improve visibility support detection. Thanks to Dave Love for providing
  1536. the patch.
  1537. * Remove references to internal symbols in the tools.
  1538. * Fix installation on systems with limited command-line size.
  1539. Thanks to Paul H. Hargrove for reporting the problem.
  1540. * Further improve XML-related error checking and reporting.
  1541. Version 1.3.1
  1542. -------------
  1543. * Fix pciutils detection with pkg-config when not installed in standard
  1544. directories.
  1545. * Fix visibility options detection with the Solaris Studio compiler.
  1546. Thanks to Igor Galić and Terry Dontje for reporting the problems.
  1547. * Fix support for old Linux sched.h headers such as those found
  1548. on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems.
  1549. * Fix inline and attribute support for Solaris compilers. Thanks to
  1550. Dave Love for reporting the problems.
  1551. * Print a short summary at the end of the configure output. Thanks to
  1552. Stefan Eilemann for the suggestion.
  1553. * Add --disable-libnuma configure option to disable libnuma-based
  1554. memory binding support on Linux. Thanks to Rayson Ho for the
  1555. suggestion.
  1556. * Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to
  1557. Nathan Phillip Brink for raising the issue.
  1558. * Silence some harmless pciutils warnings, thanks to Paul H. Hargrove
  1559. for reporting the problem.
  1560. * Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t
  1561. being either pid_t and pthread_t on Unix, or HANDLE on Windows.
  1562. Version 1.3.0
  1563. -------------
  1564. * Major features
  1565. + Add I/O devices and bridges to the topology using the pciutils
  1566. library. Only enabled after setting the relevant flag with
  1567. hwloc_topology_set_flags() before hwloc_topology_load(). See the
  1568. I/O Devices section in the documentation for details.
  1569. * Discovery improvements
  1570. + Add associativity to the cache attributes.
  1571. + Add support for s390/z11 "books" on Linux.
  1572. + Add the HWLOC_GROUPING_ACCURACY environment variable to relax
  1573. distance-based grouping constraints. See the Environment Variables
  1574. section in the documentation for details about grouping behavior
  1575. and configuration.
  1576. + Allow user-given distance matrices to remove or replace those
  1577. discovered by the OS backend.
  1578. * XML improvements
  1579. + XML is now always supported: a minimalistic custom import/export
  1580. code is used when libxml2 is not available. It is only guaranteed
  1581. to read XML files generated by hwloc.
  1582. + hwloc_topology_export_xml() and export_xmlbuffer() now return an
  1583. integer.
  1584. + Add hwloc_free_xmlbuffer() to free the buffer allocated by
  1585. hwloc_topology_export_xmlbuffer().
  1586. + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1.
  1587. * Minor API updates
  1588. + Add hwloc_obj_add_info to customize object info attributes.
  1589. * Tools
  1590. + lstopo now displays I/O devices by default. Several options are
  1591. added to configure the I/O discovery.
  1592. + hwloc-calc and hwloc-bind now accept I/O devices as input.
  1593. + Add --restrict option to hwloc-calc and hwloc-distribute.
  1594. + Add --sep option to change the output field separator in hwloc-calc.
  1595. + Add --whole-system option to hwloc-ps.
  1596. Version 1.2.2
  1597. -------------
  1598. * Fix build on AIX 5.2, thanks to Utpal Kumar Ray for the report.
  1599. * Fix XML import of very large page sizes or counts on 32-bit platforms,
  1600. thanks to Karsten Hopp for the RedHat ticket.
  1601. * Fix crash when administrator limitations such as Linux cgroups require
  1602. distance matrices to be restricted. Thanks to Ake Sandgren for reporting the
  1603. problem.
  1604. * Fix the removal of objects such as AMD Magny-Cours dual-node sockets
  1605. in case of administrator restrictions.
  1606. * Improve error reporting and messages in case of wrong synthetic topology
  1607. description.
  1608. * Several other minor internal fixes and documentation improvements.
  1609. Version 1.2.1
  1610. -------------
  1611. * Improve support of AMD Bulldozer "Compute-Unit" modules by detecting
  1612. logical processors with different core IDs on Linux.
  1613. * Fix hwloc-ps crash when listing processes from another Linux cpuset.
  1614. Thanks to Carl Smith for reporting the problem.
  1615. * Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries
  1616. for reporting the problems.
  1617. * Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting
  1618. the problem.
  1619. * Make configure fail if --enable-xml or --enable-cairo is given and
  1620. proper support cannot be found. Thanks to Andreas Kupries for reporting
  1621. the XML problem.
  1622. * Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann
  1623. for reporting the problem.
  1624. * Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele
  1625. Fatigati for reporting the problem.
  1626. * Fix object distance detection on Solaris.
  1627. * Add pthread_self weak symbol to ease static linking.
  1628. * Minor documentation fixes.
  1629. Version 1.2.0
  1630. -------------
  1631. * Major features
  1632. + Expose latency matrices in the API as an array of distance structures
  1633. within objects. Add several helpers to find distances.
  1634. + Add hwloc_topology_set_distance_matrix() and environment variables
  1635. to provide a matrix of distances between a given set of objects.
  1636. + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location()
  1637. to retrieve the processors where a process or thread recently ran.
  1638. - Add the corresponding --get-last-cpu-location option to hwloc-bind.
  1639. + Add hwloc_topology_restrict() to restrict an existing topology to a
  1640. given cpuset.
  1641. - Add the corresponding --restrict option to lstopo.
  1642. * Minor API updates
  1643. + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps
  1644. and strings such as 4-5,7-9,12,15-
  1645. + hwloc_bitmap_set/clr_range() now support infinite ranges.
  1646. + Clarify the difference between inserting Misc objects by cpuset or by
  1647. parent.
  1648. + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error.
  1649. * Discovery improvements
  1650. + x86 backend (for freebsd): add x2APIC support
  1651. + Support standard device-tree phandle, to get better support on e.g. ARM
  1652. systems providing it.
  1653. + Detect cache size on AIX. Thanks Christopher and IBM.
  1654. + Improve grouping to support asymmetric topologies.
  1655. * Tools
  1656. + Command-line tools now support "all" and "root" special locations
  1657. consisting of the entire topology, as well as type names with depth
  1658. attributes such as L2 or Group4.
  1659. + hwloc-calc improvements:
  1660. - Add --number-of/-N option to report the number of objects of a given
  1661. type or depth.
  1662. - -I is now equivalent to --intersect for listing the indexes of
  1663. objects of a given type or depth that intersect the input.
  1664. - Add -H to report the output as a hierarchical combination of types
  1665. and depths.
  1666. + Add --thissystem to lstopo.
  1667. + Add lstopo-win, a console-less lstopo variant on Windows.
  1668. * Miscellaneous
  1669. + Remove C99 usage from code base.
  1670. + Rename hwloc-gather-topology.sh into hwloc-gather-topology
  1671. + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks to
  1672. Andriy Gapon for the fix.
  1673. Version 1.1.2
  1674. -------------
  1675. * Fix a segfault in the distance-based grouping code when some objects
  1676. are not placed in any group. Thanks to Bernd Kallies for reporting
  1677. the problem and providing a patch.
  1678. * Fix the command-line parsing of hwloc-bind --mempolicy interleave.
  1679. Thanks to Guy Streeter for reporting the problem.
  1680. * Stop truncating the output in hwloc_obj_attr_snprintf() and in the
  1681. corresponding lstopo output. Thanks to Guy Streeter for reporting the
  1682. problem.
  1683. * Fix object levels ordering in synthetic topologies.
  1684. * Fix potential incoherency between device tree and kernel information,
  1685. when SMT is disabled on Power machines.
  1686. * Fix and document the behavior of hwloc_topology_set_synthetic() in case
  1687. of invalid argument. Thanks to Guy Streeter for reporting the problem.
  1688. * Add some verbose error message reporting when it looks like the OS
  1689. gives erroneous information.
  1690. * Do not include unistd.h and stdint.h in public headers on Windows.
  1691. * Move config.h files into their own subdirectories to avoid name
  1692. conflicts when AC_CONFIG_HEADERS adds -I's for them.
  1693. * Remove the use of declaring variables inside "for" loops.
  1694. * Some other minor fixes.
  1695. * Many minor documentation fixes.
  1696. Version 1.1.1
  1697. -------------
  1698. * Add hwloc_get_api_version() which returns the version of hwloc used
  1699. at runtime. Thanks to Guy Streeter for the suggestion.
  1700. * Fix the number of hugepages reported for NUMA nodes on Linux.
  1701. * Fix hwloc_bitmap_to_ulong() right after allocating the bitmap.
  1702. Thanks to Bernd Kallies for reporting the problem.
  1703. * Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong.
  1704. Thanks to Guy Streeter for reporting the problem.
  1705. * Fix hwloc_get_membind_nodeset() on Linux.
  1706. Thanks to Bernd Kallies for reporting the problem and providing a patch.
  1707. * Fix some file descriptor leaks in the Linux discovery.
  1708. * Fix the minimum width of NUMA nodes, caches and the legend in the graphical
  1709. lstopo output. Thanks to Jirka Hladky for reporting the problem.
  1710. * Various fixes to bitmap conversion from/to taskset-strings.
  1711. * Fix and document snprintf functions behavior when the buffer size is too
  1712. small or zero. Thanks to Guy Streeter for reporting the problem.
  1713. * Fix configure to avoid spurious enabling of the cpuid backend.
  1714. Thanks to Tim Anderson for reporting the problem.
  1715. * Cleanup error management in hwloc-gather-topology.sh.
  1716. Thanks to Jirka Hladky for reporting the problem and providing a patch.
  1717. * Add a manpage and usage for hwloc-gather-topology.sh on Linux.
  1718. Thanks to Jirka Hladky for providing a patch.
  1719. * Memory binding documentation enhancements.
  1720. Version 1.1.0
  1721. -------------
  1722. * API
  1723. + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be
  1724. detected at build-time.
  1725. + Add a memory binding interface.
  1726. + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by
  1727. the bitmap API (hwloc/bitmap.h) which offers the same features with more
  1728. generic names since it applies to CPU sets, node sets and more.
  1729. Backward compatibility with the cpuset API and ABI is still provided but
  1730. it will be removed in a future release.
  1731. Old types (hwloc_cpuset_t, ...) are still available as a way to clarify
  1732. what kind of hwloc_bitmap_t each API function manipulates.
  1733. Upgrading to the new API only requires replacing hwloc_cpuset_ function
  1734. calls with the corresponding hwloc_bitmap_ calls, with the following
  1735. renaming exceptions:
  1736. - hwloc_cpuset_cpu -> hwloc_bitmap_only
  1737. - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut
  1738. - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf
  1739. + Add an `infos' array in each object to store pairs of info names and
  1740. values. It enables generic storage of things like the old dmi board infos
  1741. that were previously stored in machine specific attributes.
  1742. + Add linesize cache attribute.
  1743. * Features
  1744. + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated,
  1745. the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed.
  1746. + Improve the distance-based grouping code to better support irregular
  1747. distance matrices.
  1748. + Add support for device-tree to get cache information (useful on Power
  1749. architectures).
  1750. * Helpers
  1751. + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability
  1752. with CUDA Runtime and Driver APIs.
  1753. + Add Myrinet Express helper in myriexpress.h to ease interoperability.
  1754. * Tools
  1755. + lstopo now displays physical/OS indexes by default in graphical mode
  1756. (use -l to switch back to logical indexes). The textual output still uses
  1757. logical by default (use -p to switch to physical indexes).
  1758. + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'.
  1759. Physical indexes are also printed as `P#N' instead of `phys=N' within
  1760. object attributes (in parentheses).
  1761. + Add a legend at the bottom of the lstopo graphical output, use --no-legend
  1762. to remove it.
  1763. + Add hwloc-ps to list process bindings.
  1764. + Add --membind and --mempolicy options to hwloc-bind.
  1765. + Improve tools command-line options by adding a generic --input option
  1766. (and more) which replaces the old --xml, --synthetic and --fsys-root.
  1767. + Cleanup lstopo output configuration by adding --output-format.
  1768. + Add --intersect in hwloc-calc, and replace --objects with --largest.
  1769. + Add the ability to work on standard input in hwloc-calc.
  1770. + Add --from, --to and --at in hwloc-distrib.
  1771. + Add taskset-specific functions and command-line tools options to
  1772. manipulate CPU set strings in the format of the taskset program.
  1773. + Install hwloc-gather-topology.sh on Linux.
  1774. Version 1.0.3
  1775. -------------
  1776. * Fix support for Linux cpuset when emulated by a cgroup mount point.
  1777. * Remove unneeded runtime dependency on libibverbs.so in the library and
  1778. all utils programs.
  1779. * Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes
  1780. for NUMA nodes.
  1781. * lstopo now displays physical/OS indexes by default in graphical mode
  1782. (use -l to switch back to logical indexes). The textual output still uses
  1783. logical by default (use -p to switch to physical indexes).
  1784. Version 1.0.2
  1785. -------------
  1786. * Public headers can now be included directly from C++ programs.
  1787. * Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for
  1788. reporting the issue.
  1789. * Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue.
  1790. * Revert 1.0.1 patch that ignored sockets with unknown ID values since it
  1791. only slightly helped POWER7 machines with old Linux kernels while it
  1792. prevents recent kernels from getting the complete POWER7 topology.
  1793. * Fix hwloc_get_common_ancestor_obj().
  1794. * Remove arch-specific bits in public headers.
  1795. * Some fixes in the lstopo graphical output.
  1796. * Various man page clarifications and minor updates.
  1797. Version 1.0.1
  1798. -------------
  1799. * Various Solaris fixes. Thanks to Yannick Martin for reporting the issue.
  1800. * Fix "non-native" builds on x86 platforms (e.g., when building 32
  1801. bit executables with compilers that natively build 64 bit).
  1802. * Ignore sockets with unknown ID values (which fixes issues on POWER7
  1803. machines). Thanks to Greg Bauer for reporting the issue.
  1804. * Various man page clarifications and minor updates.
  1805. * Fixed memory leaks in hwloc_setup_group_from_min_distance_clique().
  1806. * Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος
  1807. Παπαδογιαννάκ for reporting the issue.
  1808. * Fixed warnings when compiling with -DNDEBUG.
  1809. Version 1.0.0
  1810. -------------
  1811. * The ABI of the library has changed.
  1812. * Backend updates
  1813. + Add FreeBSD support.
  1814. + Add x86 cpuid based backend.
  1815. + Add Linux cgroup support to the Linux cpuset code.
  1816. + Support binding of entire multithreaded process on Linux.
  1817. + Fix and enable Group support in Windows.
  1818. + Cleanup XML export/import.
  1819. * Objects
  1820. + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit",
  1821. its stringified type name is now "PU".
  1822. + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping
  1823. objects according to NUMA distances or arbitrary OS aggregation.
  1824. + Rework memory attributes.
  1825. + Add different cpusets in each object to specify processors that
  1826. are offline, unavailable, ...
  1827. + Cleanup the storage of object names and DMI infos.
  1828. * Features
  1829. + Add support for looking up specific PID topology information.
  1830. + Add hwloc_topology_export_xml() to export the topology in an XML file.
  1831. + Add hwloc_topology_get_support() to retrieve the supported features
  1832. for the current topology context.
  1833. + Support non-SYSTEM object as the root of the tree, use MACHINE in
  1834. most common cases.
  1835. + Add hwloc_get_*cpubind() routines to retrieve the current binding
  1836. of processes and threads.
  1837. * API
  1838. + Add HWLOC_API_VERSION to help detect the currently used API version.
  1839. + Add missing ending "e" to *compare* functions.
  1840. + Add several routines to emulate PLPA functions.
  1841. + Rename and rework the cpuset and/or/xor/not/clear operators to output
  1842. their result in a dedicated argument instead of modifying one input.
  1843. + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf().
  1844. + Clarify the use of parent and ancestor in the API, do not use father.
  1845. + Replace hwloc_get_system_obj() with hwloc_get_root_obj().
  1846. + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter
  1847. isn't public.
  1848. + Relax constraints in hwloc_obj_type_of_string().
  1849. + Improve displaying of memory sizes.
  1850. + Add 0x prefix to cpuset strings.
  1851. * Tools
  1852. + lstopo now displays logical indexes by default, use --physical to
  1853. revert back to OS/physical indexes.
  1854. + Add colors in the lstopo graphical outputs to distinguish between online,
  1855. offline, reserved, ... objects.
  1856. + Extend lstopo to show cpusets, filter objects by type, ...
  1857. + Renamed hwloc-mask into hwloc-calc which supports many new options.
  1858. * Documentation
  1859. + Add a hwloc(7) manpage containing general information.
  1860. + Add documentation about how to switch from PLPA to hwloc.
  1861. + Cleanup the distributed documentation files.
  1862. * Miscellaneous
  1863. + Many compiler warning fixes.
  1864. + Cleanup the ABI by using the visibility attribute.
  1865. + Add project embedding support.
  1866. Version 0.9.4 (unreleased)
  1867. -------------
  1868. * Fix resetting colors to normal in lstopo -.txt output.
  1869. * Fix Linux pthread_t binding error report.
  1870. Version 0.9.3
  1871. -------------
  1872. * Fix autogen.sh to work with Autoconf 2.63.
  1873. * Fix various crashes in particular conditions:
  1874. - xml files with root attributes
  1875. - offline CPUs
  1876. - partial sysfs support
  1877. - unparseable /proc/cpuinfo
  1878. - ignoring NUMA level while Misc levels have been generated
  1879. * Tweak documentation a bit
  1880. * Do not require the pthread library for binding the current thread on Linux
  1881. * Do not erroneously assume that the sched_setaffinity prototype is the old version
  1882. when there is actually none.
  1883. * Fix _syscall3 compilation on archs for which we do not have the
  1884. sched_setaffinity system call number.
  1885. * Fix AIX binding.
  1886. * Fix library dependencies: now only lstopo depends on libtermcap, fix
  1887. binutils-gold link
  1888. * Have make check always build and run hwloc-hello.c
  1889. * Do not limit size of a cpuset.
  1890. Version 0.9.2
  1891. -------------
  1892. * Trivial documentation changes.
  1893. Version 0.9.1
  1894. -------------
  1895. * Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the
  1896. BSD license.
  1897. * The prefix of all functions and tools is now hwloc, and some public
  1898. functions were also renamed for real.
  1899. * Group NUMA nodes into Misc objects according to their physical distance
  1900. that may be reported by the OS/BIOS.
  1901. May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment.
  1902. * Ignore offline CPUs on Solaris.
  1903. * Improved binding support on AIX.
  1904. * Add HP-UX support.
  1905. * CPU sets are now allocated/freed dynamically.
  1906. * Add command line options to tune the lstopo graphical output, add
  1907. semi-graphical textual output
  1908. * Extend topobind to support multiple cpusets or objects on the command
  1909. line as topomask does.
  1910. * Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve
  1911. the physical location of IB devices.
  1912. Version 0.9 (formerly named "libtopology")
  1913. -----------
  1914. * First release.