collect_env.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. from __future__ import print_function
  2. # Unlike the rest of PyTorch, this file must be Python 2 compliant.
  3. # This script outputs relevant system environment info
  4. # Run it with `python collect_env.py`.
  5. import datetime
  6. import locale
  7. import re
  8. import subprocess
  9. import sys
  10. import os
  11. from collections import namedtuple
  12. try:
  13. import torch
  14. TORCH_AVAILABLE = True
  15. except (ImportError, NameError, AttributeError, OSError):
  16. TORCH_AVAILABLE = False
  17. # System Environment Information
  18. SystemEnv = namedtuple('SystemEnv', [
  19. 'torch_version',
  20. 'is_debug_build',
  21. 'cuda_compiled_version',
  22. 'gcc_version',
  23. 'clang_version',
  24. 'cmake_version',
  25. 'os',
  26. 'libc_version',
  27. 'python_version',
  28. 'python_platform',
  29. 'is_cuda_available',
  30. 'cuda_runtime_version',
  31. 'nvidia_driver_version',
  32. 'nvidia_gpu_models',
  33. 'cudnn_version',
  34. 'pip_version', # 'pip' or 'pip3'
  35. 'pip_packages',
  36. 'conda_packages',
  37. 'hip_compiled_version',
  38. 'hip_runtime_version',
  39. 'miopen_runtime_version',
  40. 'caching_allocator_config',
  41. 'is_xnnpack_available',
  42. ])
  43. def run(command):
  44. """Returns (return-code, stdout, stderr)"""
  45. p = subprocess.Popen(command, stdout=subprocess.PIPE,
  46. stderr=subprocess.PIPE, shell=True)
  47. raw_output, raw_err = p.communicate()
  48. rc = p.returncode
  49. if get_platform() == 'win32':
  50. enc = 'oem'
  51. else:
  52. enc = locale.getpreferredencoding()
  53. output = raw_output.decode(enc)
  54. err = raw_err.decode(enc)
  55. return rc, output.strip(), err.strip()
  56. def run_and_read_all(run_lambda, command):
  57. """Runs command using run_lambda; reads and returns entire output if rc is 0"""
  58. rc, out, _ = run_lambda(command)
  59. if rc != 0:
  60. return None
  61. return out
  62. def run_and_parse_first_match(run_lambda, command, regex):
  63. """Runs command using run_lambda, returns the first regex match if it exists"""
  64. rc, out, _ = run_lambda(command)
  65. if rc != 0:
  66. return None
  67. match = re.search(regex, out)
  68. if match is None:
  69. return None
  70. return match.group(1)
  71. def run_and_return_first_line(run_lambda, command):
  72. """Runs command using run_lambda and returns first line if output is not empty"""
  73. rc, out, _ = run_lambda(command)
  74. if rc != 0:
  75. return None
  76. return out.split('\n')[0]
  77. def get_conda_packages(run_lambda):
  78. conda = os.environ.get('CONDA_EXE', 'conda')
  79. out = run_and_read_all(run_lambda, f"{conda} list")
  80. if out is None:
  81. return out
  82. return "\n".join(
  83. line
  84. for line in out.splitlines()
  85. if not line.startswith("#")
  86. and any(
  87. name in line
  88. for name in {
  89. "torch",
  90. "numpy",
  91. "cudatoolkit",
  92. "soumith",
  93. "mkl",
  94. "magma",
  95. "mkl",
  96. }
  97. )
  98. )
  99. def get_gcc_version(run_lambda):
  100. return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
  101. def get_clang_version(run_lambda):
  102. return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)')
  103. def get_cmake_version(run_lambda):
  104. return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
  105. def get_nvidia_driver_version(run_lambda):
  106. if get_platform() == 'darwin':
  107. cmd = 'kextstat | grep -i cuda'
  108. return run_and_parse_first_match(run_lambda, cmd,
  109. r'com[.]nvidia[.]CUDA [(](.*?)[)]')
  110. smi = get_nvidia_smi()
  111. return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ')
  112. def get_gpu_info(run_lambda):
  113. if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None):
  114. if TORCH_AVAILABLE and torch.cuda.is_available():
  115. return torch.cuda.get_device_name(None)
  116. return None
  117. smi = get_nvidia_smi()
  118. uuid_regex = re.compile(r' \(UUID: .+?\)')
  119. rc, out, _ = run_lambda(smi + ' -L')
  120. if rc != 0:
  121. return None
  122. # Anonymize GPUs by removing their UUID
  123. return re.sub(uuid_regex, '', out)
  124. def get_running_cuda_version(run_lambda):
  125. return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)')
  126. def get_cudnn_version(run_lambda):
  127. """This will return a list of libcudnn.so; it's hard to tell which one is being used"""
  128. if get_platform() == 'win32':
  129. system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
  130. cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
  131. where_cmd = os.path.join(system_root, 'System32', 'where')
  132. cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
  133. elif get_platform() == 'darwin':
  134. # CUDA libraries and drivers can be found in /usr/local/cuda/. See
  135. # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
  136. # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
  137. # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
  138. cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
  139. else:
  140. cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
  141. rc, out, _ = run_lambda(cudnn_cmd)
  142. # find will return 1 if there are permission errors or if not found
  143. if len(out) == 0 or (rc != 1 and rc != 0):
  144. l = os.environ.get('CUDNN_LIBRARY')
  145. if l is not None and os.path.isfile(l):
  146. return os.path.realpath(l)
  147. return None
  148. files_set = set()
  149. for fn in out.split('\n'):
  150. fn = os.path.realpath(fn) # eliminate symbolic links
  151. if os.path.isfile(fn):
  152. files_set.add(fn)
  153. if not files_set:
  154. return None
  155. # Alphabetize the result because the order is non-deterministic otherwise
  156. files = list(sorted(files_set))
  157. if len(files) == 1:
  158. return files[0]
  159. result = '\n'.join(files)
  160. return 'Probably one of the following:\n{}'.format(result)
  161. def get_nvidia_smi():
  162. # Note: nvidia-smi is currently available only on Windows and Linux
  163. smi = 'nvidia-smi'
  164. if get_platform() == 'win32':
  165. system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
  166. program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
  167. legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi)
  168. new_path = os.path.join(system_root, 'System32', smi)
  169. smis = [new_path, legacy_path]
  170. for candidate_smi in smis:
  171. if os.path.exists(candidate_smi):
  172. smi = '"{}"'.format(candidate_smi)
  173. break
  174. return smi
  175. def get_platform():
  176. if sys.platform.startswith('linux'):
  177. return 'linux'
  178. elif sys.platform.startswith('win32'):
  179. return 'win32'
  180. elif sys.platform.startswith('cygwin'):
  181. return 'cygwin'
  182. elif sys.platform.startswith('darwin'):
  183. return 'darwin'
  184. else:
  185. return sys.platform
  186. def get_mac_version(run_lambda):
  187. return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
  188. def get_windows_version(run_lambda):
  189. system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
  190. wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
  191. findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
  192. return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))
  193. def get_lsb_version(run_lambda):
  194. return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
  195. def check_release_file(run_lambda):
  196. return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
  197. r'PRETTY_NAME="(.*)"')
  198. def get_os(run_lambda):
  199. from platform import machine
  200. platform = get_platform()
  201. if platform == 'win32' or platform == 'cygwin':
  202. return get_windows_version(run_lambda)
  203. if platform == 'darwin':
  204. version = get_mac_version(run_lambda)
  205. if version is None:
  206. return None
  207. return 'macOS {} ({})'.format(version, machine())
  208. if platform == 'linux':
  209. # Ubuntu/Debian based
  210. desc = get_lsb_version(run_lambda)
  211. if desc is not None:
  212. return '{} ({})'.format(desc, machine())
  213. # Try reading /etc/*-release
  214. desc = check_release_file(run_lambda)
  215. if desc is not None:
  216. return '{} ({})'.format(desc, machine())
  217. return '{} ({})'.format(platform, machine())
  218. # Unknown platform
  219. return platform
  220. def get_python_platform():
  221. import platform
  222. return platform.platform()
  223. def get_libc_version():
  224. import platform
  225. if get_platform() != 'linux':
  226. return 'N/A'
  227. return '-'.join(platform.libc_ver())
  228. def get_pip_packages(run_lambda):
  229. """Returns `pip list` output. Note: will also find conda-installed pytorch
  230. and numpy packages."""
  231. # People generally have `pip` as `pip` or `pip3`
  232. # But here it is incoved as `python -mpip`
  233. def run_with_pip(pip):
  234. out = run_and_read_all(run_lambda, f"{pip} list --format=freeze")
  235. return "\n".join(
  236. line
  237. for line in out.splitlines()
  238. if any(
  239. name in line
  240. for name in {
  241. "torch",
  242. "numpy",
  243. "mypy",
  244. }
  245. )
  246. )
  247. pip_version = 'pip3' if sys.version[0] == '3' else 'pip'
  248. out = run_with_pip(sys.executable + ' -mpip')
  249. return pip_version, out
  250. def get_cachingallocator_config():
  251. ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
  252. return ca_config
  253. def is_xnnpack_available():
  254. if TORCH_AVAILABLE:
  255. import torch.backends.xnnpack
  256. return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined]
  257. else:
  258. return "N/A"
  259. def get_env_info():
  260. run_lambda = run
  261. pip_version, pip_list_output = get_pip_packages(run_lambda)
  262. if TORCH_AVAILABLE:
  263. version_str = torch.__version__
  264. debug_mode_str = str(torch.version.debug)
  265. cuda_available_str = str(torch.cuda.is_available())
  266. cuda_version_str = torch.version.cuda
  267. if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version
  268. hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
  269. else: # HIP version
  270. cfg = torch._C._show_config().split('\n')
  271. hip_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'HIP Runtime' in s][0]
  272. miopen_runtime_version = [s.rsplit(None, 1)[-1] for s in cfg if 'MIOpen' in s][0]
  273. cuda_version_str = 'N/A'
  274. hip_compiled_version = torch.version.hip
  275. else:
  276. version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
  277. hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
  278. sys_version = sys.version.replace("\n", " ")
  279. return SystemEnv(
  280. torch_version=version_str,
  281. is_debug_build=debug_mode_str,
  282. python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
  283. python_platform=get_python_platform(),
  284. is_cuda_available=cuda_available_str,
  285. cuda_compiled_version=cuda_version_str,
  286. cuda_runtime_version=get_running_cuda_version(run_lambda),
  287. nvidia_gpu_models=get_gpu_info(run_lambda),
  288. nvidia_driver_version=get_nvidia_driver_version(run_lambda),
  289. cudnn_version=get_cudnn_version(run_lambda),
  290. hip_compiled_version=hip_compiled_version,
  291. hip_runtime_version=hip_runtime_version,
  292. miopen_runtime_version=miopen_runtime_version,
  293. pip_version=pip_version,
  294. pip_packages=pip_list_output,
  295. conda_packages=get_conda_packages(run_lambda),
  296. os=get_os(run_lambda),
  297. libc_version=get_libc_version(),
  298. gcc_version=get_gcc_version(run_lambda),
  299. clang_version=get_clang_version(run_lambda),
  300. cmake_version=get_cmake_version(run_lambda),
  301. caching_allocator_config=get_cachingallocator_config(),
  302. is_xnnpack_available=is_xnnpack_available(),
  303. )
  304. env_info_fmt = """
  305. PyTorch version: {torch_version}
  306. Is debug build: {is_debug_build}
  307. CUDA used to build PyTorch: {cuda_compiled_version}
  308. ROCM used to build PyTorch: {hip_compiled_version}
  309. OS: {os}
  310. GCC version: {gcc_version}
  311. Clang version: {clang_version}
  312. CMake version: {cmake_version}
  313. Libc version: {libc_version}
  314. Python version: {python_version}
  315. Python platform: {python_platform}
  316. Is CUDA available: {is_cuda_available}
  317. CUDA runtime version: {cuda_runtime_version}
  318. GPU models and configuration: {nvidia_gpu_models}
  319. Nvidia driver version: {nvidia_driver_version}
  320. cuDNN version: {cudnn_version}
  321. HIP runtime version: {hip_runtime_version}
  322. MIOpen runtime version: {miopen_runtime_version}
  323. Is XNNPACK available: {is_xnnpack_available}
  324. Versions of relevant libraries:
  325. {pip_packages}
  326. {conda_packages}
  327. """.strip()
  328. def pretty_str(envinfo):
  329. def replace_nones(dct, replacement='Could not collect'):
  330. for key in dct.keys():
  331. if dct[key] is not None:
  332. continue
  333. dct[key] = replacement
  334. return dct
  335. def replace_bools(dct, true='Yes', false='No'):
  336. for key in dct.keys():
  337. if dct[key] is True:
  338. dct[key] = true
  339. elif dct[key] is False:
  340. dct[key] = false
  341. return dct
  342. def prepend(text, tag='[prepend]'):
  343. lines = text.split('\n')
  344. updated_lines = [tag + line for line in lines]
  345. return '\n'.join(updated_lines)
  346. def replace_if_empty(text, replacement='No relevant packages'):
  347. if text is not None and len(text) == 0:
  348. return replacement
  349. return text
  350. def maybe_start_on_next_line(string):
  351. # If `string` is multiline, prepend a \n to it.
  352. if string is not None and len(string.split('\n')) > 1:
  353. return '\n{}\n'.format(string)
  354. return string
  355. mutable_dict = envinfo._asdict()
  356. # If nvidia_gpu_models is multiline, start on the next line
  357. mutable_dict['nvidia_gpu_models'] = \
  358. maybe_start_on_next_line(envinfo.nvidia_gpu_models)
  359. # If the machine doesn't have CUDA, report some fields as 'No CUDA'
  360. dynamic_cuda_fields = [
  361. 'cuda_runtime_version',
  362. 'nvidia_gpu_models',
  363. 'nvidia_driver_version',
  364. ]
  365. all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
  366. all_dynamic_cuda_fields_missing = all(
  367. mutable_dict[field] is None for field in dynamic_cuda_fields)
  368. if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
  369. for field in all_cuda_fields:
  370. mutable_dict[field] = 'No CUDA'
  371. if envinfo.cuda_compiled_version is None:
  372. mutable_dict['cuda_compiled_version'] = 'None'
  373. # Replace True with Yes, False with No
  374. mutable_dict = replace_bools(mutable_dict)
  375. # Replace all None objects with 'Could not collect'
  376. mutable_dict = replace_nones(mutable_dict)
  377. # If either of these are '', replace with 'No relevant packages'
  378. mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
  379. mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])
  380. # Tag conda and pip packages with a prefix
  381. # If they were previously None, they'll show up as ie '[conda] Could not collect'
  382. if mutable_dict['pip_packages']:
  383. mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
  384. '[{}] '.format(envinfo.pip_version))
  385. if mutable_dict['conda_packages']:
  386. mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
  387. '[conda] ')
  388. return env_info_fmt.format(**mutable_dict)
  389. def get_pretty_env_info():
  390. return pretty_str(get_env_info())
  391. def main():
  392. print("Collecting environment information...")
  393. output = get_pretty_env_info()
  394. print(output)
  395. if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
  396. minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
  397. if sys.platform == "linux" and os.path.exists(minidump_dir):
  398. dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
  399. latest = max(dumps, key=os.path.getctime)
  400. ctime = os.path.getctime(latest)
  401. creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
  402. msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
  403. "if this is related to your bug please include it when you file a report ***"
  404. print(msg, file=sys.stderr)
  405. if __name__ == '__main__':
  406. main()