cmuarctic.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. import csv
  2. import os
  3. from pathlib import Path
  4. from typing import Tuple, Union
  5. import torchaudio
  6. from torch import Tensor
  7. from torch.hub import download_url_to_file
  8. from torch.utils.data import Dataset
  9. from torchaudio.datasets.utils import extract_archive
  10. URL = "aew"
  11. FOLDER_IN_ARCHIVE = "ARCTIC"
  12. _CHECKSUMS = {
  13. "http://festvox.org/cmu_arctic/packed/cmu_us_aew_arctic.tar.bz2": "645cb33c0f0b2ce41384fdd8d3db2c3f5fc15c1e688baeb74d2e08cab18ab406", # noqa: E501
  14. "http://festvox.org/cmu_arctic/packed/cmu_us_ahw_arctic.tar.bz2": "024664adeb892809d646a3efd043625b46b5bfa3e6189b3500b2d0d59dfab06c", # noqa: E501
  15. "http://festvox.org/cmu_arctic/packed/cmu_us_aup_arctic.tar.bz2": "2c55bc3050caa996758869126ad10cf42e1441212111db034b3a45189c18b6fc", # noqa: E501
  16. "http://festvox.org/cmu_arctic/packed/cmu_us_awb_arctic.tar.bz2": "d74a950c9739a65f7bfc4dfa6187f2730fa03de5b8eb3f2da97a51b74df64d3c", # noqa: E501
  17. "http://festvox.org/cmu_arctic/packed/cmu_us_axb_arctic.tar.bz2": "dd65c3d2907d1ee52f86e44f578319159e60f4bf722a9142be01161d84e330ff", # noqa: E501
  18. "http://festvox.org/cmu_arctic/packed/cmu_us_bdl_arctic.tar.bz2": "26b91aaf48b2799b2956792b4632c2f926cd0542f402b5452d5adecb60942904", # noqa: E501
  19. "http://festvox.org/cmu_arctic/packed/cmu_us_clb_arctic.tar.bz2": "3f16dc3f3b97955ea22623efb33b444341013fc660677b2e170efdcc959fa7c6", # noqa: E501
  20. "http://festvox.org/cmu_arctic/packed/cmu_us_eey_arctic.tar.bz2": "8a0ee4e5acbd4b2f61a4fb947c1730ab3adcc9dc50b195981d99391d29928e8a", # noqa: E501
  21. "http://festvox.org/cmu_arctic/packed/cmu_us_fem_arctic.tar.bz2": "3fcff629412b57233589cdb058f730594a62c4f3a75c20de14afe06621ef45e2", # noqa: E501
  22. "http://festvox.org/cmu_arctic/packed/cmu_us_gka_arctic.tar.bz2": "dc82e7967cbd5eddbed33074b0699128dbd4482b41711916d58103707e38c67f", # noqa: E501
  23. "http://festvox.org/cmu_arctic/packed/cmu_us_jmk_arctic.tar.bz2": "3a37c0e1dfc91e734fdbc88b562d9e2ebca621772402cdc693bbc9b09b211d73", # noqa: E501
  24. "http://festvox.org/cmu_arctic/packed/cmu_us_ksp_arctic.tar.bz2": "8029cafce8296f9bed3022c44ef1e7953332b6bf6943c14b929f468122532717", # noqa: E501
  25. "http://festvox.org/cmu_arctic/packed/cmu_us_ljm_arctic.tar.bz2": "b23993765cbf2b9e7bbc3c85b6c56eaf292ac81ee4bb887b638a24d104f921a0", # noqa: E501
  26. "http://festvox.org/cmu_arctic/packed/cmu_us_lnh_arctic.tar.bz2": "4faf34d71aa7112813252fb20c5433e2fdd9a9de55a00701ffcbf05f24a5991a", # noqa: E501
  27. "http://festvox.org/cmu_arctic/packed/cmu_us_rms_arctic.tar.bz2": "c6dc11235629c58441c071a7ba8a2d067903dfefbaabc4056d87da35b72ecda4", # noqa: E501
  28. "http://festvox.org/cmu_arctic/packed/cmu_us_rxr_arctic.tar.bz2": "1fa4271c393e5998d200e56c102ff46fcfea169aaa2148ad9e9469616fbfdd9b", # noqa: E501
  29. "http://festvox.org/cmu_arctic/packed/cmu_us_slp_arctic.tar.bz2": "54345ed55e45c23d419e9a823eef427f1cc93c83a710735ec667d068c916abf1", # noqa: E501
  30. "http://festvox.org/cmu_arctic/packed/cmu_us_slt_arctic.tar.bz2": "7c173297916acf3cc7fcab2713be4c60b27312316765a90934651d367226b4ea", # noqa: E501
  31. }
  32. def load_cmuarctic_item(line: str, path: str, folder_audio: str, ext_audio: str) -> Tuple[Tensor, int, str, str]:
  33. utterance_id, transcript = line[0].strip().split(" ", 2)[1:]
  34. # Remove space, double quote, and single parenthesis from transcript
  35. transcript = transcript[1:-3]
  36. file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)
  37. # Load audio
  38. waveform, sample_rate = torchaudio.load(file_audio)
  39. return (waveform, sample_rate, transcript, utterance_id.split("_")[1])
  40. class CMUARCTIC(Dataset):
  41. """Create a Dataset for *CMU ARCTIC* [:footcite:`Kominek03cmuarctic`].
  42. Args:
  43. root (str or Path): Path to the directory where the dataset is found or downloaded.
  44. url (str, optional):
  45. The URL to download the dataset from or the type of the dataset to download.
  46. (default: ``"aew"``)
  47. Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
  48. ``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
  49. ``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
  50. folder_in_archive (str, optional):
  51. The top-level directory of the dataset. (default: ``"ARCTIC"``)
  52. download (bool, optional):
  53. Whether to download the dataset if it is not found at root path. (default: ``False``).
  54. """
  55. _file_text = "txt.done.data"
  56. _folder_text = "etc"
  57. _ext_audio = ".wav"
  58. _folder_audio = "wav"
  59. def __init__(
  60. self, root: Union[str, Path], url: str = URL, folder_in_archive: str = FOLDER_IN_ARCHIVE, download: bool = False
  61. ) -> None:
  62. if url in [
  63. "aew",
  64. "ahw",
  65. "aup",
  66. "awb",
  67. "axb",
  68. "bdl",
  69. "clb",
  70. "eey",
  71. "fem",
  72. "gka",
  73. "jmk",
  74. "ksp",
  75. "ljm",
  76. "lnh",
  77. "rms",
  78. "rxr",
  79. "slp",
  80. "slt",
  81. ]:
  82. url = "cmu_us_" + url + "_arctic"
  83. ext_archive = ".tar.bz2"
  84. base_url = "http://www.festvox.org/cmu_arctic/packed/"
  85. url = os.path.join(base_url, url + ext_archive)
  86. # Get string representation of 'root' in case Path object is passed
  87. root = os.fspath(root)
  88. basename = os.path.basename(url)
  89. root = os.path.join(root, folder_in_archive)
  90. if not os.path.isdir(root):
  91. os.mkdir(root)
  92. archive = os.path.join(root, basename)
  93. basename = basename.split(".")[0]
  94. self._path = os.path.join(root, basename)
  95. if download:
  96. if not os.path.isdir(self._path):
  97. if not os.path.isfile(archive):
  98. checksum = _CHECKSUMS.get(url, None)
  99. download_url_to_file(url, archive, hash_prefix=checksum)
  100. extract_archive(archive)
  101. else:
  102. if not os.path.exists(self._path):
  103. raise RuntimeError(
  104. f"The path {self._path} doesn't exist. "
  105. "Please check the ``root`` path or set `download=True` to download it"
  106. )
  107. self._text = os.path.join(self._path, self._folder_text, self._file_text)
  108. with open(self._text, "r") as text:
  109. walker = csv.reader(text, delimiter="\n")
  110. self._walker = list(walker)
  111. def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
  112. """Load the n-th sample from the dataset.
  113. Args:
  114. n (int): The index of the sample to be loaded
  115. Returns:
  116. (Tensor, int, str, str): ``(waveform, sample_rate, transcript, utterance_id)``
  117. """
  118. line = self._walker[n]
  119. return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
  120. def __len__(self) -> int:
  121. return len(self._walker)