kaldi_io.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. # To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python)
  2. # needs to be installed. This is a light wrapper around kaldi_io that returns
  3. # torch.Tensors.
  4. from typing import Any, Callable, Iterable, Tuple
  5. import torch
  6. from torch import Tensor
  7. from torchaudio._internal import module_utils as _mod_utils
  8. if _mod_utils.is_module_available("kaldi_io", "numpy"):
  9. import kaldi_io
  10. import numpy as np
# Public API of this module: the kaldi_io reader wrappers defined below, each of
# which yields (key, torch.Tensor) pairs.
__all__ = [
    "read_vec_int_ark",
    "read_vec_flt_scp",
    "read_vec_flt_ark",
    "read_mat_scp",
    "read_mat_ark",
]
  18. def _convert_method_output_to_tensor(
  19. file_or_fd: Any, fn: Callable, convert_contiguous: bool = False
  20. ) -> Iterable[Tuple[str, Tensor]]:
  21. r"""Takes a method invokes it. The output is converted to a tensor.
  22. Args:
  23. file_or_fd (str/FileDescriptor): File name or file descriptor
  24. fn (Callable): Function that has the signature (file name/descriptor) and converts it to
  25. Iterable[Tuple[str, Tensor]].
  26. convert_contiguous (bool, optional): Determines whether the array should be converted into a
  27. contiguous layout. (Default: ``False``)
  28. Returns:
  29. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is vec/mat
  30. """
  31. for key, np_arr in fn(file_or_fd):
  32. if convert_contiguous:
  33. np_arr = np.ascontiguousarray(np_arr)
  34. yield key, torch.from_numpy(np_arr)
  35. @_mod_utils.requires_module("kaldi_io", "numpy")
  36. def read_vec_int_ark(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
  37. r"""Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
  38. Args:
  39. file_or_fd (str/FileDescriptor): ark, gzipped ark, pipe or opened file descriptor
  40. Returns:
  41. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is the vector read from file
  42. Example
  43. >>> # read ark to a 'dictionary'
  44. >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
  45. """
  46. # Requires convert_contiguous to be True because elements from int32 vector are
  47. # sorted in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
  48. # in from_numpy as it expects strides to be a multiple of 4 (int32).
  49. return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_int_ark, convert_contiguous=True)
  50. @_mod_utils.requires_module("kaldi_io", "numpy")
  51. def read_vec_flt_scp(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
  52. r"""Create generator of (key,vector<float32/float64>) tuples, read according to Kaldi scp.
  53. Args:
  54. file_or_fd (str/FileDescriptor): scp, gzipped scp, pipe or opened file descriptor
  55. Returns:
  56. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is the vector read from file
  57. Example
  58. >>> # read scp to a 'dictionary'
  59. >>> # d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
  60. """
  61. return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_scp)
  62. @_mod_utils.requires_module("kaldi_io", "numpy")
  63. def read_vec_flt_ark(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
  64. r"""Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
  65. Args:
  66. file_or_fd (str/FileDescriptor): ark, gzipped ark, pipe or opened file descriptor
  67. Returns:
  68. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is the vector read from file
  69. Example
  70. >>> # read ark to a 'dictionary'
  71. >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
  72. """
  73. return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_ark)
  74. @_mod_utils.requires_module("kaldi_io", "numpy")
  75. def read_mat_scp(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
  76. r"""Create generator of (key,matrix<float32/float64>) tuples, read according to Kaldi scp.
  77. Args:
  78. file_or_fd (str/FileDescriptor): scp, gzipped scp, pipe or opened file descriptor
  79. Returns:
  80. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is the matrix read from file
  81. Example
  82. >>> # read scp to a 'dictionary'
  83. >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
  84. """
  85. return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_scp)
  86. @_mod_utils.requires_module("kaldi_io", "numpy")
  87. def read_mat_ark(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
  88. r"""Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
  89. Args:
  90. file_or_fd (str/FileDescriptor): ark, gzipped ark, pipe or opened file descriptor
  91. Returns:
  92. Iterable[Tuple[str, Tensor]]: The string is the key and the tensor is the matrix read from file
  93. Example
  94. >>> # read ark to a 'dictionary'
  95. >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
  96. """
  97. return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_ark)