File size: 3,301 Bytes
ad16788 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import collections.abc
from pathlib import Path
from typing import Union
import numpy as np
import soundfile
from typeguard import check_argument_types
from espnet2.fileio.read_text import read_2column_text
class SoundScpReader(collections.abc.Mapping):
    """Reader class for 'wav.scp'.

    Each line of the scp file pairs a key with the path of a sound file.
    Indexing the reader with a key loads the corresponding file through
    ``soundfile.read``.

    Examples:
        key1 /some/path/a.wav
        key2 /some/path/b.wav
        key3 /some/path/c.wav
        key4 /some/path/d.wav
        ...

        >>> reader = SoundScpReader('wav.scp')
        >>> rate, array = reader['key1']

    """

    def __init__(
        self,
        fname,
        dtype=np.int16,
        always_2d: bool = False,
        normalize: bool = False,
    ):
        """Parse the scp file and remember the read options.

        Args:
            fname: Path of the scp file, handed to ``read_2column_text``.
            dtype: dtype forwarded to ``soundfile.read``; only used when
                ``normalize`` is False.
            always_2d: Forwarded to ``soundfile.read`` as ``always_2d``.
            normalize: If True, ``dtype`` is not passed to
                ``soundfile.read`` so that it returns normalized data.
        """
        assert check_argument_types()
        self.fname = fname
        self.dtype = dtype
        self.always_2d = always_2d
        self.normalize = normalize
        # Used below as a key -> sound file path mapping.
        self.data = read_2column_text(fname)

    def __getitem__(self, key):
        """Load the sound file registered under ``key``.

        Returns:
            A ``(rate, array)`` tuple. Note the order is swapped relative
            to ``soundfile.read``, which yields ``(array, rate)``.
        """
        wav = self.data[key]
        if self.normalize:
            # soundfile.read normalizes data to [-1,1] if dtype is not given
            array, rate = soundfile.read(wav, always_2d=self.always_2d)
        else:
            array, rate = soundfile.read(
                wav, dtype=self.dtype, always_2d=self.always_2d
            )

        return rate, array

    def get_path(self, key):
        """Return the path stored for ``key`` without reading the file."""
        return self.data[key]

    def __contains__(self, item):
        """Return True only if ``item`` is a key of the parsed scp file."""
        # Returning ``item`` itself would make every truthy key look
        # present, even when it is absent from the scp file.
        return item in self.data

    def __len__(self):
        """Return the number of entries parsed from the scp file."""
        return len(self.data)

    def __iter__(self):
        """Iterate over the keys of the parsed scp file."""
        return iter(self.data)

    def keys(self):
        """Return the keys view of the underlying key -> path mapping."""
        return self.data.keys()
class SoundScpWriter:
    """Writer class for 'wav.scp'

    Every assigned item is saved as a sound file inside the output
    directory, and a "key path" line is appended to the scp file.

    Examples:
        key1 /some/path/a.wav
        key2 /some/path/b.wav
        key3 /some/path/c.wav
        key4 /some/path/d.wav
        ...

        >>> writer = SoundScpWriter('./data/', './data/feat.scp')
        >>> writer['aa'] = 16000, numpy_array
        >>> writer['bb'] = 16000, numpy_array

    """

    def __init__(
        self,
        outdir: Union[Path, str],
        scpfile: Union[Path, str],
        format="wav",
        dtype=None,
    ):
        """Create the output locations and open the scp file for writing.

        Args:
            outdir: Directory that receives the sound files; created
                (including parents) if it does not exist.
            scpfile: Path of the scp file; its parent directory is
                created if needed and the file is opened in "w" mode.
            format: Used as the file name suffix of each written file.
            dtype: Stored on the instance; not otherwise used by the
                methods of this class.
        """
        assert check_argument_types()
        self.dir = Path(outdir)
        self.dir.mkdir(parents=True, exist_ok=True)

        scp_path = Path(scpfile)
        scp_path.parent.mkdir(parents=True, exist_ok=True)
        self.fscp = scp_path.open("w", encoding="utf-8")

        # key -> written file path (as str), filled by __setitem__
        self.data = {}
        self.format = format
        self.dtype = dtype

    def __setitem__(self, key: str, value):
        """Write ``value`` as a sound file and register it under ``key``.

        Args:
            key: Entry name; also the stem of the written file.
            value: A ``(rate, signal)`` pair, where ``rate`` is an int and
                ``signal`` a 1- or 2-dimensional ``np.ndarray``.

        Raises:
            RuntimeError: If ``signal`` is neither 1- nor 2-dimensional.
        """
        sample_rate, samples = value
        assert isinstance(sample_rate, int), type(sample_rate)
        assert isinstance(samples, np.ndarray), type(samples)

        n_dims = samples.ndim
        if n_dims != 1 and n_dims != 2:
            raise RuntimeError(
                f"Input signal must be 1 or 2 dimension: {samples.ndim}"
            )
        if n_dims == 1:
            # Append a trailing axis so the data is always 2-dimensional
            samples = samples[:, np.newaxis]

        target = self.dir / f"{key}.{self.format}"
        target.parent.mkdir(parents=True, exist_ok=True)
        target_str = str(target)
        soundfile.write(target_str, samples, sample_rate)

        self.fscp.write(f"{key} {target_str}\n")

        # Store the file path
        self.data[key] = target_str

    def get_path(self, key):
        """Return the path of the file that was written for ``key``."""
        return self.data[key]

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the scp file when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying scp file handle."""
        self.fscp.close()
|