| from pathlib import Path | |
| from typing import Union | |
| import warnings | |
| from typeguard import check_argument_types | |
| from typeguard import check_return_type | |
class DatadirWriter:
    """Writer class to create kaldi like data directory.

    A writer is a node in a small tree rooted at the given path.  A node
    becomes a *directory* node the first time it is indexed
    (``writer[name]``) and a *file* node the first time a value is assigned
    to it (``writer[key] = value``); it can never be both.

    File nodes write one ``"<key> <value>"`` line per assignment.  The file
    (and its parent directories) is created lazily by the first assignment,
    so a child that is only looked up and never written leaves nothing on
    disk.

    Examples:
        >>> with DatadirWriter("output") as writer:
        ...     # Get the writer for output/sub.txt; the file itself is
        ...     # created by the first assignment below
        ...     subwriter = writer["sub.txt"]
        ...     # Write "uttidA some/where/a.wav"
        ...     subwriter["uttidA"] = "some/where/a.wav"
        ...     subwriter["uttidB"] = "some/where/b.wav"

    """

    def __init__(self, p: Union[Path, str]) -> None:
        """Create a writer rooted at ``p`` without touching the filesystem.

        Args:
            p: Path of the directory or file this writer represents.
        """
        assert check_argument_types()
        self.path = Path(p)
        # NOTE: the misspelling is kept on purpose; this is a public
        # attribute and renaming it could break existing callers.
        self.chilidren = {}
        # Opened lazily by the first __setitem__ call (file nodes only).
        self.fd = None
        self.has_children = False
        # Keys written so far; used for duplicate and mismatch detection.
        self.keys = set()

    def __enter__(self):
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __getitem__(self, key: str) -> "DatadirWriter":
        """Return the child writer for ``self.path / key``, creating it once.

        Args:
            key: Name of the child relative to this writer's path.

        Returns:
            The child writer; repeated lookups return the same object.

        Raises:
            RuntimeError: If this writer has already been used as a file.
        """
        assert check_argument_types()
        if self.fd is not None:
            raise RuntimeError("This writer points out a file")

        if key not in self.chilidren:
            self.chilidren[key] = DatadirWriter(self.path / key)
            self.has_children = True

        retval = self.chilidren[key]
        assert check_return_type(retval)
        return retval

    def __setitem__(self, key: str, value: str) -> None:
        """Append the line ``"<key> <value>"`` to the file of this writer.

        A duplicated key only triggers a warning; the line is still written.

        Args:
            key: Identifier written at the start of the line.
            value: Text written after the key, separated by one space.

        Raises:
            RuntimeError: If this writer has already been used as a
                directory.
        """
        assert check_argument_types()
        if self.has_children:
            raise RuntimeError("This writer points out a directory")
        if key in self.keys:
            warnings.warn(f"Duplicated: {key}")

        if self.fd is None:
            # First write: create the parent directories and open the file.
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.fd = self.path.open("w", encoding="utf-8")

        self.keys.add(key)
        self.fd.write(f"{key} {value}\n")

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the writer on leaving the ``with`` block."""
        self.close()

    def close(self) -> None:
        """Close this writer and, for a directory node, all of its children.

        After the children of a directory node are closed, the key sets of
        neighbouring children are compared and a warning is emitted for each
        pair that differs.

        Every child is closed before any comparison is made, and a failure
        while closing one child does not stop the remaining ones from being
        closed.  This guarantees that no file handle is left open even when
        warnings are configured to raise (e.g. ``-W error``).  The first
        error met while closing children is re-raised once all of them have
        been processed.
        """
        if self.has_children:
            children = list(self.chilidren.values())

            # Pass 1: release every handle, deferring the first error.
            pending_error = None
            for child in children:
                try:
                    child.close()
                except Exception as err:
                    if pending_error is None:
                        pending_error = err
            if pending_error is not None:
                raise pending_error

            # Pass 2: report id mismatches between neighbouring children.
            for prev_child, child in zip(children, children[1:]):
                if prev_child.keys != child.keys:
                    warnings.warn(
                        f"Ids are mismatching between "
                        f"{prev_child.path} and {child.path}"
                    )

        elif self.fd is not None:
            self.fd.close()