import xml.etree.ElementTree as ET
from pathlib import Path

import soundfile as sf


# Cut every recording of one corpus split into per-segment audio clips and
# matching transcript text files, driven by the per-recording XML files.

# Name of the split being prepared; only used to build the output layout.
split = "train"

# Root directory of the corpus data for this split.
archive_path = Path("data/train")

wav_dir = archive_path / "wav"
# NOTE: despite its name this is a directory; it is globbed for "*.xml" below.
segments_file = archive_path / "xml" / "utf8"

output_wav_dir = archive_path / "dataset" / split / "wav"
output_txt_dir = archive_path / "dataset" / split / "txt"

# Rate used both to turn segment times (seconds) into frame offsets and as
# the sample rate of the written clips.
# NOTE(review): assumes every source recording is sampled at 16 kHz; the rate
# reported by the audio file itself is not consulted -- confirm.
SAMPLE_RATE = 16_000


def recording_filename(head):
    """Return the ``filename`` attribute of the ``recording`` child of *head*.

    Every ``recording`` child has its attributes printed; when several are
    present the last one wins. Returns ``None`` when *head* has no
    ``recording`` child or the attribute is absent.
    """
    file_name = None
    for child in head:
        if child.tag == "recording":
            print(child.attrib)
            file_name = child.attrib.get("filename")
    return file_name


def segment_text(segment):
    """Return the text of *segment*'s child elements joined by single spaces.

    Children without any text (``.text is None``) are skipped; passing
    ``None`` to ``str.join`` would raise ``TypeError``.
    """
    return " ".join(x.text for x in segment if x.text is not None)


def export_segments(s_file):
    """Write one audio clip and one transcript file per segment of *s_file*.

    Args:
        s_file: Path of the XML file describing one recording.

    Raises:
        ValueError: if no recording filename can be found in the header.
        KeyError: if a segment lacks ``starttime`` or ``endtime``.
    """
    tree = ET.parse(str(s_file))
    root = tree.getroot()
    # Positional access: the first child of the root is scanned for the
    # recording entry, and the first child of the root's second child is
    # iterated as the list of segments.
    # NOTE(review): relies on this fixed element order -- confirm with schema.
    head = root[0]
    segments = root[1][0]

    file_name = recording_filename(head)
    if file_name is None:
        # Fail loudly instead of hitting a NameError or silently reusing the
        # name of the previously processed recording.
        raise ValueError(f"{s_file}: no recording filename found in header")

    # Invariant for the whole file, so build it once outside the loop.
    wav_path = wav_dir / f"{file_name}.wav"

    for segment in segments:
        # Indexing (not .get) so a missing attribute is reported by name.
        start_time = int(float(segment.attrib["starttime"]) * SAMPLE_RATE)
        end_time = int(float(segment.attrib["endtime"]) * SAMPLE_RATE)
        text = segment_text(segment)

        stem = f"{file_name}_seg{start_time}_{end_time}"
        # Read only the frames belonging to this segment.
        sound, _ = sf.read(wav_path, start=start_time, stop=end_time)
        sf.write(output_wav_dir / f"{stem}.wav", sound, SAMPLE_RATE)
        # write_text closes the file and pins the encoding, independent of
        # the platform's locale default.
        (output_txt_dir / f"{stem}.txt").write_text(text, encoding="utf-8")


output_wav_dir.mkdir(parents=True, exist_ok=True)
output_txt_dir.mkdir(parents=True, exist_ok=True)

# Sorted so that runs process the files in a reproducible order.
for s_file in sorted(segments_file.glob("*.xml")):
    export_segments(s_file)