|
import os |
|
import shutil |
|
import torch |
|
import torchaudio |
|
from pathlib import Path |
|
|
|
def _write_name2text(phoneme_path, name2text_path, language):
    """Convert a ``path|text`` phoneme listing into the tab-separated name2text file.

    Each input line is stripped and split on ``|``. Lines with fewer than two
    fields (blank or malformed lines) are skipped. For every remaining line one
    row is written: ``<basename of field 0>\\t<field 1>\\t0\\t<language>``.
    Fields after the second are ignored.

    Args:
        phoneme_path: Path of the UTF-8 input listing.
        name2text_path: Path of the UTF-8 output file (overwritten).
        language: Tag written into the last column of every row.

    Returns:
        The number of rows written.
    """
    written = 0
    with open(phoneme_path, "r", encoding="utf8") as f_in, \
            open(name2text_path, "w", encoding="utf8") as f_out:
        for line in f_in:
            parts = line.strip().split("|")
            if len(parts) < 2:
                continue
            wav_name = os.path.basename(parts[0])
            text = parts[1]
            # NOTE(review): the constant "0" third column is kept from the
            # original script; its meaning is defined by the downstream
            # consumer of 2-name2text.txt - confirm there before changing it.
            f_out.write(f"{wav_name}\t{text}\t0\t{language}\n")
            written += 1
    return written


def _resample_wavs(wav_dir, out_dir, target_sr=32000):
    """Copy every ``*.wav`` in ``wav_dir`` to ``out_dir`` at ``target_sr`` Hz.

    Files already at ``target_sr`` are re-saved unchanged; others are passed
    through ``torchaudio.transforms.Resample`` first. The output keeps the
    input file name.

    Args:
        wav_dir: Directory scanned (non-recursively) for ``.wav`` files.
        out_dir: Existing directory that receives the converted files.
        target_sr: Sample rate, in Hz, of the written files.

    Returns:
        The number of files written.
    """
    # One Resample transform per distinct source rate: building the transform
    # is the expensive part, so reuse it instead of rebuilding it per file.
    resamplers = {}
    converted = 0
    # Sorted so the processing order does not depend on the filesystem.
    for wav_file in sorted(os.listdir(wav_dir)):
        if not wav_file.endswith(".wav"):
            continue
        src_path = os.path.join(wav_dir, wav_file)
        dst_path = os.path.join(out_dir, wav_file)

        waveform, sr = torchaudio.load(src_path)
        if sr != target_sr:
            resampler = resamplers.get(sr)
            if resampler is None:
                resampler = torchaudio.transforms.Resample(sr, target_sr)
                resamplers[sr] = resampler
            waveform = resampler(waveform)

        torchaudio.save(dst_path, waveform, target_sr)
        converted += 1
    return converted


def prepare_data_stage2(data_dir="data8", exp_dir="logs/s2", language="Hindi"):
    """Prepare data for stage 2 training.

    Steps performed:

    1. Resolve ``data_dir`` and ``exp_dir`` against the directory two levels
       above this file (absolute paths are used as given, because
       ``os.path.join`` discards the prefix when the second part is absolute).
    2. Create ``exp_dir`` and its ``4-cnhubert`` and ``5-wav32k``
       sub-directories. ``4-cnhubert`` is only created here, not populated.
    3. Convert ``<data_dir>/phoneme.txt`` (``path|text`` per line) into
       ``<exp_dir>/2-name2text.txt``.
    4. Write a 32 kHz copy of every ``<data_dir>/wavs/*.wav`` file into
       ``<exp_dir>/5-wav32k``.

    Args:
        data_dir: Dataset directory containing ``phoneme.txt`` and ``wavs/``.
        exp_dir: Experiment directory that receives the prepared files.
        language: Language tag written into the last column of every
            ``2-name2text.txt`` row. Defaults to ``"Hindi"``, the value that
            was previously hard-coded.

    Raises:
        FileNotFoundError: If ``phoneme.txt`` or the ``wavs`` directory does
            not exist.
    """
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    data_dir = os.path.join(root_dir, data_dir)
    exp_dir = os.path.join(root_dir, exp_dir)

    print(f"Data directory: {data_dir}")
    print(f"Experiment directory: {exp_dir}")

    os.makedirs(exp_dir, exist_ok=True)
    os.makedirs(os.path.join(exp_dir, "4-cnhubert"), exist_ok=True)
    os.makedirs(os.path.join(exp_dir, "5-wav32k"), exist_ok=True)

    phoneme_path = os.path.join(data_dir, "phoneme.txt")
    name2text_path = os.path.join(exp_dir, "2-name2text.txt")

    print(f"Reading phoneme data from: {phoneme_path}")
    print(f"Writing text data to: {name2text_path}")

    _write_name2text(phoneme_path, name2text_path, language)

    wav_dir = os.path.join(data_dir, "wavs")
    wav32k_dir = os.path.join(exp_dir, "5-wav32k")

    print(f"Processing wav files from: {wav_dir}")
    print(f"Saving to: {wav32k_dir}")

    _resample_wavs(wav_dir, wav32k_dir, 32000)

    print("Data preparation complete. Please run the Hubert feature extraction before training.")
|
|
|
# Script entry point: run the preparation with the default data and
# experiment directories when this file is executed directly.
if __name__ == "__main__":
    prepare_data_stage2()