Spaces:
Sleeping
Sleeping
import os | |
import librosa | |
import numpy as np | |
from scipy.io import wavfile | |
from tqdm import tqdm | |
from text import _clean_text | |
def prepare_align(config): | |
in_dir = config["path"]["corpus_path"] | |
out_dir = config["path"]["raw_path"] | |
sampling_rate = config["preprocessing"]["audio"]["sampling_rate"] | |
max_wav_value = config["preprocessing"]["audio"]["max_wav_value"] | |
cleaners = config["preprocessing"]["text"]["text_cleaners"] | |
speaker = "EmoV_DB" | |
with open(os.path.join(in_dir, "metadata.csv"), encoding="utf-8") as f: | |
for line in tqdm(f): | |
parts = line.strip().split("|") | |
base_name = parts[0] | |
text = parts[1] | |
text = _clean_text(text, cleaners) | |
wav_path = os.path.join(in_dir, "wavs", "{}.wav".format(base_name)) | |
if os.path.exists(wav_path): | |
os.makedirs(os.path.join(out_dir, speaker), exist_ok=True) | |
wav, _ = librosa.load(wav_path, sampling_rate) | |
wav = wav / max(abs(wav)) * max_wav_value | |
wavfile.write( | |
os.path.join(out_dir, speaker, "{}.wav".format(base_name)), | |
sampling_rate, | |
wav.astype(np.int16), | |
) | |
with open( | |
os.path.join(out_dir, speaker, "{}.lab".format(base_name)), | |
"w", | |
) as f1: | |
f1.write(text) | |