Ionut-Bostan's picture
Upload 82 files
feec0bf
raw
history blame
No virus
1.44 kB
import os
import librosa
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
from text import _clean_text
def prepare_align(config):
    """Build per-utterance ``.wav``/``.lab`` pairs from an LJSpeech-style corpus.

    Reads ``<corpus_path>/metadata.csv`` (pipe-separated; field 0 is the
    utterance id, field 1 is the transcript). For every utterance whose audio
    exists at ``<corpus_path>/wavs/<id>.wav`` it writes, under
    ``<raw_path>/LJSpeech/``:

    * ``<id>.wav`` -- the audio loaded at the configured sampling rate,
      peak-normalised to ``max_wav_value`` and stored as 16-bit PCM;
    * ``<id>.lab`` -- the transcript after ``_clean_text`` with the configured
      cleaners, encoded as UTF-8.

    Utterances without a matching wav file and blank metadata lines are
    skipped.

    Args:
        config: Nested mapping providing ``path.corpus_path``,
            ``path.raw_path``, ``preprocessing.audio.sampling_rate``,
            ``preprocessing.audio.max_wav_value`` and
            ``preprocessing.text.text_cleaners``.

    Raises:
        IndexError: If a non-blank metadata line has fewer than two
            ``|``-separated fields.
    """
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    speaker = "LJSpeech"

    speaker_dir = os.path.join(out_dir, speaker)
    int16_range = np.iinfo(np.int16)

    with open(os.path.join(in_dir, "metadata.csv"), encoding="utf-8") as f:
        # Loop-invariant: create the output directory once, not per utterance.
        os.makedirs(speaker_dir, exist_ok=True)

        for line in tqdm(f):
            line = line.strip()
            if not line:
                # Tolerate blank/trailing lines instead of raising IndexError.
                continue

            parts = line.split("|")
            base_name = parts[0]
            text = parts[1]

            wav_path = os.path.join(in_dir, "wavs", "{}.wav".format(base_name))
            if not os.path.exists(wav_path):
                continue

            # Only clean the text for utterances that will actually be written.
            text = _clean_text(text, cleaners)

            # ``sr`` must be passed by keyword: it is keyword-only in
            # librosa >= 0.10 and the keyword form also works on older versions.
            wav, _ = librosa.load(wav_path, sr=sampling_rate)

            # Vectorised peak; ``initial`` keeps an empty signal from raising.
            peak = np.max(np.abs(wav), initial=0.0)
            if peak > 0:
                wav = wav / peak * max_wav_value
            # else: silent (or empty) audio -- leave as zeros rather than
            # dividing by zero and writing NaN-derived garbage.

            # A positive peak scaled to e.g. 32768 does not fit in int16;
            # clip so the cast cannot overflow/wrap.
            wav = np.clip(wav, int16_range.min, int16_range.max)

            wavfile.write(
                os.path.join(speaker_dir, "{}.wav".format(base_name)),
                sampling_rate,
                wav.astype(np.int16),
            )

            # Match the UTF-8 encoding used to read the metadata so non-ASCII
            # transcripts do not depend on the platform default encoding.
            with open(
                os.path.join(speaker_dir, "{}.lab".format(base_name)),
                "w",
                encoding="utf-8",
            ) as f1:
                f1.write(text)