import os | |
import hydra | |
import librosa | |
import utils | |
from os.path import expanduser, exists, basename, join | |
from utils import read_filelist, write_filelist, find_all_files | |
from tqdm import tqdm | |
def preprocess(cfg): | |
os.makedirs('filelists', exist_ok=True) | |
# train | |
root = cfg.preprocess.datasets.LibriSpeech.root_val | |
root = expanduser(root) | |
trainfiles = [] | |
print(f'Root: {root}') | |
for subset in cfg.preprocess.datasets.LibriSpeech.testsets: | |
files = find_all_files(join(root, subset), '.flac') | |
print(f'Found {len(files)} flac files in {subset}') | |
for i in range(len(files)): | |
files[i][1] = files[i][1].replace(root, '').lstrip('/') | |
trainfiles.extend(files) | |
print(f'Write train filelist to {cfg.preprocess.view.test_filelist}') | |
os.makedirs('filelists', exist_ok=True) | |
utils.write_filelist(trainfiles, cfg.preprocess.view.test_filelist) | |
if __name__ == '__main__': | |
preprocess() | |