Spaces:
Build error
Build error
| import argparse | |
| import glob | |
| import importlib | |
| import os | |
| import subprocess | |
| from utils.commons.hparams import set_hparams, hparams | |
| from utils.commons.multiprocess_utils import multiprocess_run_tqdm | |
| from utils.os_utils import remove_file | |
| from utils.text.encoding import get_encoding | |
def process_item(idx, txt_fn):
    """Prepare one transcript/audio pair as input for MFA alignment.

    Looks for an audio file next to ``txt_fn`` with the same stem
    (``.wav`` is preferred over ``.mp3``). When one exists, the transcript
    is run through the text processor and the alignment phoneme string is
    written to ``<mfa_process_dir>/<stem>/<stem>.lab``; the audio is run
    through ``preprocesser.process_wav`` and the file it returns is copied
    to ``<mfa_process_dir>/<stem>/<stem>.wav``.

    NOTE: this function reads the module-level names ``preprocesser``,
    ``txt_processor``, ``mfa_process_dir`` and ``preprocess_args``, which
    this file assigns only inside the ``__main__`` block.
    NOTE(review): worker processes presumably inherit them via fork --
    confirm before running with a different multiprocessing start method.

    Args:
        idx: Position of the item in the work list; not used here.
        txt_fn: Path of the transcript (``.txt``) file.

    Returns:
        None. Items without a matching audio file are skipped silently.
    """
    # Local import keeps this block self-contained (no new file-level import).
    import shutil

    base_fn = os.path.splitext(txt_fn)[0]
    basename = os.path.basename(base_fn)

    # Pick the first audio file that exists; bail out when there is none.
    for ext in ('.wav', '.mp3'):
        if os.path.exists(base_fn + ext):
            wav_fn = base_fn + ext
            break
    else:
        return

    # process text
    encoding = get_encoding(txt_fn)
    with open(txt_fn, encoding=encoding) as f:
        txt_raw = " ".join(f.readlines()).strip()
    _, _, phs_for_align, _ = preprocesser.process_text(
        txt_processor, txt_raw, hparams['preprocess_args'])

    item_dir = f'{mfa_process_dir}/{basename}'
    os.makedirs(item_dir, exist_ok=True)
    # Write explicitly as UTF-8 so the output does not depend on the locale's
    # default encoding (non-ASCII phonemes would otherwise fail or be written
    # in a platform-specific encoding).
    with open(f'{item_dir}/{basename}.lab', 'w', encoding='utf-8') as f:
        f.write(phs_for_align)

    # process wav
    new_wav_fn = preprocesser.process_wav(basename, wav_fn, mfa_process_dir, preprocess_args)
    # Copy without going through a shell: file names containing quotes, `$`
    # or backticks would break (or be interpreted by) a `cp "..."` command.
    shutil.copy(new_wav_fn, f'{item_dir}/{basename}.wav')
def build_mfa_command(input_dir, mfa_process_dir, dict_fn, num_workers):
    """Return the shell command that runs MFA alignment and collects results.

    The command chains four steps with ``&&``:
      1. ``mfa align`` on ``mfa_process_dir`` using ``dict_fn`` and
         ``<input_dir>/mfa_model.zip``, writing to ``<input_dir>/mfa_outputs``
         with ``<input_dir>/mfa_tmp`` as temp dir and ``num_workers`` jobs;
      2. copy everything two levels below ``mfa_outputs`` into ``input_dir``;
      3. copy everything two levels below ``mfa_process_dir`` into ``input_dir``;
      4. remove the temp, output and process directories.

    Every path is shell-quoted so directories containing spaces or shell
    metacharacters are passed through literally; the ``/*/*`` glob suffixes
    are appended outside the quotes so the shell still expands them.

    Args:
        input_dir: Directory holding the corpus and ``mfa_model.zip``.
        mfa_process_dir: Directory with the prepared per-item sub-directories.
        dict_fn: Path of the pronunciation dictionary passed to MFA.
        num_workers: Value for MFA's ``-j`` option.

    Returns:
        The command line as a single string, intended for ``shell=True``.
    """
    # Local import keeps this block self-contained (no new file-level import).
    from shlex import quote

    model_fn = quote(f'{input_dir}/mfa_model.zip')
    outputs_dir = quote(f'{input_dir}/mfa_outputs')
    tmp_dir = quote(f'{input_dir}/mfa_tmp')
    process_dir = quote(mfa_process_dir)
    dest_dir = quote(f'{input_dir}/')

    steps = [
        f'mfa align {process_dir} {quote(dict_fn)} {model_fn} {outputs_dir}'
        f' -t {tmp_dir} -j {num_workers}',
        f'cp -rf {outputs_dir}/*/* {dest_dir}',
        f'cp -rf {process_dir}/*/* {dest_dir}',
        f'rm -rf {tmp_dir} {outputs_dir} {process_dir}',  # remove tmp dir
    ]
    return ' && '.join(steps)


if __name__ == "__main__":
    set_hparams()
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--input_dir', type=str, default='', help='input dir')
    # parse_known_args: other command-line options are left for set_hparams.
    args, unknown = parser.parse_known_args()
    input_dir = args.input_dir
    if not input_dir:
        # With an empty input_dir every path below would resolve to the
        # filesystem root ('/mfa_inputs', '/mfa_tmp', ...) and be deleted.
        parser.error('--input_dir must be set to a non-empty directory')

    processed_data_dir = hparams['processed_data_dir']
    preprocess_args = hparams['preprocess_args']
    preprocess_args['sox_to_wav'] = True
    preprocess_args['trim_all_sil'] = True
    # preprocess_args['trim_sil'] = True
    # preprocess_args['denoise'] = True

    # Instantiate the preprocessor class named by the dotted path in hparams.
    pkg, _, cls_name = hparams["preprocess_cls"].rpartition(".")
    process_cls = getattr(importlib.import_module(pkg), cls_name)
    preprocesser = process_cls()
    txt_processor = preprocesser.txt_processor

    # os.cpu_count() may return None; fall back to a single worker then.
    num_workers = int(os.getenv('N_PROC', os.cpu_count() or 1))

    # Start from a clean working area.
    mfa_process_dir = f'{input_dir}/mfa_inputs'
    remove_file(mfa_process_dir, f'{input_dir}/mfa_tmp')
    os.makedirs(mfa_process_dir, exist_ok=True)
    os.makedirs(f'{mfa_process_dir}/processed_tmp', exist_ok=True)

    # Drain the result iterator; process_item is run for its side effects
    # only. It reads preprocesser, txt_processor, mfa_process_dir and
    # preprocess_args from the module globals assigned above.
    for _ in multiprocess_run_tqdm(
            process_item, list(enumerate(glob.glob(f'{input_dir}/*.txt')))):
        pass
    remove_file(f'{mfa_process_dir}/processed_tmp')

    subprocess.check_call(
        build_mfa_command(
            input_dir,
            mfa_process_dir,
            f'{processed_data_dir}/mfa_dict.txt',
            num_workers,
        ),
        shell=True)