audio_root: N/A | |
standardize_audio: true | |
use_audio_input: true | |
vocab_filename: dict.txt | |
tgt_lang: <lang:en> | |
eos_token: <lang:en> | |
hub: | |
input_type: standardized_waveform | |
tts_model_id: facebookresearch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur | |
unit_vocoder: true | |
generation_args: | |
beam: 10 | |
max_len_a: 0.003125 |