---
# Speech data/model configuration: input and output feature layout, a
# multitask section, SpecAugment parameters, per-split feature transforms,
# vocoder settings, and hub / generation options.
#
# NOTE(review): this file was restored to block style from a version in which
# every key sat on one physical line (not parseable as YAML). All keys and
# values are unchanged; the nesting was inferred from the key names and their
# order, so verify it against the schema of the code that loads this file.
#
# The "N/A" values are kept exactly as found; they load as the plain string
# "N/A" -- presumably placeholders replaced at load time (TODO confirm).

data_root: N/A
input_channels: 1
input_feat_per_channel: 80

multitask:
  source_unit:
    data: N/A
    decoder_type: transformer
    dict: N/A
    encoder_layer: 6
    loss_weight: 8.0
    # NOTE(review): placement inferred. `target_type` could instead belong
    # directly under `multitask`; key order alone does not settle it.
    target_type: text

output_channels: 1
output_feat_per_channel: 1
output_feat_reduction_rate: 0
output_sample_rate: 16000

specaugment:
  freq_mask_F: 27
  freq_mask_N: 1
  time_mask_N: 1
  time_mask_T: 100
  time_mask_p: 1.0
  time_wrap_W: 0

# Feature transforms listed per split: `_eval` has utterance_cmvn only,
# `_train` has utterance_cmvn followed by specaugment.
transforms:
  _eval:
    - utterance_cmvn
  _train:
    - utterance_cmvn
    - specaugment

vocoder:
  dur_prediction: true
  model_path: N/A
  speaker: false
  type: code_hifigan

hub:
  input_type: fbank80_w_utt_cmvn
  tts_model_id: facebookresearch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10
  unit_vocoder: true

# NOTE(review): assumed to be a top-level section rather than a child of
# `hub` -- confirm against the loader.
generation_args:
  beam: 10
  max_len_a: 1