data_root: N/A | |
input_channels: 1 | |
input_feat_per_channel: 80 | |
multitask: | |
source_unit: | |
data: N/A | |
decoder_type: transformer | |
dict: N/A | |
encoder_layer: 6 | |
loss_weight: 8.0 | |
target_type: text | |
output_channels: 1 | |
output_feat_per_channel: 1 | |
output_feat_reduction_rate: 0 | |
output_sample_rate: 16000 | |
specaugment: | |
freq_mask_F: 27 | |
freq_mask_N: 1 | |
time_mask_N: 1 | |
time_mask_T: 100 | |
time_mask_p: 1.0 | |
time_wrap_W: 0 | |
transforms: | |
_eval: | |
- utterance_cmvn | |
_train: | |
- utterance_cmvn | |
- specaugment | |
vocoder: | |
dur_prediction: true | |
model_path: N/A | |
speaker: false | |
type: code_hifigan | |
hub: | |
input_type: fbank80_w_utt_cmvn | |
tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur | |
unit_vocoder: true | |
generation_args: | |
beam: 10 | |
max_len_a: 1 | |