# File size: 815 Bytes
# 24defad (presumably the revision hash shown by the file viewer)
# Input-side settings.
# `N/A` is a plain scalar and loads as the literal string "N/A", not a YAML
# null; presumably a placeholder for a path supplied elsewhere — TODO confirm.
data_root: N/A
input_channels: 1
# NOTE(review): presumably the per-frame feature dimension (compare `fbank80`
# in `hub.input_type`) — verify against the consumer.
input_feat_per_channel: 80
# Auxiliary-task section; a single entry named `source_unit` is declared.
multitask:
  source_unit:
    # `data` and `dict` hold the literal string "N/A" (not null); presumably
    # placeholders for paths that are filled in or unused — TODO confirm.
    data: N/A
    decoder_type: transformer
    dict: N/A
    encoder_layer: 6
    # Loads as a float (8.0), not an integer.
    loss_weight: 8.0
    target_type: text
# Output-side settings (all integers).
output_channels: 1
output_feat_per_channel: 1
output_feat_reduction_rate: 0
# NOTE(review): presumably in Hz (i.e. 16 kHz) — confirm with the consumer.
output_sample_rate: 16000
# Parameters for the `specaugment` entry; the same name appears in the
# `transforms._train` list below.
specaugment:
  freq_mask_F: 27
  freq_mask_N: 1
  time_mask_N: 1
  time_mask_T: 100
  # Loads as a float (1.0).
  time_mask_p: 1.0
  # NOTE(review): the key is spelled `time_wrap_W`. If the consumer looks up
  # `time_warp_W` instead, this entry is silently ignored — verify the expected
  # key name before setting a non-zero value.
  time_wrap_W: 0
# Named transform lists: `_eval` lists only `utterance_cmvn`, while `_train`
# lists `utterance_cmvn` and then `specaugment`.
transforms:
  _eval:
    - utterance_cmvn
  _train:
    - utterance_cmvn
    - specaugment
# Vocoder settings.
vocoder:
  dur_prediction: true
  # Literal string "N/A" (not null); presumably a placeholder for a checkpoint
  # path — TODO confirm.
  model_path: N/A
  speaker: false
  type: code_hifigan
# Settings grouped under `hub`; presumably read by a model-hub loading
# interface — TODO confirm the consumer.
hub:
  input_type: fbank80_w_utt_cmvn
  # The colons in this value are not followed by a space, so it loads as one
  # plain string.
  tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
  unit_vocoder: true
  # Presumably decoding options. Children use the same 2-space nesting step as
  # every other mapping in this file; the loaded data is unchanged.
  generation_args:
    beam: 10
    max_len_a: 1