Spaces:
Build error
Build error
File size: 1,644 Bytes
b93970c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# FastSpeech2 TTS experiment settings (see task_cls below).
# NOTE(review): base_config presumably names a parent config whose values this
# file overrides — confirm against the config loader.
base_config: configs/tts/base.yaml
# Dotted path that names the task class.
task_cls: tasks.tts.fs2.FastSpeech2Task
# model
# NOTE(review): hidden_size looks like the shared model width — confirm in the
# model code.
hidden_size: 256
dropout: 0.1
# options: fft|tacotron|tacotron2|conformer
encoder_type: fft
# for tacotron encoder
encoder_K: 8
# options: fft|rnn|conv|conformer
decoder_type: fft
use_pos_embed: true
# duration
# NOTE(review): -1 looks like a sentinel rather than a real width (possibly
# "reuse hidden_size") — confirm in the predictor code.
predictor_hidden: -1
predictor_kernel: 5
predictor_layers: 2
# NOTE(review): the dur_predictor_* keys presumably apply to the duration
# predictor only, in place of the generic predictor_* values — TODO confirm.
dur_predictor_kernel: 3
dur_predictor_layers: 2
predictor_dropout: 0.5
# pitch and energy
use_pitch_embed: true
# options: frame|ph|cwt
pitch_type: ph
use_uv: true
# NOTE(review): the cwt_* keys are presumably only consulted when pitch_type
# is cwt — confirm in the pitch predictor code.
cwt_hidden_size: 128
cwt_layers: 2
cwt_loss: l1
cwt_add_f0_loss: false
cwt_std_scale: 0.8
pitch_ar: false
# Disabled setting, kept for reference:
# pitch_embed_type: 0q
# options: l1|l2|ssim
pitch_loss: l1
pitch_norm: log
use_energy_embed: false
# reference encoder and speaker embedding
use_spk_id: false
use_split_spk_id: false
use_spk_embed: false
use_var_enc: false
lambda_commit: 0.25
ref_norm_layer: bn
# Every list entry below is one comma-joined triple:
#   conv_hidden_size,conv_stride,conv_kernel_size
# conv_hidden_size=0: use hidden_size.
# The triples are quoted so they are always loaded as strings.
pitch_enc_hidden_stride_kernel:
  - '0,2,5'
  - '0,2,5'
  - '0,2,5'
dur_enc_hidden_stride_kernel:
  - '0,2,3'
  - '0,2,3'
  - '0,1,3'
# mel
# One of l1|l2|gdl|ssim, or a combination written like l1:0.5|ssim:0.5.
# Quoted because the value contains ':' and '|'.
mel_loss: 'l1:0.5|ssim:0.5'
# loss lambda
# NOTE(review): the lambda_* values presumably weight the matching loss terms
# (f0, uv, energy, phone/sentence/word duration) — confirm in the task code.
lambda_f0: 1.0
lambda_uv: 1.0
lambda_energy: 0.1
lambda_ph_dur: 1.0
lambda_sent_dur: 1.0
lambda_word_dur: 1.0
# NOTE(review): predictor_grad is not a lambda_* key and its role is not
# visible in this file — TODO document after checking where it is read.
predictor_grad: 0.1
# train and eval
# Empty string: no checkpoint path is configured here.
pretrain_fs_ckpt: ''
warmup_updates: 2000
# NOTE(review): max_tokens / max_sentences look like per-batch caps — confirm
# in the data loading code.
max_tokens: 32000
max_sentences: 100000
max_eval_sentences: 1
max_updates: 120000
num_valid_plots: 5
num_test_samples: 0
# Empty list: no test ids are listed here.
test_ids: []
# NOTE(review): "gt" presumably stands for ground truth (use reference
# durations / f0 instead of predicted ones) — confirm in the task code.
use_gt_dur: false
use_gt_f0: false
# exp
# other options noted for dur_loss: huber|mol
dur_loss: mse
norm_type: gn