dathudeptrai committed on
Commit 6482e82 • 1 Parent(s): cd8a09b

😘 Update config file.

Files changed (1)
  1. config.yml +85 -58
config.yml CHANGED
@@ -1,59 +1,86 @@
- allow_cache: true
- batch_size: 32
- config: egs/ljspeech/conf/tacotron2.v1.yaml
- dev_dir: ./egs/ljspeech/dump/valid/
- end_ratio_value: 0.0
- eval_interval_steps: 500
- format: npy
- hop_size: 256
- is_shuffle: true
- log_interval_steps: 100
- mel_length_threshold: 32
- mixed_precision: false
- num_save_intermediate_results: 1
- optimizer_params:
-   decay_steps: 150000
-   end_learning_rate: 1.0e-05
-   initial_learning_rate: 0.001
-   warmup_proportion: 0.02
-   weight_decay: 0.001
- outdir: ./egs/ljspeech/exp/tacotron2.v1
- remove_short_samples: true
- resume: ./egs/ljspeech/exp/tacotron2.v1/checkpoints/ckpt-45000
- save_interval_steps: 5000
- schedule_decay_steps: 50000
- start_ratio_value: 0.5
- start_schedule_teacher_forcing: 250000
  tacotron2_params:
-   attention_dim: 128
-   attention_filters: 32
-   attention_kernel: 31
-   attention_type: lsa
-   decoder_lstm_units: 1024
-   embedding_dropout_prob: 0.1
-   embedding_hidden_size: 512
-   encoder_conv_activation: relu
-   encoder_conv_dropout_rate: 0.5
-   encoder_conv_filters: 512
-   encoder_conv_kernel_sizes: 5
-   encoder_lstm_units: 256
-   initializer_range: 0.02
-   n_conv_encoder: 5
-   n_conv_postnet: 5
-   n_lstm_decoder: 1
-   n_mels: 80
-   n_prenet_layers: 2
-   n_speakers: 1
-   postnet_conv_filters: 512
-   postnet_conv_kernel_sizes: 5
-   postnet_dropout_rate: 0.1
-   prenet_activation: relu
-   prenet_dropout_rate: 0.5
-   prenet_units: 256
-   reduction_factor: 1
- train_dir: ./egs/ljspeech/dump/train/
- train_max_steps: 200000
- use_fixed_shapes: true
- use_norm: true
- verbose: 1
- version: 0.3.4
+ # This is the hyperparameter configuration file for Tacotron2 v1.
+ # Please make sure this is adjusted for the LJSpeech dataset. If you want to
+ # apply it to another dataset, you might need to carefully change some parameters.
+ # This configuration runs for 200k iters, but 65k iters is enough to get a good model.
+
+ ###########################################################
+ #                FEATURE EXTRACTION SETTING               #
+ ###########################################################
+ hop_size: 256 # Hop size.
+ format: "npy"
+
+
+ ###########################################################
+ #              NETWORK ARCHITECTURE SETTING               #
+ ###########################################################
+ model_type: "tacotron2"
+
  tacotron2_params:
+   dataset: ljspeech
+   embedding_hidden_size: 512
+   initializer_range: 0.02
+   embedding_dropout_prob: 0.1
+   n_speakers: 1
+   n_conv_encoder: 5
+   encoder_conv_filters: 512
+   encoder_conv_kernel_sizes: 5
+   encoder_conv_activation: 'relu'
+   encoder_conv_dropout_rate: 0.5
+   encoder_lstm_units: 256
+   n_prenet_layers: 2
+   prenet_units: 256
+   prenet_activation: 'relu'
+   prenet_dropout_rate: 0.5
+   n_lstm_decoder: 1
+   reduction_factor: 1
+   decoder_lstm_units: 1024
+   attention_dim: 128
+   attention_filters: 32
+   attention_kernel: 31
+   n_mels: 80
+   n_conv_postnet: 5
+   postnet_conv_filters: 512
+   postnet_conv_kernel_sizes: 5
+   postnet_dropout_rate: 0.1
+   attention_type: "lsa"
+
+ ###########################################################
+ #                   DATA LOADER SETTING                   #
+ ###########################################################
+ batch_size: 32 # Batch size for each GPU, assuming gradient_accumulation_steps == 1.
+ remove_short_samples: true # Whether to remove samples whose length is less than batch_max_steps.
+ allow_cache: true # Whether to cache the dataset. If true, it requires CPU memory.
+ mel_length_threshold: 32 # Remove all targets whose mel_length <= 32.
+ is_shuffle: true # Shuffle the dataset after each epoch.
+ use_fixed_shapes: true # Use fixed shapes for training (2x speed-up);
+                        # see https://github.com/dathudeptrai/TensorflowTTS/issues/34#issuecomment-642309118
+
+ ###########################################################
+ #             OPTIMIZER & SCHEDULER SETTING               #
+ ###########################################################
+ optimizer_params:
+   initial_learning_rate: 0.001
+   end_learning_rate: 0.00001
+   decay_steps: 150000 # < train_max_steps is recommended.
+   warmup_proportion: 0.02
+   weight_decay: 0.001
+
+ gradient_accumulation_steps: 1
+ var_train_expr: null # Trainable variable expr (e.g. 'embeddings|decoder_cell');
+                      # entries must be separated by |. If var_train_expr is null,
+                      # all variables are trained.
+ ###########################################################
+ #                     INTERVAL SETTING                    #
+ ###########################################################
+ train_max_steps: 200000 # Number of training steps.
+ save_interval_steps: 2000 # Interval steps to save a checkpoint.
+ eval_interval_steps: 500 # Interval steps to evaluate the network.
+ log_interval_steps: 200 # Interval steps to record the training log.
+ start_schedule_teacher_forcing: 200001 # Set above train_max_steps, so scheduled teacher forcing is never applied.
+ start_ratio_value: 0.5 # Start ratio of scheduled teacher forcing.
+ schedule_decay_steps: 50000 # Decay steps of scheduled teacher forcing.
+ end_ratio_value: 0.0 # End ratio of scheduled teacher forcing.
+ ###########################################################
+ #                      OTHER SETTING                      #
+ ###########################################################
+ num_save_intermediate_results: 1 # Number of results to be saved as intermediate results.
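
For context outside the diff: the updated config.yml is plain YAML, so it can be loaded and inspected independently of the TensorFlowTTS training scripts. The snippet below is a minimal sketch assuming only PyYAML and a local copy of the file; the key names come from the added lines above, but the loading code itself is not part of this commit.

```python
# Minimal sketch: load config.yml and read a few of the values changed in this commit.
# Assumes PyYAML is installed and config.yml is in the current directory.
import yaml

with open("config.yml") as f:
    config = yaml.safe_load(f)

# Top-level training settings.
print(config["model_type"])       # "tacotron2"
print(config["batch_size"])       # 32
print(config["train_max_steps"])  # 200000

# Nested model hyperparameters.
taco = config["tacotron2_params"]
print(taco["n_mels"], taco["reduction_factor"])  # 80 1

# Nested optimizer settings.
opt = config["optimizer_params"]
print(opt["initial_learning_rate"], opt["decay_steps"])  # 0.001 150000
```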
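
The optimizer_params block describes a warmup phase followed by a decay from initial_learning_rate down to end_learning_rate over decay_steps. The sketch below is one plausible reading of those numbers using stock tf.keras schedules, assuming warmup_proportion is measured against train_max_steps and the decay is polynomial (linear by default); it is an illustration under those assumptions, not the exact optimizer code in the TensorFlowTTS trainer (which also applies weight_decay).

```python
import tensorflow as tf

# Values taken from optimizer_params and train_max_steps in this commit.
initial_learning_rate = 0.001
end_learning_rate = 0.00001
decay_steps = 150000
warmup_proportion = 0.02
train_max_steps = 200000

# Assumed interpretation: warmup_proportion is applied to train_max_steps.
warmup_steps = int(warmup_proportion * train_max_steps)  # 4000

# Polynomial (linear by default) decay from 1e-3 to 1e-5 over 150k steps.
decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=decay_steps,
    end_learning_rate=end_learning_rate,
)


class WarmupThenDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Linear warmup to the initial learning rate, then the decay above (sketch only)."""

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        warmup_lr = initial_learning_rate * (step + 1.0) / float(warmup_steps)
        return tf.where(step < warmup_steps, warmup_lr, decay(step - warmup_steps))


optimizer = tf.keras.optimizers.Adam(learning_rate=WarmupThenDecay())
```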