hahunavth committed on
Commit
229cbd0
1 Parent(s): d203e7c

Upload config for debug

Browse files
output/config/debug/model.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ num_emotion: 5
3
+ conformer:
4
+ encoder_dim: 256
5
+ decoder_dim: 256
6
+ num_encode_layers: 4
7
+ num_decode_layers: 6
8
+ num_attention_heads: 2
9
+ feed_forward_expansion_factor: 4
10
+ conv_expansion_factor: 2
11
+ feed_forward_dropout_p: 0.2
12
+ attention_dropout_p: 0.2
13
+ conv_dropout_p: 0.2
14
+ conv_kernel_size: 7
15
+ half_step_residual: true
16
+
17
+ reference_encoder:
18
+ encoder_dim: 128
19
+ dropout: 0.2
20
+
21
+ variance_predictor:
22
+ filter_size: 256
23
+ kernel_size: 3
24
+ dropout: 0.5
25
+
26
+ variance_embedding:
27
+ pitch_quantization: "linear"
28
+ energy_quantization: "linear"
29
+ n_bins: 256
30
+
31
+ max_seq_len: 1000
32
+
33
+ vocoder:
34
+ model: "HiFi-GAN"
35
+ speaker: "tth"
output/config/debug/preprocess.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset: "vlsp2023emo"
2
+
3
+ path:
4
+ corpus_path: "./data/pretrained_tts_dataset/tuyendv.dict"
5
+ lexicon_path: "../datasets/ess-vlsp2023-lexicon/lexicon.dict" # "data/lexicon"
6
+ raw_path: "./data/pretrained_tts_dataset_raw"
7
+ preprocessed_path: "../datasets/ess-vlsp2023-emo-processed-phoneme-level" # "processed_vlsp_data_phoneme_level"
8
+
9
+ emotion2id:
10
+ neutral: 0
11
+ happy: 1
12
+ sad: 2
13
+ angry: 3
14
+ surprise: 4
15
+
16
+ id2emotion:
17
+ 0: neutral
18
+ 1: happy
19
+ 2: sad
20
+ 3: angry
21
+ 4: surprise
22
+
23
+ smoothing_label: 0.1
24
+
25
+ preprocessing:
26
+ val_size: 512
27
+ text:
28
+ text_cleaners: []
29
+ language: "en"
30
+ audio:
31
+ sampling_rate: 22050
32
+ max_wav_value: 32768.0
33
+ stft:
34
+ filter_length: 1024
35
+ hop_length: 256
36
+ win_length: 1024
37
+ mel:
38
+ n_mel_channels: 80
39
+ mel_fmin: 0
40
+ mel_fmax: 8000
41
+
42
+ # Pitch and energy features below are both configured as "phoneme_level"
43
+ pitch:
44
+ feature: "phoneme_level"
45
+ normalization: True
46
+ energy:
47
+ feature: "phoneme_level"
48
+ normalization: True
output/config/debug/train.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ path:
2
+ ckpt_path: "../output/ckpt/vlsp2023emo"
3
+ log_path: "../output/log/vlsp2023emo"
4
+ result_path: "../output/result/vlsp2023emo"
5
+ hf:
6
+ use_hf: True
7
+ hf_repo: "hahunavth/abc"
8
+ restore_from_hf: True
9
+ push_to_hf: True
10
+
11
+ optimizer:
12
+ batch_size: 48 # 64
13
+ betas: [0.9, 0.98]
14
+ eps: 0.000000001
15
+ weight_decay: 0.0
16
+ grad_clip_thresh: 1.0
17
+ grad_acc_step: 1
18
+ warm_up_step: 2000
19
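+ anneal_steps: [300000, 400000, 500000] # NOTE(review): total_step is 400000, so the 500000 milestone lies beyond the end of training - confirm intended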
20
+ anneal_rate: 0.3
21
+ step:
22
+ total_step: 400000
23
+ log_step: 1000
24
+ synth_step: 5000
25
+ val_step: 1000
26
+ save_step: 2000