lmxue committed on
Commit
0020e84
1 Parent(s): f537952

Upload 5 files

Browse files
args.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "config/valle.json",
3
+ "dataset": [
4
+ "LibriTTS",
5
+ ],
6
+ "dataset_path": {
7
+ "LibriTTS": "/home/aiscuser/librilight",
8
+ },
9
+ "exp_name": "amphion_librilight_valle_nar_gpus_adamw_coswarmup_16k_2e-4_load_libritts_mi200_new_symbols_dict",
10
+ "log_dir": "/blob/v-yuancwang/librilight_valle",
11
+ "model": {
12
+ "add_prenet": false,
13
+ "decoder_dim": 1024,
14
+ "nar_scale_factor": 1,
15
+ "nhead": 16,
16
+ "norm_first": true,
17
+ "num_decoder_layers": 12,
18
+ "num_quantizers": 8,
19
+ "prefix_mode": 1,
20
+ "prepend_bos": false,
21
+ "scaling_xformers": false,
22
+ "share_embedding": true,
23
+ },
24
+ "model_type": "VALLE",
25
+ "preprocess": {
26
+ "acoustic_token_dir": "acoutic_tokens",
27
+ "acoustic_token_extractor": "Encodec",
28
+ "align_mel_duration": false,
29
+ "audio_dir": "audios",
30
+ "bits": 8,
31
+ "contentvec_dir": "contentvec",
32
+ "data_augment": false,
33
+ "dur_dir": "durs",
34
+ "duration_dir": "duration",
35
+ "emo2id": "emo2id.json",
36
+ "energy_dir": "energys",
37
+ "energy_extract_mode": "from_mel",
38
+ "energy_norm": false,
39
+ "energy_remove_outlier": false,
40
+ "extract_acoustic_token": true,
41
+ "extract_audio": false,
42
+ "extract_contentvec_feature": false,
43
+ "extract_duration": false,
44
+ "extract_energy": false,
45
+ "extract_label": false,
46
+ "extract_linear_spec": false,
47
+ "extract_mcep": false,
48
+ "extract_mel": false,
49
+ "extract_mert_feature": false,
50
+ "extract_phoneme": true,
51
+ "extract_pitch": false,
52
+ "extract_uv": false,
53
+ "extract_wenet_feature": false,
54
+ "extract_whisper_feature": false,
55
+ "file_lst": "file.lst",
56
+ "fmax": 12000,
57
+ "fmin": 0,
58
+ "hop_size": 120,
59
+ "lab_dir": "labs",
60
+ "label_dir": "labels",
61
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
62
+ "linear_dir": "linears",
63
+ "max_duration": 30,
64
+ "max_token": 1600,
65
+ "mcep_dir": "mcep",
66
+ "mel_dir": "mels",
67
+ "mel_extract_mode": "",
68
+ "mel_min_max_norm": false,
69
+ "min_duration": 0.5,
70
+ "min_level_db": -115,
71
+ "min_token": 45,
72
+ "n_fft": 1024,
73
+ "n_mel": 80,
74
+ "num_silent_frames": 8,
75
+ "phone_dir": "phones",
76
+ "phone_energy_dir": "phone_energys",
77
+ "phone_pitch_dir": "phone_pitches",
78
+ "phone_seq_file": "phone_seq_file",
79
+ "pitch_dir": "pitches",
80
+ "pitch_extractor": "parselmouth",
81
+ "pitch_norm": false,
82
+ "pitch_remove_outlier": false,
83
+ "processed_dir": "/home/aiscuser/librilight",
84
+ "raw_data": "raw_data",
85
+ "ref_level_db": 20,
86
+ "sample_rate": 24000,
87
+ "sampling_rate": "24000",
88
+ "spk2id": "spk2id.json",
89
+ "symbols_dict": "symbols_from_libritts.dict",
90
+ "text_extractor": "espeak",
91
+ "train_file": "train_filter.json",
92
+ "trim_fft_size": 512,
93
+ "trim_hop_size": 128,
94
+ "trim_silence": false,
95
+ "trim_top_db": 30,
96
+ "trimmed_wav_dir": "trimmed_wavs",
97
+ "use_acoustic_token": true,
98
+ "use_audio": false,
99
+ "use_dur": false,
100
+ "use_emoid": false,
101
+ "use_frame_duration": false,
102
+ "use_frame_energy": false,
103
+ "use_frame_pitch": false,
104
+ "use_lab": false,
105
+ "use_label": false,
106
+ "use_linear": false,
107
+ "use_log_scale_energy": false,
108
+ "use_log_scale_pitch": false,
109
+ "use_mel": false,
110
+ "use_min_max_norm_mel": false,
111
+ "use_one_hot": false,
112
+ "use_phn_seq": false,
113
+ "use_phone": true,
114
+ "use_phone_duration": false,
115
+ "use_phone_energy": false,
116
+ "use_phone_pitch": false,
117
+ "use_spkid": false,
118
+ "use_text": false,
119
+ "use_uv": false,
120
+ "use_wav": false,
121
+ "use_wenet": false,
122
+ "utt2emo": "utt2emo",
123
+ "utt2spk": "utt2spk",
124
+ "uv_dir": "uvs",
125
+ "valid_file": "test_filter.json",
126
+ "wav_dir": "wavs",
127
+ "wenet_dir": "wenet",
128
+ "win_size": 480,
129
+ },
130
+ "supported_model_type": [
131
+ "GANVocoder",
132
+ "Fastspeech2",
133
+ "DiffSVC",
134
+ "Transformer",
135
+ "EDM",
136
+ "CD",
137
+ ],
138
+ "train": {
139
+ "adamw": {
140
+ "lr": 0.0004,
141
+ },
142
+ "base_lr": 0.0001,
143
+ "batch_size": 5,
144
+ "clip_norm": false,
145
+ "clip_value": true,
146
+ "dataloader": {
147
+ "num_worker": 28,
148
+ "pin_memory": true,
149
+ },
150
+ "ddp": false,
151
+ "gradient_accumulation_step": 1,
152
+ "keep_checkpoint_max": 5,
153
+ "keep_last": [
154
+ 3,
155
+ -1,
156
+ ],
157
+ "load_pretrained_model": false,
158
+ "load_pretrained_model_path": "/blob/v-yuancwang/librilight_valle/libritts_ckpt/final_epoch-0100_step-0837900_loss-3.883116",
159
+ "max_epoch": 40,
160
+ "max_sentences": 8,
161
+ "max_steps": 1000000,
162
+ "max_tokens": 4800,
163
+ "multi_speaker_training": false,
164
+ "optimizer": "AdamW",
165
+ "optimizer_name": "AdamW",
166
+ "random_seed": 10086,
167
+ "reducelronplateau": {
168
+ "factor": 0.8,
169
+ "min_lr": 0.0001,
170
+ "patience": 10,
171
+ },
172
+ "reset_interval": 200,
173
+ "run_eval": [
174
+ false,
175
+ true,
176
+ ],
177
+ "sampler": {
178
+ "drop_last": true,
179
+ "holistic_shuffle": true,
180
+ },
181
+ "save_checkpoint_stride": [
182
+ 1,
183
+ 1,
184
+ ],
185
+ "save_checkpoints_steps": 10000,
186
+ "save_summary_steps": 500,
187
+ "scheduler": "ReduceLROnPlateau",
188
+ "scheduler_name": "Cosine",
189
+ "start_epoch": 1,
190
+ "total_training_steps": 50000,
191
+ "tracker": [
192
+ "tensorboard",
193
+ ],
194
+ "train_stage": 0,
195
+ "valid_interval": 1000,
196
+ "warmup_steps": 16000,
197
+ },
198
+ "use_custom_dataset": false,
199
+ }
samples/.DS_Store ADDED
Binary file (6.15 kB). View file
 
samples/Amphion_VALL-E_Librilight_sample1.wav ADDED
Binary file (220 kB). View file
 
samples/Amphion_VALL-E_Librilight_sample2.wav ADDED
Binary file (216 kB). View file
 
symbols.dict ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <eps> 0
2
+ ! 1
3
+ " 2
4
+ , 3
5
+ . 4
6
+ : 5
7
+ ; 6
8
+ ? 7
9
+ _ 8
10
+ aɪ 9
11
+ aɪw 10
12
+ aɪə 11
13
+ aɪɚ 12
14
+ aʊ 13
15
+ b 14
16
+ d 15
17
+ dh 16
18
+ dt 17
19
+ dʒ 18
20
+ eɪ 19
21
+ f 20
22
+ fɛ 21
23
+ h 22
24
+ i 23
25
+ iə 24
26
+ iː 25
27
+ iːd 26
28
+ j 27
29
+ k 28
30
+ l 29
31
+ m 30
32
+ n 31
33
+ nʲ 32
34
+ oʊ 33
35
+ oː 34
36
+ oːɹ 35
37
+ p 36
38
+ r 37
39
+ s 38
40
+ t 39
41
+ tw 40
42
+ tʃ 41
43
+ uː 42
44
+ uːj 43
45
+ v 44
46
+ w 45
47
+ x 46
48
+ z 47
49
+ æ 48
50
+ ç 49
51
+ ð 50
52
+ ŋ 51
53
+ ɐ 52
54
+ ɑ 53
55
+ ɑː 54
56
+ ɑːɹ 55
57
+ ɔ 56
58
+ ɔɪ 57
59
+ ɔː 58
60
+ ɔːɹ 59
61
+ ɔːɹt 60
62
+ ə 61
63
+ əl 62
64
+ ən 63
65
+ ɚ 64
66
+ ɛ 65
67
+ ɛɹ 66
68
+ ɜː 67
69
+ ɡ 68
70
+ ɡʲ 69
71
+ ɪ 70
72
+ ɪɹ 71
73
+ ɬ 72
74
+ ɹ 73
75
+ ɾ 74
76
+ ʃ 75
77
+ ʃm 76
78
+ ʊ 77
79
+ ʊɹ 78
80
+ ʌ 79
81
+ ʒ 80
82
+ ʔ 81
83
+ ̃ 82
84
+ ̩ 83
85
+ θ 84
86
+ ᵻ 85
87
+ — 86
88
+ aɪʊɹ 87
89
+ … 88
90
+ aɪʊ 89
91
+ ɒ 90
92
+ a 91
93
+ əʊ 92
94
+ ʊə 93
95
+ eə 94
96
+ ɫ 95
97
+ » 96
98
+ ɛː 97
99
+ æː 98
100
+ 1 99
101
+ ɪː 100
102
+ ( 101
103
+ ko 102
104
+ ) 103
105
+ u 104
106
+ enus 105
107
+ « 106
108
+ es 107
109
+ e 108
110
+ ɲ 109
111
+ ɣ 110
112
+ β 111
113
+ o 112
114
+ tɕ 113
115
+ ɯ 114
116
+ ʁ 115
117
+ iːː 116
118
+ aʊə 117
119
+ ta 118
120
+ ɭ 119
121
+ q 120
122
+ ɐː 121
123
+ ph 122
124
+ kh 123
125
+ dʑ 124
126
+ aː 125
127
+ ¡ 126
128
+ ¿ 127
129
+ hy 128
130
+ ʰχ 129
131
+ fr 130
132
+ ø 131
133
+ hi 132
134
+ ʈ 133
135
+ ʉ 134
136
+ ʲ 135
137
+ ɖ 136
138
+ oːː 137