lmxue committed on
Commit
7bbced2
1 Parent(s): a7e33c6

Add statistics.json and update args.json

Browse files
LJSpeech/phone_energys/statistics.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LJSpeech_LJSpeech": {
3
+ "voiced_positions": {
4
+ "mean": 37.32167273078069,
5
+ "std": 26.03995642040225,
6
+ "median": 33.35212326049805,
7
+ "min": 0.055834684520959854,
8
+ "max": 193.185302734375
9
+ },
10
+ "total_positions": {
11
+ "mean": 39.04022008133081,
12
+ "std": 28.566342788309033,
13
+ "median": 34.09701156616211,
14
+ "min": 0.055834684520959854,
15
+ "max": 250.4805908203125
16
+ }
17
+ }
18
+ }
LJSpeech/phone_pitches/statistics.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LJSpeech_LJSpeech": {
3
+ "voiced_positions": {
4
+ "mean": 207.58058673202913,
5
+ "std": 46.75336553769516,
6
+ "median": 200.32668035755094,
7
+ "min": 71.18287971496915,
8
+ "max": 548.2513848778569
9
+ },
10
+ "total_positions": {
11
+ "mean": 210.5202303777099,
12
+ "std": 51.35927989141311,
13
+ "median": 201.6134682945546,
14
+ "min": 71.18287971496915,
15
+ "max": 740.4636406694369
16
+ }
17
+ }
18
+ }
args.json ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "config/fs2.json",
3
+ "dataset": [
4
+ "LJSpeech",
5
+ ],
6
+ "dataset_path": {
7
+ "LJSpeech": "/home/datasets/LJSpeech-1.1",
8
+ },
9
+ "model": {
10
+ "max_seq_len": 1000,
11
+ "transformer": {
12
+ "conv_filter_size": 1024,
13
+ "conv_kernel_size": [
14
+ 9,
15
+ 1,
16
+ ],
17
+ "decoder_dropout": 0.2,
18
+ "decoder_head": 2,
19
+ "decoder_hidden": 256,
20
+ "decoder_layer": 6,
21
+ "encoder_dropout": 0.2,
22
+ "encoder_head": 2,
23
+ "encoder_hidden": 256,
24
+ "encoder_layer": 4,
25
+ },
26
+ "variance_embedding": {
27
+ "energy_quantization": "linear",
28
+ "n_bins": 256,
29
+ "pitch_quantization": "linear",
30
+ },
31
+ "variance_predictor": {
32
+ "dropout": 0.5,
33
+ "filter_size": 256,
34
+ "kernel_size": 3,
35
+ },
36
+ },
37
+ "model_type": "FastSpeech2",
38
+ "preprocess": {
39
+ "processed_dir": "ckpts/tts/fastspeech/",
40
+ "align_mel_duration": true,
41
+ "audio_dir": "audios",
42
+ "bits": 8,
43
+ "content_vector_dir": "content_vector",
44
+ "contentvec_dir": "contentvec",
45
+ "data_augment": false,
46
+ "dur_dir": "durs",
47
+ "duration_dir": "duration",
48
+ "emo2id": "emo2id.json",
49
+ "energy_dir": "energys",
50
+ "energy_extract_mode": "from_tacotron_stft",
51
+ "energy_norm": true,
52
+ "energy_remove_outlier": true,
53
+ "extract_amplitude_phase": false,
54
+ "extract_audio": true,
55
+ "extract_contentvec_feature": false,
56
+ "extract_duration": true,
57
+ "extract_energy": true,
58
+ "extract_label": false,
59
+ "extract_linear_spec": false,
60
+ "extract_mcep": false,
61
+ "extract_mel": true,
62
+ "extract_mert_feature": false,
63
+ "extract_pitch": true,
64
+ "extract_uv": false,
65
+ "extract_wenet_feature": false,
66
+ "extract_whisper_feature": false,
67
+ "f0_max": 800,
68
+ "f0_min": 71,
69
+ "file_lst": "file.lst",
70
+ "fmax": 8000,
71
+ "fmin": 0,
72
+ "hop_size": 256,
73
+ "imaginary_dir": "imaginarys",
74
+ "is_label": true,
75
+ "is_mu_law": true,
76
+ "lab_dir": "labs",
77
+ "label_dir": "labels",
78
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
79
+ "linear_dir": "linears",
80
+ "log_amplitude_dir": "log_amplitudes",
81
+ "mcep_dir": "mcep",
82
+ "mel_dir": "mels",
83
+ "mel_extract_mode": "taco",
84
+ "mel_min_max_norm": false,
85
+ "mel_min_max_stats_dir": "mel_min_max_stats",
86
+ "mert_dir": "mert",
87
+ "min_level_db": -115,
88
+ "n_fft": 1024,
89
+ "n_mel": 80,
90
+ "num_silent_frames": 8,
91
+ "phase_dir": "phases",
92
+ "phone_energy_dir": "phone_energys",
93
+ "phone_pitch_dir": "phone_pitches",
94
+ "phone_seq_file": "phone_seq_file",
95
+ "pitch_bin": 256,
96
+ "pitch_dir": "pitches",
97
+ "pitch_extractor": "dio",
98
+ "pitch_max": 1100.0,
99
+ "pitch_min": 50.0,
100
+ "pitch_norm": true,
101
+ "pitch_remove_outlier": true,
102
+ "raw_data": "raw_data",
103
+ "real_dir": "reals",
104
+ "ref_level_db": 20,
105
+ "sample_rate": 22050,
106
+ "spk2id": "spk2id.json",
107
+ "text_cleaners": [
108
+ "english_cleaners",
109
+ ],
110
+ "train_file": "train.json",
111
+ "trim_fft_size": 512,
112
+ "trim_hop_size": 128,
113
+ "trim_silence": false,
114
+ "trim_top_db": 30,
115
+ "trimmed_wav_dir": "trimmed_wavs",
116
+ "use_amplitude_phase": false,
117
+ "use_audio": false,
118
+ "use_dur": false,
119
+ "use_emoid": false,
120
+ "use_frame_duration": false,
121
+ "use_frame_energy": false,
122
+ "use_frame_pitch": false,
123
+ "use_lab": false,
124
+ "use_label": false,
125
+ "use_linear": false,
126
+ "use_log_scale_energy": false,
127
+ "use_log_scale_pitch": false,
128
+ "use_mel": true,
129
+ "use_min_max_norm_mel": false,
130
+ "use_one_hot": false,
131
+ "use_phn_seq": false,
132
+ "use_phone": true,
133
+ "use_phone_duration": false,
134
+ "use_phone_energy": true,
135
+ "use_phone_pitch": true,
136
+ "use_spkid": false,
137
+ "use_text": false,
138
+ "use_uv": false,
139
+ "use_wav": false,
140
+ "use_wenet": false,
141
+ "utt2emo": "utt2emo",
142
+ "utt2spk": "utt2spk",
143
+ "uv_dir": "uvs",
144
+ "valid_file": "test.json",
145
+ "wav_dir": "wavs",
146
+ "wenet_dir": "wenet",
147
+ "whisper_dir": "whisper",
148
+ "win_size": 1024,
149
+ },
150
+ "supported_model_type": [
151
+ "GANVocoder",
152
+ "Fastspeech2",
153
+ "DiffSVC",
154
+ "Transformer",
155
+ "EDM",
156
+ "CD",
157
+ ],
158
+ "task_type": "",
159
+ "train": {
160
+ "adam": {
161
+ "betas": [
162
+ 0.9,
163
+ 0.98,
164
+ ],
165
+ "eps": 1e-09,
166
+ "lr": 0.0625,
167
+ "weight_decay": 0.0,
168
+ },
169
+ "adamw": {
170
+ "lr": 0.0004,
171
+ },
172
+ "batch_size": 16,
173
+ "dataloader": {
174
+ "num_worker": 8,
175
+ "pin_memory": true,
176
+ },
177
+ "ddp": false,
178
+ "drop_last": true,
179
+ "grad_clip_thresh": 1.0,
180
+ "gradient_accumulation_step": 1,
181
+ "group_size": 4,
182
+ "keep_checkpoint_max": 5,
183
+ "keep_last": [
184
+ 3,
185
+ -1,
186
+ ],
187
+ "lr_scheduler": {
188
+ "num_warmup": 4000,
189
+ },
190
+ "max_epoch": -1,
191
+ "max_steps": 1000000,
192
+ "multi_speaker_training": false,
193
+ "optimizer": "Adam",
194
+ "random_seed": 10086,
195
+ "reducelronplateau": {
196
+ "factor": 0.8,
197
+ "min_lr": 0.0001,
198
+ "patience": 10,
199
+ },
200
+ "run_eval": [
201
+ false,
202
+ true,
203
+ ],
204
+ "sampler": {
205
+ "drop_last": true,
206
+ "holistic_shuffle": true,
207
+ },
208
+ "save_checkpoint_stride": [
209
+ 5,
210
+ 20,
211
+ ],
212
+ "save_checkpoints_steps": 10000,
213
+ "save_summary_steps": 500,
214
+ "scheduler": "NoamLR",
215
+ "sort_sample": true,
216
+ "total_training_steps": 50000,
217
+ "tracker": [
218
+ "tensorboard",
219
+ ],
220
+ "valid_interval": 10000,
221
+ },
222
+ "use_custom_dataset": false,
223
+ }