Siddhant committed on
Commit
d6b81d9
1 Parent(s): a4bc019

import from zenodo

Browse files
Files changed (26) hide show
  1. README.md +50 -0
  2. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/50epoch.pth +3 -0
  3. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml +393 -0
  4. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_backward_time.png +0 -0
  5. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_fake_loss.png +0 -0
  6. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_forward_time.png +0 -0
  7. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_loss.png +0 -0
  8. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_optim_step_time.png +0 -0
  9. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_real_loss.png +0 -0
  10. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_train_time.png +0 -0
  11. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_adv_loss.png +0 -0
  12. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_backward_time.png +0 -0
  13. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_dur_loss.png +0 -0
  14. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_feat_match_loss.png +0 -0
  15. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_forward_time.png +0 -0
  16. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_kl_loss.png +0 -0
  17. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_loss.png +0 -0
  18. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_mel_loss.png +0 -0
  19. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_optim_step_time.png +0 -0
  20. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_train_time.png +0 -0
  21. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/gpu_max_cached_mem_GB.png +0 -0
  22. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/iter_time.png +0 -0
  23. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim0_lr0.png +0 -0
  24. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim1_lr0.png +0 -0
  25. exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/train_time.png +0 -0
  26. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - text-to-speech
6
+ language: ja
7
+ datasets:
8
+ - jvs
9
+ license: cc-by-4.0
10
+ ---
11
+ ## ESPnet2 TTS pretrained model
12
+ ### `kan-bayashi/jvs_tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause_latest`
13
+ ♻️ Imported from https://zenodo.org/record/5432540/
14
+
15
+ This model was trained by kan-bayashi using the jvs/tts1 recipe in [espnet](https://github.com/espnet/espnet/).
16
+ ### Demo: How to use in ESPnet2
17
+ ```python
18
+ # coming soon
19
+ ```
20
+ ### Citing ESPnet
21
+ ```bibtex
22
+ @inproceedings{watanabe2018espnet,
23
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
24
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
25
+ year={2018},
26
+ booktitle={Proceedings of Interspeech},
27
+ pages={2207--2211},
28
+ doi={10.21437/Interspeech.2018-1456},
29
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
30
+ }
31
+ @inproceedings{hayashi2020espnet,
32
+ title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
33
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
34
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
35
+ pages={7654--7658},
36
+ year={2020},
37
+ organization={IEEE}
38
+ }
39
+ ```
40
+ or arXiv:
41
+ ```bibtex
42
+ @misc{watanabe2018espnet,
43
+ title={ESPnet: End-to-End Speech Processing Toolkit},
44
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
45
+ year={2018},
46
+ eprint={1804.00015},
47
+ archivePrefix={arXiv},
48
+ primaryClass={cs.CL}
49
+ }
50
+ ```
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/50epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:507e8ba23d0e06a772ebf5ec37826efce3b94782af6045313b5203a19eb32695
3
+ size 372578383
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/tuning/finetune_vits.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause
7
+ ngpu: 1
8
+ seed: 777
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 41512
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: true
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - train
38
+ - total_count
39
+ - max
40
+ keep_nbest_models: 10
41
+ grad_clip: -1
42
+ grad_clip_type: 2.0
43
+ grad_noise: false
44
+ accum_grad: 1
45
+ no_forward_run: false
46
+ resume: true
47
+ train_dtype: float32
48
+ use_amp: false
49
+ log_interval: 50
50
+ use_tensorboard: true
51
+ use_wandb: false
52
+ wandb_project: null
53
+ wandb_id: null
54
+ wandb_entity: null
55
+ wandb_name: null
56
+ wandb_model_log_interval: -1
57
+ detect_anomaly: false
58
+ pretrain_path: null
59
+ init_param:
60
+ - ../../jsut/tts1/exp/tts_train_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/latest.pth:tts:tts
61
+ ignore_init_mismatch: false
62
+ freeze_param: []
63
+ num_iters_per_epoch: 1000
64
+ batch_size: 20
65
+ valid_batch_size: null
66
+ batch_bins: 5000000
67
+ valid_batch_bins: null
68
+ train_shape_file:
69
+ - exp/tts_stats_jvs001_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/text_shape.phn
70
+ - exp/tts_stats_jvs001_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/train/speech_shape
71
+ valid_shape_file:
72
+ - exp/tts_stats_jvs001_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/text_shape.phn
73
+ - exp/tts_stats_jvs001_raw_linear_spectrogram_phn_jaconv_pyopenjtalk_accent_with_pause/valid/speech_shape
74
+ batch_type: numel
75
+ valid_batch_type: null
76
+ fold_length:
77
+ - 150
78
+ - 204800
79
+ sort_in_batch: descending
80
+ sort_batch: descending
81
+ multiple_iterator: false
82
+ chunk_length: 500
83
+ chunk_shift_ratio: 0.5
84
+ num_cache_chunks: 1024
85
+ train_data_path_and_name_and_type:
86
+ - - dump/22k/raw/jvs001_tr_no_dev/text
87
+ - text
88
+ - text
89
+ - - dump/22k/raw/jvs001_tr_no_dev/wav.scp
90
+ - speech
91
+ - sound
92
+ valid_data_path_and_name_and_type:
93
+ - - dump/22k/raw/jvs001_dev/text
94
+ - text
95
+ - text
96
+ - - dump/22k/raw/jvs001_dev/wav.scp
97
+ - speech
98
+ - sound
99
+ allow_variable_data_keys: false
100
+ max_cache_size: 0.0
101
+ max_cache_fd: 32
102
+ valid_max_cache_size: null
103
+ optim: adamw
104
+ optim_conf:
105
+ lr: 2.0e-05
106
+ betas:
107
+ - 0.8
108
+ - 0.99
109
+ eps: 1.0e-09
110
+ weight_decay: 0.0
111
+ scheduler: exponentiallr
112
+ scheduler_conf:
113
+ gamma: 0.999875
114
+ optim2: adamw
115
+ optim2_conf:
116
+ lr: 2.0e-05
117
+ betas:
118
+ - 0.8
119
+ - 0.99
120
+ eps: 1.0e-09
121
+ weight_decay: 0.0
122
+ scheduler2: exponentiallr
123
+ scheduler2_conf:
124
+ gamma: 0.999875
125
+ generator_first: false
126
+ token_list:
127
+ - <blank>
128
+ - <unk>
129
+ - '1'
130
+ - '2'
131
+ - '0'
132
+ - '3'
133
+ - '4'
134
+ - '-1'
135
+ - '5'
136
+ - a
137
+ - o
138
+ - '-2'
139
+ - i
140
+ - '-3'
141
+ - u
142
+ - e
143
+ - k
144
+ - n
145
+ - t
146
+ - '6'
147
+ - r
148
+ - '-4'
149
+ - s
150
+ - N
151
+ - m
152
+ - pau
153
+ - '7'
154
+ - sh
155
+ - d
156
+ - g
157
+ - w
158
+ - '8'
159
+ - U
160
+ - '-5'
161
+ - I
162
+ - cl
163
+ - h
164
+ - y
165
+ - b
166
+ - '9'
167
+ - j
168
+ - ts
169
+ - ch
170
+ - '-6'
171
+ - z
172
+ - p
173
+ - '-7'
174
+ - f
175
+ - ky
176
+ - ry
177
+ - '-8'
178
+ - gy
179
+ - '-9'
180
+ - hy
181
+ - ny
182
+ - '-10'
183
+ - by
184
+ - my
185
+ - '-11'
186
+ - '-12'
187
+ - '-13'
188
+ - py
189
+ - '-14'
190
+ - '-15'
191
+ - v
192
+ - '10'
193
+ - '-16'
194
+ - '-17'
195
+ - '11'
196
+ - '-21'
197
+ - '-20'
198
+ - '12'
199
+ - '-19'
200
+ - '13'
201
+ - '-18'
202
+ - '14'
203
+ - dy
204
+ - '15'
205
+ - ty
206
+ - '-22'
207
+ - '16'
208
+ - '18'
209
+ - '19'
210
+ - '17'
211
+ - <sos/eos>
212
+ odim: null
213
+ model_conf: {}
214
+ use_preprocessor: true
215
+ token_type: phn
216
+ bpemodel: null
217
+ non_linguistic_symbols: null
218
+ cleaner: jaconv
219
+ g2p: pyopenjtalk_accent_with_pause
220
+ feats_extract: linear_spectrogram
221
+ feats_extract_conf:
222
+ n_fft: 1024
223
+ hop_length: 256
224
+ win_length: null
225
+ normalize: null
226
+ normalize_conf: {}
227
+ tts: vits
228
+ tts_conf:
229
+ generator_type: vits_generator
230
+ generator_params:
231
+ hidden_channels: 192
232
+ spks: -1
233
+ global_channels: -1
234
+ segment_size: 32
235
+ text_encoder_attention_heads: 2
236
+ text_encoder_ffn_expand: 4
237
+ text_encoder_blocks: 6
238
+ text_encoder_positionwise_layer_type: conv1d
239
+ text_encoder_positionwise_conv_kernel_size: 3
240
+ text_encoder_positional_encoding_layer_type: rel_pos
241
+ text_encoder_self_attention_layer_type: rel_selfattn
242
+ text_encoder_activation_type: swish
243
+ text_encoder_normalize_before: true
244
+ text_encoder_dropout_rate: 0.1
245
+ text_encoder_positional_dropout_rate: 0.0
246
+ text_encoder_attention_dropout_rate: 0.1
247
+ use_macaron_style_in_text_encoder: true
248
+ use_conformer_conv_in_text_encoder: false
249
+ text_encoder_conformer_kernel_size: -1
250
+ decoder_kernel_size: 7
251
+ decoder_channels: 512
252
+ decoder_upsample_scales:
253
+ - 8
254
+ - 8
255
+ - 2
256
+ - 2
257
+ decoder_upsample_kernel_sizes:
258
+ - 16
259
+ - 16
260
+ - 4
261
+ - 4
262
+ decoder_resblock_kernel_sizes:
263
+ - 3
264
+ - 7
265
+ - 11
266
+ decoder_resblock_dilations:
267
+ - - 1
268
+ - 3
269
+ - 5
270
+ - - 1
271
+ - 3
272
+ - 5
273
+ - - 1
274
+ - 3
275
+ - 5
276
+ use_weight_norm_in_decoder: true
277
+ posterior_encoder_kernel_size: 5
278
+ posterior_encoder_layers: 16
279
+ posterior_encoder_stacks: 1
280
+ posterior_encoder_base_dilation: 1
281
+ posterior_encoder_dropout_rate: 0.0
282
+ use_weight_norm_in_posterior_encoder: true
283
+ flow_flows: 4
284
+ flow_kernel_size: 5
285
+ flow_base_dilation: 1
286
+ flow_layers: 4
287
+ flow_dropout_rate: 0.0
288
+ use_weight_norm_in_flow: true
289
+ use_only_mean_in_flow: true
290
+ stochastic_duration_predictor_kernel_size: 3
291
+ stochastic_duration_predictor_dropout_rate: 0.5
292
+ stochastic_duration_predictor_flows: 4
293
+ stochastic_duration_predictor_dds_conv_layers: 3
294
+ vocabs: 85
295
+ aux_channels: 513
296
+ discriminator_type: hifigan_multi_scale_multi_period_discriminator
297
+ discriminator_params:
298
+ scales: 1
299
+ scale_downsample_pooling: AvgPool1d
300
+ scale_downsample_pooling_params:
301
+ kernel_size: 4
302
+ stride: 2
303
+ padding: 2
304
+ scale_discriminator_params:
305
+ in_channels: 1
306
+ out_channels: 1
307
+ kernel_sizes:
308
+ - 15
309
+ - 41
310
+ - 5
311
+ - 3
312
+ channels: 128
313
+ max_downsample_channels: 1024
314
+ max_groups: 16
315
+ bias: true
316
+ downsample_scales:
317
+ - 2
318
+ - 2
319
+ - 4
320
+ - 4
321
+ - 1
322
+ nonlinear_activation: LeakyReLU
323
+ nonlinear_activation_params:
324
+ negative_slope: 0.1
325
+ use_weight_norm: true
326
+ use_spectral_norm: false
327
+ follow_official_norm: false
328
+ periods:
329
+ - 2
330
+ - 3
331
+ - 5
332
+ - 7
333
+ - 11
334
+ period_discriminator_params:
335
+ in_channels: 1
336
+ out_channels: 1
337
+ kernel_sizes:
338
+ - 5
339
+ - 3
340
+ channels: 32
341
+ downsample_scales:
342
+ - 3
343
+ - 3
344
+ - 3
345
+ - 3
346
+ - 1
347
+ max_downsample_channels: 1024
348
+ bias: true
349
+ nonlinear_activation: LeakyReLU
350
+ nonlinear_activation_params:
351
+ negative_slope: 0.1
352
+ use_weight_norm: true
353
+ use_spectral_norm: false
354
+ generator_adv_loss_params:
355
+ average_by_discriminators: false
356
+ loss_type: mse
357
+ discriminator_adv_loss_params:
358
+ average_by_discriminators: false
359
+ loss_type: mse
360
+ feat_match_loss_params:
361
+ average_by_discriminators: false
362
+ average_by_layers: false
363
+ include_final_outputs: true
364
+ mel_loss_params:
365
+ fs: 22050
366
+ n_fft: 1024
367
+ hop_length: 256
368
+ win_length: null
369
+ window: hann
370
+ n_mels: 80
371
+ fmin: 0
372
+ fmax: null
373
+ log_base: null
374
+ lambda_adv: 1.0
375
+ lambda_mel: 45.0
376
+ lambda_feat_match: 2.0
377
+ lambda_dur: 1.0
378
+ lambda_kl: 1.0
379
+ sampling_rate: 22050
380
+ cache_generator_outputs: true
381
+ pitch_extract: null
382
+ pitch_extract_conf: {}
383
+ pitch_normalize: null
384
+ pitch_normalize_conf: {}
385
+ energy_extract: null
386
+ energy_extract_conf: {}
387
+ energy_normalize: null
388
+ energy_normalize_conf: {}
389
+ required:
390
+ - output_dir
391
+ - token_list
392
+ version: 0.10.3a1
393
+ distributed: true
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_backward_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_fake_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_forward_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_optim_step_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_real_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/discriminator_train_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_adv_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_backward_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_dur_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_feat_match_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_forward_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_kl_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_mel_loss.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_optim_step_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/generator_train_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/gpu_max_cached_mem_GB.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/iter_time.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim0_lr0.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/optim1_lr0.png ADDED
exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.3a1
2
+ files:
3
+ model_file: exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/50epoch.pth
4
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
5
+ timestamp: 1630722941.449679
6
+ torch: 1.7.1
7
+ yaml_files:
8
+ train_config: exp/tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause/config.yaml