{ "model": "glow_tts", "run_name": "glowtts-v2", "run_description": "glow-tts model for sce-tts project", "epochs": 10000, "batch_size": 32, "eval_batch_size": 16, "mixed_precision": false, "run_eval": true, "test_delay_epochs": 0, "print_eval": false, "print_step": 25, "tb_plot_step": 100, "tb_model_param_stats": false, "save_step": 1000, "checkpoint": true, "keep_all_best": false, "keep_after": 10000, "num_loader_workers": 2, "num_val_loader_workers": 2, "use_noise_augment": true, "output_path": "/content/drive/My Drive/Colab Notebooks/data/glowtts-v2/", "distributed_backend": "nccl", "distributed_url": "tcp://localhost:54321", "audio": { "fft_size": 1024, "win_length": 1024, "hop_length": 256, "frame_shift_ms": null, "frame_length_ms": null, "stft_pad_mode": "reflect", "sample_rate": 22050, "resample": false, "preemphasis": 0.98, "ref_level_db": 20, "do_sound_norm": false, "do_trim_silence": false, "trim_db": 60, "power": 1.1, "griffin_lim_iters": 60, "num_mels": 80, "mel_fmin": 0.0, "mel_fmax": 8000.0, "spec_gain": 20, "signal_norm": true, "min_level_db": -100, "symmetric_norm": true, "max_norm": 4.0, "clip_norm": true, "stats_path": "D:\\myown-tts\\server\\models\\glowtts-v2\\scale_stats.npy" }, "use_phonemes": false, "phoneme_language": "ko", "compute_input_seq_cache": true, "text_cleaner": "korean_cleaners", "enable_eos_bos_chars": false, "test_sentences_file": "/content/TTS/test_sentences.txt", "phoneme_cache_path": null, "characters": { "pad": "_", "eos": "~", "bos": "^", "characters": " .!?\u1100\u1101\u1102\u1103\u1104\u1105\u1106\u1107\u1108\u1109\u110a\u110b\u110c\u110d\u110e\u110f\u1110\u1111\u1112\u1161\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116a\u116b\u116c\u116d\u116e\u116f\u1170\u1171\u1172\u1173\u1174\u1175\u11a8\u11a9\u11aa\u11ab\u11ac\u11ad\u11ae\u11af\u11b0\u11b1\u11b2\u11b3\u11b4\u11b5\u11b6\u11b7\u11b8\u11b9\u11ba\u11bb\u11bc\u11bd\u11be\u11bf\u11c0\u11c1\u11c2", "punctuations": " .!?", "phonemes": "\u1100\u1101\u1102\u1103\u1104\u1105\u1106\u1107\u1108\u1109\u110a\u110b\u110c\u110d\u110e\u110f\u1110\u1111\u1112\u1161\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116a\u116b\u116c\u116d\u116e\u116f\u1170\u1171\u1172\u1173\u1174\u1175\u11a8\u11a9\u11aa\u11ab\u11ac\u11ad\u11ae\u11af\u11b0\u11b1\u11b2\u11b3\u11b4\u11b5\u11b6\u11b7\u11b8\u11b9\u11ba\u11bb\u11bc\u11bd\u11be\u11bf\u11c0\u11c1\u11c2", "unique": true }, "batch_group_size": 4, "loss_masking": true, "min_seq_len": 3, "max_seq_len": 500, "compute_f0": false, "add_blank": false, "datasets": [ { "name": "ljspeech", "path": "/content/TTS/filelists", "meta_file_train": "metadata.csv", "ununsed_speakers": null, "meta_file_val": null, "meta_file_attn_mask": "" } ], "encoder_type": "rel_pos_transformer", "encoder_params": { "kernel_size": 3, "dropout_p": 0.1, "num_layers": 6, "num_heads": 2, "hidden_channels_ffn": 768, "input_length": null }, "use_encoder_prenet": true, "hidden_channels_encoder": 192, "hidden_channels_decoder": 192, "hidden_channels_duration_predictor": 256, "data_dep_init_steps": 10, "style_wav_for_test": null, "inference_noise_scale": 0.0, "use_speaker_embedding": false, "use_external_speaker_embedding_file": false, "external_speaker_embedding_file": false, "noam_schedule": true, "warmup_steps": 4000, "grad_clip": 0.05, "lr": 0.0001, "wd": 1e-06, "r": 1 }