{
  "model": "hifigan",
  "run_name": "hifigan-v2",
  "run_description": "hifigan model for sce-tts project",
  "epochs": 10000,
  "batch_size": 16,
  "eval_batch_size": 16,
  "mixed_precision": false,
  "run_eval": true,
  "test_delay_epochs": 0,
  "print_eval": false,
  "print_step": 25,
  "tb_plot_step": 100,
  "tb_model_param_stats": false,
  "save_step": 5000,
  "checkpoint": true,
  "keep_all_best": false,
  "keep_after": 10000,
  "num_loader_workers": 2,
  "num_val_loader_workers": 2,
  "use_noise_augment": false,
  "output_path": "/content/drive/My Drive/Colab Notebooks/data/hifigan-v2/",
  "distributed_backend": "nccl",
  "distributed_url": "tcp://localhost:54321",
  "audio": {
    "fft_size": 1024,
    "win_length": 1024,
    "hop_length": 256,
    "frame_shift_ms": null,
    "frame_length_ms": null,
    "stft_pad_mode": "reflect",
    "sample_rate": 22050,
    "resample": false,
    "preemphasis": 0.98,
    "ref_level_db": 20,
    "do_sound_norm": false,
    "do_trim_silence": false,
    "trim_db": 60,
    "power": 1.1,
    "griffin_lim_iters": 60,
    "num_mels": 80,
    "mel_fmin": 0.0,
    "mel_fmax": 8000.0,
    "spec_gain": 20,
    "signal_norm": true,
    "min_level_db": -100,
    "symmetric_norm": true,
    "max_norm": 4.0,
    "clip_norm": true,
    "stats_path": "D:\\myown-tts\\server\\models\\hifigan-v2\\scale_stats.npy"
  },
  "eval_split_size": 10,
  "data_path": "/content/TTS/filelists/wavs",
  "feature_path": null,
  "seq_len": 8192,
  "pad_short": 2000,
  "conv_pad": 0,
  "use_cache": true,
  "wd": 0.0,
  "use_stft_loss": false,
  "use_subband_stft_loss": false,
  "use_mse_gan_loss": true,
  "use_hinge_gan_loss": false,
  "use_feat_match_loss": true,
  "use_l1_spec_loss": true,
  "stft_loss_weight": 0.0,
  "subband_stft_loss_weight": 0.0,
  "mse_G_loss_weight": 1.0,
  "hinge_G_loss_weight": 0.0,
  "feat_match_loss_weight": 10.0,
  "l1_spec_loss_weight": 45.0,
  "stft_loss_params": {
    "n_ffts": [1024, 2048, 512],
    "hop_lengths": [120, 240, 50],
    "win_lengths": [600, 1200, 240]
  },
  "l1_spec_loss_params": {
    "use_mel": true,
    "sample_rate": 22050,
    "n_fft": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "n_mels": 80,
    "mel_fmin": 0.0,
    "mel_fmax": null
  },
  "target_loss": "avg_G_loss",
  "gen_clip_grad": -1.0,
  "disc_clip_grad": -1.0,
  "lr_gen": 0.0002,
  "lr_disc": 0.0002,
  "optimizer": "AdamW",
  "optimizer_params": {
    "betas": [0.8, 0.99],
    "weight_decay": 0.0
  },
  "lr_scheduler_gen": "ExponentialLR",
  "lr_scheduler_gen_params": {
    "gamma": 0.99,
    "last_epoch": -1
  },
  "lr_scheduler_disc": "ExponentialLR",
  "lr_scheduler_disc_params": {
    "gamma": 0.99,
    "last_epoch": -1
  },
  "use_pqmf": false,
  "diff_samples_for_G_and_D": false,
  "discriminator_model": "hifigan_discriminator",
  "generator_model": "hifigan_generator",
  "generator_model_params": {
    "resblock_type": "1",
    "upsample_factors": [8, 8, 2, 2],
    "upsample_kernel_sizes": [16, 16, 4, 4],
    "upsample_initial_channel": 128,
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [
      [1, 3, 5],
      [1, 3, 5],
      [1, 3, 5]
    ]
  },
  "lr": 0.0001
}