{ "output_path": "/output/vocoder", "logger_uri": null, "run_name": "run", "project_name": null, "run_description": "\ud83d\udc38Coqui trainer run.", "print_step": 50, "plot_step": 100, "model_param_stats": false, "wandb_entity": null, "dashboard_logger": "tensorboard", "save_on_interrupt": true, "log_model_step": null, "save_step": 5000, "save_n_checkpoints": 2, "save_checkpoints": true, "save_all_best": false, "save_best_after": 5000, "target_loss": "loss_0", "print_eval": true, "test_delay_epochs": 5, "run_eval": true, "run_eval_steps": null, "distributed_backend": "nccl", "distributed_url": "tcp://localhost:54321", "mixed_precision": false, "precision": "fp16", "epochs": 1000, "batch_size": 74, "eval_batch_size": 16, "grad_clip": [ 5, 5 ], "scheduler_after_epoch": true, "lr": 0.0001, "optimizer": "AdamW", "optimizer_params": { "betas": [ 0.8, 0.99 ], "weight_decay": 0.0 }, "lr_scheduler": null, "lr_scheduler_params": {}, "use_grad_scaler": false, "allow_tf32": false, "cudnn_enable": true, "cudnn_deterministic": false, "cudnn_benchmark": false, "training_seed": 54321, "model": "hifigan", "num_loader_workers": 8, "num_eval_loader_workers": 8, "use_noise_augment": true, "audio": { "fft_size": 1024, "win_length": 1024, "hop_length": 256, "frame_shift_ms": null, "frame_length_ms": null, "stft_pad_mode": "reflect", "sample_rate": 22050, "resample": false, "preemphasis": 0.0, "ref_level_db": 20, "do_sound_norm": false, "log_func": "np.log10", "do_trim_silence": true, "trim_db": 45, "do_rms_norm": false, "db_level": null, "power": 1.5, "griffin_lim_iters": 60, "num_mels": 80, "mel_fmin": 50, "mel_fmax": 8000, "spec_gain": 20, "do_amp_to_db_linear": true, "do_amp_to_db_mel": true, "pitch_fmax": 640.0, "pitch_fmin": 1.0, "signal_norm": true, "min_level_db": -100, "symmetric_norm": true, "max_norm": 4.0, "clip_norm": true, "stats_path": "scale_stats_female_1.npy" }, "eval_split_size": 30, "data_path": "/output/dataset/female_1", "feature_path": null, "seq_len": 8192, "pad_short": 2000, "conv_pad": 0, "use_cache": false, "wd": 1e-06, "use_stft_loss": false, "use_subband_stft_loss": false, "use_mse_gan_loss": true, "use_hinge_gan_loss": false, "use_feat_match_loss": true, "use_l1_spec_loss": true, "stft_loss_weight": 0, "subband_stft_loss_weight": 0, "mse_G_loss_weight": 1, "hinge_G_loss_weight": 0, "feat_match_loss_weight": 108, "l1_spec_loss_weight": 45, "stft_loss_params": { "n_ffts": [ 1024, 2048, 512 ], "hop_lengths": [ 120, 240, 50 ], "win_lengths": [ 600, 1200, 240 ] }, "l1_spec_loss_params": { "use_mel": true, "sample_rate": 22050, "n_fft": 1024, "hop_length": 256, "win_length": 1024, "n_mels": 80, "mel_fmin": 0.0, "mel_fmax": null }, "lr_gen": 0.0002, "lr_disc": 0.0002, "lr_scheduler_gen": "ExponentialLR", "lr_scheduler_gen_params": { "gamma": 0.999, "last_epoch": -1 }, "lr_scheduler_disc": "ExponentialLR", "lr_scheduler_disc_params": { "gamma": 0.999, "last_epoch": -1 }, "use_pqmf": false, "diff_samples_for_G_and_D": false, "discriminator_model": "hifigan_discriminator", "generator_model": "hifigan_generator", "generator_model_params": { "upsample_factors": [ 8, 8, 2, 2 ], "upsample_kernel_sizes": [ 16, 16, 4, 4 ], "upsample_initial_channel": 512, "resblock_kernel_sizes": [ 3, 7, 11 ], "resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "resblock_type": "1" }, "github_branch": "* dev" }