{
  "train": {
    "log_interval": 50,
    "eval_interval": 1000,
    "seed": 1234,
    "port": 8001,
    "epochs": 10000,
    "learning_rate": 0.0002,
    "betas": [0.8, 0.99],
    "eps": 1e-09,
    "batch_size": 6,
    "accumulation_steps": 1,
    "fp16_run": false,
    "lr_decay": 0.998,
    "segment_size": 10240,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "keep_ckpts": 4
  },
  "data": {
    "data_dir": "dataset",
    "dataset_type": "SingDataset",
    "collate_type": "SingCollate",
    "training_filelist": "filelists/train.txt",
    "validation_filelist": "filelists/val.txt",
    "max_wav_value": 32768.0,
    "sampling_rate": 44100,
    "n_fft": 2048,
    "fmin": 0,
    "fmax": 22050,
    "hop_length": 512,
    "win_size": 2048,
    "acoustic_dim": 80,
    "c_dim": 256,
    "min_level_db": -115,
    "ref_level_db": 20,
    "min_db": -115,
    "max_abs_value": 4.0,
    "n_speakers": 200
  },
  "model": {
    "hidden_channels": 192,
    "spk_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 4,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "prior_hidden_channels": 192,
    "prior_filter_channels": 768,
    "prior_n_heads": 2,
    "prior_n_layers": 4,
    "prior_kernel_size": 3,
    "prior_p_dropout": 0.1,
    "resblock": "1",
    "use_spectral_norm": false,
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [
      [1, 3, 5],
      [1, 3, 5],
      [1, 3, 5]
    ],
    "upsample_rates": [8, 8, 4, 2],
    "upsample_initial_channel": 256,
    "upsample_kernel_sizes": [16, 16, 8, 4],
    "n_harmonic": 64,
    "n_bands": 65
  },
  "spk": {
    "alice": 0
  }
}