---
# Training configuration.
# Top-level sections: train, data, vits, gen, mpd, mrd, log, dist_config.

# ---------------------------------------------------------------------------
# train: run-level settings and loss coefficients
# ---------------------------------------------------------------------------
train:
  model: "sovits"
  seed: 1234
  epochs: 10000
  # Scientific-notation values are written with an explicit mantissa dot:
  # YAML 1.1 loaders such as PyYAML resolve dot-less forms like `2e-4` as
  # strings rather than floats.
  learning_rate: 2.0e-4
  betas: [0.8, 0.99]
  lr_decay: 0.999875
  eps: 1.0e-9
  batch_size: 8
  c_stft: 9
  c_mel: 1.0
  c_kl: 0.2
  port: 8001
  # Empty string here; presumably means "no pretrained checkpoint" — confirm
  # against the code that reads this key.
  pretrain: ""

# ---------------------------------------------------------------------------
# data: file lists and audio / spectrogram parameters
# ---------------------------------------------------------------------------
data:
  training_files: "files/train.txt"
  validation_files: "files/valid.txt"
  # WARNING: based on hop_length (8000 = 25 * 320 with the values below).
  # NOTE(review): presumably must stay a multiple of hop_length — confirm.
  segment_size: 8000
  max_wav_value: 32768.0
  sampling_rate: 32000
  filter_length: 1024
  hop_length: 320
  win_length: 1024
  mel_channels: 100
  mel_fmin: 50.0
  # Equals sampling_rate / 2 with the values above.
  mel_fmax: 16000.0

# ---------------------------------------------------------------------------
# vits: feature and channel dimensions
# ---------------------------------------------------------------------------
vits:
  ppg_dim: 1024
  vec_dim: 256
  spk_dim: 256
  gin_channels: 256
  inter_channels: 192
  hidden_channels: 192
  filter_channels: 640

# ---------------------------------------------------------------------------
# gen: upsampling / resblock layout
# ---------------------------------------------------------------------------
gen:
  # Same value as vits.inter_channels (192).
  # NOTE(review): presumably these must match — confirm.
  upsample_input: 192
  # Product of the rates is 5 * 4 * 4 * 2 * 2 = 320, the same value as
  # data.hop_length. NOTE(review): presumably these must be kept in sync
  # when either is changed — confirm.
  upsample_rates: [5, 4, 4, 2, 2]
  upsample_kernel_sizes: [15, 8, 8, 4, 4]
  upsample_initial_channel: 320
  resblock_kernel_sizes: [3, 7, 11]
  # One row of dilations per entry in resblock_kernel_sizes (3 rows, 3 sizes).
  resblock_dilation_sizes:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]

# ---------------------------------------------------------------------------
# mpd: period-based settings
# ---------------------------------------------------------------------------
mpd:
  periods: [2, 3, 5, 7, 11]
  kernel_size: 5
  stride: 3
  use_spectral_norm: false
  lReLU_slope: 0.2

# ---------------------------------------------------------------------------
# mrd: resolution-based settings
# ---------------------------------------------------------------------------
mrd:
  # Each tuple is (filter_length, hop_length, win_length).
  # This value is a quoted string, not a YAML sequence; it is kept verbatim
  # because the consumer receives it as text.
  resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]"
  use_spectral_norm: false
  lReLU_slope: 0.2

# ---------------------------------------------------------------------------
# log: logging / evaluation / checkpoint intervals and output directories
# ---------------------------------------------------------------------------
log:
  info_interval: 100
  eval_interval: 1
  save_interval: 5
  num_audio: 6
  pth_dir: 'chkpt'
  log_dir: 'logs'

# ---------------------------------------------------------------------------
# dist_config: distributed-run settings
# ---------------------------------------------------------------------------
dist_config:
  dist_backend: "nccl"
  dist_url: "tcp://localhost:54321"
  world_size: 1