---
# Training / model configuration.
# Layout: one top-level mapping per subsystem (train, data, vits, gen, mpd,
# mrd, log, dist_config). Indentation is significant: every key below a
# section header must stay indented by two spaces.

#############################
# Optimisation and run settings.
train:
  model: "sovits"
  seed: 1234
  epochs: 10000
  # Written with an explicit mantissa dot ("2.0e-4", not "2e-4") so that
  # YAML 1.1 loaders also resolve it as a float rather than a string.
  learning_rate: 2.0e-4
  betas: [0.8, 0.99]
  lr_decay: 0.999875
  eps: 1.0e-9
  batch_size: 8
  # NOTE(review): c_stft / c_mel / c_kl look like loss-term weights --
  # confirm against the training code.
  c_stft: 5
  c_mel: 2.5
  c_kl: 1.0
  port: 8001
  # Empty string = no value set here.
  pretrain: ""

#############################
# Dataset lists and audio / spectrogram parameters.
data:
  training_files: "files/train.txt"
  validation_files: "files/valid.txt"
  # WARNING: based on hop_length (12000 = 25 * 480); presumably must be
  # re-derived if hop_length changes -- confirm against the data loader.
  segment_size: 12000
  max_wav_value: 32768.0
  sampling_rate: 48000
  filter_length: 2048
  hop_length: 480
  win_length: 2048
  mel_channels: 80
  mel_fmin: 0.0
  # Equals sampling_rate / 2.
  mel_fmax: 24000.0

#############################
vits:
  ppg_dim: 1024
  spk_dim: 256
  gin_channels: 256
  inter_channels: 192
  hidden_channels: 192
  filter_channels: 512

#############################
gen:
  upsample_input: 192
  # Product of the rates is 6 * 5 * 4 * 2 * 2 = 480, the same value as
  # data.hop_length.
  upsample_rates: [6, 5, 4, 2, 2]
  upsample_kernel_sizes: [20, 15, 8, 4, 4]
  upsample_initial_channel: 256
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]

#############################
mpd:
  periods: [2, 3, 5, 7, 11]
  kernel_size: 5
  stride: 3
  use_spectral_norm: false
  lReLU_slope: 0.2

#############################
mrd:
  # Each tuple is (filter_length, hop_length, win_length).
  # Deliberately a quoted string, not a YAML list; presumably parsed by the
  # consumer -- keep the quotes and the exact text.
  resolutions: "[(1024, 120, 600), (2048, 240, 1200), (512, 50, 240)]"
  use_spectral_norm: false
  lReLU_slope: 0.2

#############################
log:
  info_interval: 100
  eval_interval: 5
  save_interval: 5
  num_audio: 6
  pth_dir: 'chkpt'
  log_dir: 'logs'

#############################
dist_config:
  dist_backend: "nccl"
  dist_url: "tcp://localhost:54321"
  world_size: 1