File size: 1,565 Bytes
10f957b
 
 
 
 
 
 
 
 
c24b656
 
 
10f957b
 
 
 
 
 
c24b656
10f957b
c24b656
 
 
 
 
 
 
10f957b
 
 
755994c
10f957b
 
 
 
755994c
10f957b
 
 
c24b656
 
 
10f957b
 
 
 
 
 
 
 
 
 
 
c24b656
10f957b
 
 
 
 
c24b656
10f957b
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Training-run settings: model name, seed, epoch count, optimizer values,
# c_* coefficients, a port number and an optional pretrain path.
train:
  model: "sovits"
  seed: 1234
  epochs: 10000
  # Exponent-form floats carry an explicit decimal point on purpose: YAML 1.1
  # loaders such as PyYAML resolve a bare `2e-4` as the string "2e-4".
  learning_rate: 2.0e-4
  betas: [0.8, 0.99]
  lr_decay: 0.999875
  eps: 1.0e-9
  batch_size: 8
  # NOTE(review): the c_* values are presumably per-term loss weights
  # (STFT, mel, KL) -- confirm against the training loop.
  c_stft: 9
  c_mel: 1.0
  c_kl: 0.2
  port: 8001
  # Empty string; presumably means "no pretrained checkpoint" -- confirm.
  pretrain: ""
#############################
# Dataset file lists plus waveform / spectrogram analysis parameters.
data:
  training_files: "files/train.txt"
  validation_files: "files/valid.txt"
  # WARNING: based on hop_length (8000 = 25 * 320). Presumably it must stay a
  # whole multiple of hop_length -- confirm before changing either value.
  segment_size: 8000
  max_wav_value: 32768.0
  sampling_rate: 32000
  filter_length: 1024
  hop_length: 320
  win_length: 1024
  mel_channels: 100
  mel_fmin: 50.0
  # Equals sampling_rate / 2.
  mel_fmax: 16000.0
#############################
# Dimension and channel sizes for the `vits` section; every value is an integer.
# NOTE(review): ppg_dim / vec_dim / spk_dim are presumably the widths of the
# input feature and speaker embeddings -- confirm against the model code.
vits:
  ppg_dim: 1024
  vec_dim: 256
  spk_dim: 256
  gin_channels: 256
  # Same value as gen.upsample_input (192); presumably the two must stay
  # equal -- confirm before changing one of them.
  inter_channels: 192
  hidden_channels: 192
  filter_channels: 640
#############################
# `gen` section: upsampling stages and residual-block layout.
gen:
  upsample_input: 192
  # The rates multiply to 5 * 4 * 4 * 2 * 2 = 320, the same value as
  # data.hop_length; presumably the two must stay in sync -- confirm.
  upsample_rates: [5, 4, 4, 2, 2]
  upsample_kernel_sizes: [15, 8, 8, 4, 4]
  upsample_initial_channel: 320
  resblock_kernel_sizes: [3, 7, 11]
  # Three dilation lists, matching the three resblock_kernel_sizes entries
  # (presumably paired one-to-one -- confirm).
  resblock_dilation_sizes:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
#############################
# `mpd` section: list of periods plus kernel, stride, normalisation flag and
# leaky-ReLU slope.
mpd:
  periods: [2, 3, 5, 7, 11]
  kernel_size: 5
  stride: 3
  use_spectral_norm: false
  lReLU_slope: 0.2
#############################
# `mrd` section: resolution list, normalisation flag and leaky-ReLU slope.
mrd:
  # Deliberately a quoted string, not a YAML sequence: the value uses
  # Python-style tuple syntax. Each tuple is
  # (filter_length, hop_length, win_length).
  resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]"
  use_spectral_norm: false
  lReLU_slope: 0.2
#############################
# `log` section: reporting / evaluation / checkpoint intervals, number of
# audio samples, and output directory names.
# NOTE(review): the units of the *_interval values (steps vs. epochs) are not
# stated in this file -- confirm in the trainer.
log:
  info_interval: 100
  eval_interval: 1
  save_interval: 5
  num_audio: 6
  pth_dir: "chkpt"
  log_dir: "logs"
#############################
# `dist_config` section: backend name, URL and world size.
# NOTE(review): presumably consumed when initialising distributed training --
# confirm against the launcher code.
dist_config:
  dist_backend: "nccl"
  # Uses port 54321 on localhost, which is distinct from train.port (8001).
  dist_url: "tcp://localhost:54321"
  world_size: 1