Miuzarte committed on
Commit
7b9f580
1 Parent(s): e086908

Upload config.json

Browse files

本来想更新一下config但是忘记了旧版4.0v1和v2差了啥,摆了

Files changed (1) hide show
  1. sovits4-v2_44k/v4/config.json +35 -36
sovits4-v2_44k/v4/config.json CHANGED
@@ -3,69 +3,68 @@
3
  "log_interval": 100,
4
  "eval_interval": 10000,
5
  "seed": 1234,
 
6
  "epochs": 10000,
7
- "learning_rate": 0.0001,
8
  "betas": [
9
  0.8,
10
  0.99
11
  ],
12
  "eps": 1e-09,
13
  "batch_size": 12,
 
14
  "fp16_run": false,
15
- "half_type": "fp16",
16
- "lr_decay": 0.999875,
17
  "segment_size": 10240,
18
  "init_lr_ratio": 1,
19
  "warmup_epochs": 0,
20
  "c_mel": 45,
21
- "c_kl": 1.0,
22
- "use_sr": true,
23
- "max_speclen": 512,
24
- "port": "8001",
25
- "keep_ckpts": 5,
26
- "all_in_mem": false,
27
- "vol_aug":false
28
  },
29
  "data": {
30
- "training_files": "filelists/train.txt",
31
- "validation_files": "filelists/val.txt",
 
 
 
32
  "max_wav_value": 32768.0,
33
  "sampling_rate": 44100,
34
- "filter_length": 2048,
 
 
35
  "hop_length": 512,
36
- "win_length": 2048,
37
- "n_mel_channels": 80,
38
- "mel_fmin": 0.0,
39
- "mel_fmax": 22050,
40
- "unit_interpolate_mode":"nearest"
 
 
 
41
  },
42
  "model": {
43
- "inter_channels": 192,
44
  "hidden_channels": 192,
 
45
  "filter_channels": 768,
46
  "n_heads": 2,
47
- "n_layers": 6,
48
  "kernel_size": 3,
49
  "p_dropout": 0.1,
 
 
 
 
 
 
50
  "resblock": "1",
 
51
  "resblock_kernel_sizes": [3,7,11],
52
  "resblock_dilation_sizes": [[1,3,5],[1,3,5],[1,3,5]],
53
- "upsample_rates": [8,8,2,2,2],
54
- "upsample_initial_channel": 512,
55
- "upsample_kernel_sizes": [16,16,4,4,4],
56
- "n_layers_q": 3,
57
- "n_flow_layer": 4,
58
- "use_spectral_norm": false,
59
- "gin_channels": 256,
60
- "ssl_dim": 256,
61
- "n_speakers": 200,
62
- "vocoder_name":"nsf-hifigan",
63
- "speech_encoder":"vec256l9",
64
- "speaker_embedding":false,
65
- "vol_embedding":false,
66
- "use_depthwise_conv":false,
67
- "flow_share_parameter": false,
68
- "use_automatic_f0_prediction": true
69
  },
70
  "spk": {
71
  "suijiSUI": 0
 
3
  "log_interval": 100,
4
  "eval_interval": 10000,
5
  "seed": 1234,
6
+ "port": 8001,
7
  "epochs": 10000,
8
+ "learning_rate": 0.0002,
9
  "betas": [
10
  0.8,
11
  0.99
12
  ],
13
  "eps": 1e-09,
14
  "batch_size": 12,
15
+ "accumulation_steps": 1,
16
  "fp16_run": false,
17
+ "lr_decay": 0.998,
 
18
  "segment_size": 10240,
19
  "init_lr_ratio": 1,
20
  "warmup_epochs": 0,
21
  "c_mel": 45,
22
+ "keep_ckpts": 6
 
 
 
 
 
 
23
  },
24
  "data": {
25
+ "data_dir": "dataset",
26
+ "dataset_type": "SingDataset",
27
+ "collate_type": "SingCollate",
28
+ "training_filelist": "filelists/train.txt",
29
+ "validation_filelist": "filelists/val.txt",
30
  "max_wav_value": 32768.0,
31
  "sampling_rate": 44100,
32
+ "n_fft": 2048,
33
+ "fmin": 0,
34
+ "fmax": 22050,
35
  "hop_length": 512,
36
+ "win_size": 2048,
37
+ "acoustic_dim": 80,
38
+ "c_dim": 256,
39
+ "min_level_db": -115,
40
+ "ref_level_db": 20,
41
+ "min_db": -115,
42
+ "max_abs_value": 4.0,
43
+ "n_speakers": 200
44
  },
45
  "model": {
 
46
  "hidden_channels": 192,
47
+ "spk_channels": 192,
48
  "filter_channels": 768,
49
  "n_heads": 2,
50
+ "n_layers": 4,
51
  "kernel_size": 3,
52
  "p_dropout": 0.1,
53
+ "prior_hidden_channels": 192,
54
+ "prior_filter_channels": 768,
55
+ "prior_n_heads": 2,
56
+ "prior_n_layers": 4,
57
+ "prior_kernel_size": 3,
58
+ "prior_p_dropout": 0.1,
59
  "resblock": "1",
60
+ "use_spectral_norm": false,
61
  "resblock_kernel_sizes": [3,7,11],
62
  "resblock_dilation_sizes": [[1,3,5],[1,3,5],[1,3,5]],
63
+ "upsample_rates": [8,8,4,2],
64
+ "upsample_initial_channel": 256,
65
+ "upsample_kernel_sizes": [16,16,8,4],
66
+ "n_harmonic": 64,
67
+ "n_bands": 65
 
 
 
 
 
 
 
 
 
 
 
68
  },
69
  "spk": {
70
  "suijiSUI": 0