isuneast commited on
Commit
653a44d
1 Parent(s): 9480e85

clean unused model

Browse files
Data/Azuma/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Azuma/filelists/train.list",
30
- "validation_files": "Data/Azuma/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "东雪莲": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Azuma/models/G_17900.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bece1cd9129451f15a1b143a7dfaad0ea4d7c5718725aa48825cbf89f4b31983
3
- size 728379830
 
 
 
 
Data/Azuma/models/G_18200.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe75c0301cbf361898fc7c9aff122dd411df647b3fc8a8a53c4846f07d5f7a43
3
- size 728379830
 
 
 
 
Data/Azusa/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Azusa/filelists/train.list",
30
- "validation_files": "Data/Azusa/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "Azusa": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Azusa/models/G_11300.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0708043b54ab21eb8ec1b600982ea7b105bcded370a9207281e043c64e195dc3
3
- size 728379830
 
 
 
 
Data/Carol/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Carol/filelists/train.list",
30
- "validation_files": "Data/Carol/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "珈乐": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Carol/models/G_12800.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d95ce470d428646e70a3ef6b9f39bcdbabd0d60e5311c136acfa82c14140aa45
3
- size 728379830
 
 
 
 
Data/Diana/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Diana/filelists/train.list",
30
- "validation_files": "Data/Diana/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "嘉然": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Diana/models/G_7800.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e13531f291fb5c277fbe2dc61674ce174cffd28cd228b40a0136a2c407fd2286
3
- size 728370270
 
 
 
 
Data/Eileen/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Eileen/filelists/train.list",
30
- "validation_files": "Data/Eileen/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "乃琳": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Eileen/models/G_10700.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:905de797a2fb9382f22f160407f0dbf3dc4a24a98052dd3ab062bf0e13e58659
3
- size 728379830
 
 
 
 
Data/Eileen/models/G_10800.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb4607096aec15bbd73b7570cf25ad87ae56c13880398b6ced0aa57ee77866e7
3
- size 728379830
 
 
 
 
Data/Messi/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "eval_interval": 50,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Messi/filelists/train.list",
30
- "validation_files": "Data/Messi/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "Messi": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Messi/models/G_1800.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a812c363cd2e091a5126c33a2c8572fe06ce4b1840270d5c093e8ab7eba1b0ff
3
- size 728370270
 
 
 
 
Data/Nana7mi/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Nana7mi/filelists/train.list",
30
- "validation_files": "Data/Nana7mi/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "七海": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Nana7mi/models/G_10700.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:616e10795efb68b46b3da14283b33dd11134c365a7866f596fde7f8654cd8a40
3
- size 728379830
 
 
 
 
Data/Taffy/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/Taffy/filelists/train.list",
30
- "validation_files": "Data/Taffy/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "永雏塔菲": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Taffy/models/G_11100.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:39c4f4a231902fea06da77789a7949cbcdaaa26abbbb455c04760d2abf1eebea
3
- size 728379830
 
 
 
 
Data/XingTong/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/XingTong/filelists/train.list",
30
- "validation_files": "Data/XingTong/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "星瞳": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/XingTong/models/G_11600.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bf5588133ea19d099ffd762cd92601210ded38abc3ace35226dab333d81a436
3
- size 728379830
 
 
 
 
Data/badXT/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "eval_interval": 50,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/badXT/filelists/train.list",
30
- "validation_files": "Data/badXT/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "坏女人星瞳": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/badXT/models/G_3250.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:70bccbfa08deb7ce10e8ab3d4d730b5a1dee4a5be6e326f03244aec4ea112658
3
- size 728370270
 
 
 
 
Data/dingzhen/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "eval_interval": 50,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/dingzhen/filelists/train.list",
30
- "validation_files": "Data/dingzhen/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "丁真": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/dingzhen/models/G_2650.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:540afcc29da68230b57c885fb00dd0dcd897b97f0796fab486ced3ede7fbfef7
3
- size 728370270
 
 
 
 
Data/nine/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/nine/filelists/train.list",
30
- "validation_files": "Data/nine/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "尼奈": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/nine/models/G_10700.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe12e7d98b99bc9e26b45b9f191b1534a444ecddc630a313e6ea6439c5d486d3
3
- size 728379830
 
 
 
 
Data/otto/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0002,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/otto/filelists/train.list",
30
- "validation_files": "Data/otto/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "otto": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 256,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/otto/models/G_5400.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:561e3ac21e3346e05f2b130a2833a28cd011d486ea14ff4a164e5f19094e5843
3
- size 703791134
 
 
 
 
Data/xuan/config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 100,
4
- "eval_interval": 100,
5
- "seed": 42,
6
- "epochs": 1000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 12,
14
- "bf16_run": false,
15
- "lr_decay": 0.99995,
16
- "segment_size": 16384,
17
- "init_lr_ratio": 1,
18
- "warmup_epochs": 0,
19
- "c_mel": 45,
20
- "c_kl": 1.0,
21
- "c_commit": 100,
22
- "skip_optimizer": true,
23
- "freeze_ZH_bert": false,
24
- "freeze_JP_bert": false,
25
- "freeze_EN_bert": false,
26
- "freeze_emo": false
27
- },
28
- "data": {
29
- "training_files": "Data/xuan/filelists/train.list",
30
- "validation_files": "Data/xuan/filelists/val.list",
31
- "max_wav_value": 32768.0,
32
- "sampling_rate": 44100,
33
- "filter_length": 2048,
34
- "hop_length": 512,
35
- "win_length": 2048,
36
- "n_mel_channels": 128,
37
- "mel_fmin": 0.0,
38
- "mel_fmax": null,
39
- "add_blank": true,
40
- "n_speakers": 1,
41
- "cleaned_text": true,
42
- "spk2id": {
43
- "炫神": 0
44
- }
45
- },
46
- "model": {
47
- "use_spk_conditioned_encoder": true,
48
- "use_noise_scaled_mas": true,
49
- "use_mel_posterior_encoder": false,
50
- "use_duration_discriminator": true,
51
- "inter_channels": 192,
52
- "hidden_channels": 192,
53
- "filter_channels": 768,
54
- "n_heads": 2,
55
- "n_layers": 6,
56
- "kernel_size": 3,
57
- "p_dropout": 0.1,
58
- "resblock": "1",
59
- "resblock_kernel_sizes": [
60
- 3,
61
- 7,
62
- 11
63
- ],
64
- "resblock_dilation_sizes": [
65
- [
66
- 1,
67
- 3,
68
- 5
69
- ],
70
- [
71
- 1,
72
- 3,
73
- 5
74
- ],
75
- [
76
- 1,
77
- 3,
78
- 5
79
- ]
80
- ],
81
- "upsample_rates": [
82
- 8,
83
- 8,
84
- 2,
85
- 2,
86
- 2
87
- ],
88
- "upsample_initial_channel": 512,
89
- "upsample_kernel_sizes": [
90
- 16,
91
- 16,
92
- 8,
93
- 2,
94
- 2
95
- ],
96
- "n_layers_q": 3,
97
- "use_spectral_norm": false,
98
- "gin_channels": 512,
99
- "slm": {
100
- "model": "./slm/wavlm-base-plus",
101
- "sr": 16000,
102
- "hidden": 768,
103
- "nlayers": 13,
104
- "initial_channel": 64
105
- }
106
- },
107
- "version": "2.3"
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/xuan/models/G_7200.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fd876c3eb726c3e15f1df3260a0821f84d955944044cca119e10d021dd58300
3
- size 728370270