Wanlau committed on
Commit
f3c7393
1 Parent(s): 9ca93ef
Anon_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
README.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: audio-to-audio
3
+ tags:
4
+ - RVC
5
+ ---
6
+ # Voice Conversion Models for *BanG Dream!*
7
+
8
+  
9
+
10
+ [**English**](./README.md) | [**中文简体**](./README_zh_CN.md)
11
+
12
+ ---
13
+ Using RVC (Retrieval-based-Voice-Conversion-WebUI)
14
+
15
+ [**RVC**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
16
+
17
+  
18
+
19
+ RVC models for *BanG Dream!* characters, trained on voice data from Bestdori.
20
+
21
+ These models are intended solely for learning and communication purposes. Using them for any illegal activity is strictly prohibited.
22
+
23
+  
24
+
25
+ Models whose names end in \*_v2 are v2 models; all others are v1 models.
26
+
27
+ (The v2 models change the input from the 256-dimensional feature of 9-layer HuBERT + final_proj to the 768-dimensional feature of 12-layer HuBERT, and add 3 period discriminators.)
28
+
29
+ ---
30
+ ## Character List
31
+
32
+ ### Poppin'Party
33
+
34
+ ### Afterglow
35
+
36
+ ### Hello, Happy World!
37
+
38
+ ### Pastel*Palettes
39
+
40
+ ![hina](png/thina1.png)
41
+ 冰川 日菜
42
+ Hina Hikawa
43
+
44
+ ### Roselia
45
+
46
+ ![sayo](png/tsayo1.png)
47
+ 冰川 纱夜
48
+ Sayo Hikawa
49
+
50
+ ![yukina](png/tyukina1.png)
51
+ 凑 友希那
52
+ Yukina Minato
53
+
54
+ ![lisa](png/tlisa1.png)
55
+ 今井 莉莎
56
+ Lisa Imai
57
+
58
+ ### RAISE A SUILEN
59
+
60
+ ### Morfonica
61
+
62
+ ### MyGO!!!!!
63
+
64
+ ![anon](png/tanon1.png)
65
+ 千早 爱音
66
+ Anon Chihaya
67
+
68
+ ![soyo](png/tsoyo1.png)
69
+ 长崎 爽世
70
+ Soyo Nagasaki
71
+
72
+ ![tomori](png/ttomori1.png)
73
+ 高松 灯
74
+ Tomori Takamatsu
75
+
76
+ ![taki](png/ttaki1.png)
77
+ 椎名 立希
78
+ Taki Shiina
79
+
80
+ ![rana](png/trana1.png)
81
+ 要 乐奈
82
+ Rana Kaname
83
+
84
+ ### Ave Mujica
85
+
86
+
87
+ ---
88
+ ## Links
89
+
90
+ [**RVC(github)**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
91
+
92
+ [**RVC(huggingface)**](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main)
93
+
94
+ [**tutorial video for RVC**](https://www.bilibili.com/video/BV1pm4y1z7Gm)
README_zh_CN.md ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BanG Dream!角色语音转换模型
2
+
3
+  
4
+
5
+ [**English**](./README.md) | [**中文简体**](./README_zh_CN.md)
6
+
7
+ ---
8
+ 使用 RVC (Retrieval-based-Voice-Conversion-WebUI)
9
+
10
+ [**RVC**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
11
+
12
+  
13
+
14
+ BanG Dream!角色RVC模型,以bestdori上的语音数据训练而成。
15
+
16
+ 仅供学习交流试用,不可用于非法用途。
17
+
18
+ 具体使用方法见:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/README.md
19
+
20
+  
21
+
22
+ 带v2标识的模型为v2模型,无标识的模型皆为v1模型。
23
+
24
+ (v2版本模型将特征从 9层hubert+final_proj的256维输入 变更为 12层hubert的768维输入,并且增加了3个周期鉴别器)
25
+
26
+ ---
27
+ ## 角色列表
28
+
29
+ ### Poppin'Party
30
+
31
+ ### Afterglow
32
+
33
+ ### Hello, Happy World!
34
+
35
+ ### Pastel*Palettes
36
+
37
+ ![hina](png/thina1.png)
38
+ 冰川 日菜
39
+ Hina Hikawa
40
+
41
+ ### Roselia
42
+
43
+ ![sayo](png/tsayo1.png)
44
+ 冰川 纱夜
45
+ Sayo Hikawa
46
+
47
+ ![yukina](png/tyukina1.png)
48
+ 凑 友希那
49
+ Yukina Minato
50
+
51
+ ![lisa](png/tlisa1.png)
52
+ 今井 莉莎
53
+ Lisa Imai
54
+
55
+ ### RAISE A SUILEN
56
+
57
+ ### Morfonica
58
+
59
+ ### MyGO!!!!!
60
+
61
+ ![anon](png/tanon1.png)
62
+ 千早 爱音
63
+ Anon Chihaya
64
+
65
+ ![soyo](png/tsoyo1.png)
66
+ 长崎 爽世
67
+ Soyo Nagasaki
68
+
69
+ ![tomori](png/ttomori1.png)
70
+ 高松 灯
71
+ Tomori Takamatsu
72
+
73
+ ![taki](png/ttaki1.png)
74
+ 椎名 立希
75
+ Taki Shiina
76
+
77
+ ![rana](png/trana1.png)
78
+ 要 乐奈
79
+ Rana Kaname
80
+
81
+ ### Ave Mujica
82
+
83
+ ---
84
+ ## 相关链接
85
+
86
+ [**RVC(github)**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
87
+
88
+ [**RVC(huggingface)**](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main)
89
+
90
+ [**RVC视频教程**](https://www.bilibili.com/video/BV1pm4y1z7Gm)
Rana_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
Soyo0_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
Soyo1_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
Taki_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
Tomori_v2/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "filter_length": 2048,
4
+ "hop_length": 400,
5
+ "max_wav_value": 32768.0,
6
+ "mel_fmax": null,
7
+ "mel_fmin": 0.0,
8
+ "n_mel_channels": 125,
9
+ "sampling_rate": 40000,
10
+ "win_length": 2048
11
+ },
12
+ "model": {
13
+ "filter_channels": 768,
14
+ "gin_channels": 256,
15
+ "hidden_channels": 192,
16
+ "inter_channels": 192,
17
+ "kernel_size": 3,
18
+ "n_heads": 2,
19
+ "n_layers": 6,
20
+ "p_dropout": 0,
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5
37
+ ]
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 7,
42
+ 11
43
+ ],
44
+ "spk_embed_dim": 109,
45
+ "upsample_initial_channel": 512,
46
+ "upsample_kernel_sizes": [
47
+ 16,
48
+ 16,
49
+ 4,
50
+ 4
51
+ ],
52
+ "upsample_rates": [
53
+ 10,
54
+ 10,
55
+ 2,
56
+ 2
57
+ ],
58
+ "use_spectral_norm": false
59
+ },
60
+ "train": {
61
+ "batch_size": 4,
62
+ "betas": [
63
+ 0.8,
64
+ 0.99
65
+ ],
66
+ "c_kl": 1.0,
67
+ "c_mel": 45,
68
+ "epochs": 20000,
69
+ "eps": 1e-09,
70
+ "fp16_run": true,
71
+ "init_lr_ratio": 1,
72
+ "learning_rate": 0.0001,
73
+ "log_interval": 200,
74
+ "lr_decay": 0.999875,
75
+ "seed": 1234,
76
+ "segment_size": 12800,
77
+ "warmup_epochs": 0
78
+ }
79
+ }
png/tanon1.png ADDED
png/thina1.png ADDED
png/tlisa1.png ADDED
png/trana1.png ADDED
png/tsayo1.png ADDED
png/tsoyo1.png ADDED
png/ttaki1.png ADDED
png/ttomori1.png ADDED
png/tyukina1.png ADDED