XiaoHei Studio committed on
Commit
c008384
1 Parent(s): 0047e35

Upload 29 files

Browse files
.gitattributes CHANGED
@@ -1 +1,23 @@
1
  * text=auto eol=lf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  * text=auto eol=lf
2
+ pre_trained_model/768l12/D_0.pth filter=lfs diff=lfs merge=lfs -text
3
+ pre_trained_model/768l12/G_0.pth filter=lfs diff=lfs merge=lfs -text
4
+ pre_trained_model/768l12/vol_emb/D_0.pth filter=lfs diff=lfs merge=lfs -text
5
+ pre_trained_model/768l12/vol_emb/G_0.pth filter=lfs diff=lfs merge=lfs -text
6
+ pre_trained_model/D_0.pth filter=lfs diff=lfs merge=lfs -text
7
+ pre_trained_model/diffusion/768l12/max100/model_0.pt filter=lfs diff=lfs merge=lfs -text
8
+ pre_trained_model/diffusion/768l12/model_0.pt filter=lfs diff=lfs merge=lfs -text
9
+ pre_trained_model/diffusion/hubertsoft/model_0.pt filter=lfs diff=lfs merge=lfs -text
10
+ pre_trained_model/diffusion/whisper-ppg/model_0.pt filter=lfs diff=lfs merge=lfs -text
11
+ pre_trained_model/G_0.pth filter=lfs diff=lfs merge=lfs -text
12
+ pre_trained_model/hubertsoft/D_0.pth filter=lfs diff=lfs merge=lfs -text
13
+ pre_trained_model/hubertsoft/G_0.pth filter=lfs diff=lfs merge=lfs -text
14
+ pre_trained_model/tiny/vec768l12_vol_emb/D_0.pth filter=lfs diff=lfs merge=lfs -text
15
+ pre_trained_model/tiny/vec768l12_vol_emb/G_0.pth filter=lfs diff=lfs merge=lfs -text
16
+ pre_trained_model/whisper-ppg/D_0.pth filter=lfs diff=lfs merge=lfs -text
17
+ pre_trained_model/whisper-ppg/G_0.pth filter=lfs diff=lfs merge=lfs -text
18
+ pretrain/checkpoint_best_legacy_500.pt filter=lfs diff=lfs merge=lfs -text
19
+ pretrain/fcpe.pt filter=lfs diff=lfs merge=lfs -text
20
+ pretrain/hubert-soft-0d54a1f4.pt filter=lfs diff=lfs merge=lfs -text
21
+ pretrain/medium.pt filter=lfs diff=lfs merge=lfs -text
22
+ pretrain/nsf_hifigan/model filter=lfs diff=lfs merge=lfs -text
23
+ pretrain/rmvpe.pt filter=lfs diff=lfs merge=lfs -text
pre_trained_model/768l12/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b6936d55d2cfaa717033eafe9d98dbe44d322e6adaf7be7c1c5a835ebb7177
3
+ size 187027770
pre_trained_model/768l12/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d3e408786013590bb3574ade2831ab62c989d303834742fe73ca8d5552d2f03
3
+ size 209268661
pre_trained_model/768l12/vol_emb/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2506a5457b2f6133563418ab0429463d3ca9788cc2575979d3839a8699b3c158
3
+ size 187027770
pre_trained_model/768l12/vol_emb/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176d6e0dde4ba7de5ebfe1d01a1afebb161ec9945ea5cbee3131f187f53b39be
3
+ size 209270847
pre_trained_model/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22007b495c822f9b3c90eb6c225844f4d203bea7f5c214bcd9b5dbc2a247ed7e
3
+ size 187018271
pre_trained_model/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea40d6066461974c36f8984f956a7d2cafcce517a3bc6222a8877fab419c0179
3
+ size 180619621
pre_trained_model/diffusion/768l12/max100/model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4500926b38f792692d3985e9489127592879344d9b3850c8a06d506c18b17f
3
+ size 325929182
pre_trained_model/diffusion/768l12/model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409452a27ab310f7a5897844d003d372a7357cc91c4a43562584a1714518cdf9
3
+ size 220895384
pre_trained_model/diffusion/hubertsoft/model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e4b0e92dbc764456d39985f1e168117edd6eb3cea4a6afc7166fcd701c83ed8
3
+ size 220371096
pre_trained_model/diffusion/whisper-ppg/model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cdbc3e2dbc677b9c6fa5c96865cabd558e8f50afd76abd37460333e7153dbe6
3
+ size 221156297
pre_trained_model/hubertsoft/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5424bd5508fb198025d359f727357ec2421ca65ef3b86b692242ece0e4c370d6
3
+ size 187027770
pre_trained_model/hubertsoft/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78c3c6f58a11f16c0ba92a8e51c9532697ee598771b729f4c3779afe130ab43
3
+ size 152358837
pre_trained_model/tiny/vec768l12_vol_emb/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f653339e71ddc481375b5c6118d04ace2b2878490e6b0729cccb91beb0d2ffc
3
+ size 187029190
pre_trained_model/tiny/vec768l12_vol_emb/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a3df21655aca0b73497e5ea74c4c347c66085def05263136741aae4c476fdd
3
+ size 128459314
pre_trained_model/whisper-ppg/D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7bfff64704b51c8f49d23fee8e292a47ac0b4dbf9887ebd5f867abf9353dc33
3
+ size 187027205
pre_trained_model/whisper-ppg/G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ff28db8fa5894fdcb29c8aff760e45c9fd3b88a892391dcf0d0257e80a78b1
3
+ size 237719813
pretrain/checkpoint_best_legacy_500.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54b40fd2802423a5643779c4861af1e9ee9c1564dc9d32f54f20b5ffba7db96
3
+ size 189507909
pretrain/fcpe.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a8dd2dbd51baf19ed295006f2ac25dba6dd60adc7ec578ae5fbd94970951da
3
+ size 69005189
pretrain/hubert-soft-0d54a1f4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82e7d079df05fe3aa535f6f7d42d309bdae1d2a53324e2b2386c56721f4f649
3
+ size 378435957
pretrain/medium.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1
3
+ size 1528008539
pretrain/meta.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def download_dict():
2
+ return {
3
+ "vec768l12": {
4
+ "url": "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
5
+ "output": "./pretrain/checkpoint_best_legacy_500.pt"
6
+ },
7
+ "vec256l9": {
8
+ "url": "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
9
+ "output": "./pretrain/checkpoint_best_legacy_500.pt"
10
+ },
11
+ "hubertsoft": {
12
+ "url": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
13
+ "output": "./pretrain/hubert-soft-0d54a1f4.pt"
14
+ },
15
+ "whisper-ppg-small": {
16
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
17
+ "output": "./pretrain/small.pt"
18
+ },
19
+ "whisper-ppg": {
20
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
21
+ "output": "./pretrain/medium.pt"
22
+ },
23
+ "whisper-ppg-large": {
24
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
25
+ "output": "./pretrain/large-v2.pt"
26
+ }
27
+ }
28
+
29
+
30
+ def get_speech_encoder(config_path="configs/config.json"):
31
+ import json
32
+
33
+ with open(config_path, "r") as f:
34
+ data = f.read()
35
+ config = json.loads(data)
36
+ speech_encoder = config["model"]["speech_encoder"]
37
+ dict = download_dict()
38
+
39
+ return dict[speech_encoder]["url"], dict[speech_encoder]["output"]
pretrain/nsf_hifigan/NOTICE.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger Community Vocoder ---
2
+
3
+ ARCHITECTURE: NSF-HiFiGAN
4
+ RELEASE DATE: 2022-12-11
5
+
6
+ HYPER PARAMETERS:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ NOTICE:
16
+
17
+ All model weights in the [DiffSinger Community Vocoder Project](https://openvpi.github.io/vocoders/), including
18
+ model weights in this directory, are provided by the [OpenVPI Team](https://github.com/openvpi/), under the
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
20
+
21
+
22
+ ACKNOWLEDGEMENTS:
23
+
24
+ Training data of this vocoder is provided and permitted by the following organizations, societies and individuals:
25
+
26
+ 孙飒 https://www.qfssr.cn
27
+ 赤松_Akamatsu https://www.zhibin.club
28
+ 乐威 https://www.zhibin.club
29
+ 伯添 https://space.bilibili.com/24087011
30
+ 雲宇光 https://space.bilibili.com/660675050
31
+ 橙子言 https://space.bilibili.com/318486464
32
+ 人衣大人 https://space.bilibili.com/2270344
33
+ 玖蝶 https://space.bilibili.com/676771003
34
+ Yuuko
35
+ 白夜零BYL https://space.bilibili.com/1605040503
36
+ 嗷天 https://space.bilibili.com/5675252
37
+ 洛泠羽 https://space.bilibili.com/347373318
38
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
39
+ 幽寂 https://space.bilibili.com/478860
40
+ 恶魔王女 https://space.bilibili.com/2475098
41
+ AlexYHX 芮晴
42
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
43
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
44
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
45
+ 1262917464
46
+ 炜阳
47
+ 叶卡yolka
48
+ 幸の夏 https://space.bilibili.com/1017297686
49
+ 暮色未量 https://space.bilibili.com/272904686
50
+ 晓寞sama https://space.bilibili.com/3463394
51
+ 没头绪的节操君
52
+ 串串BunC https://space.bilibili.com/95817834
53
+ 落雨 https://space.bilibili.com/1292427
54
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
55
+ 声闻计划 https://space.bilibili.com/392812269
56
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
57
+ 不伊子
58
+
59
+ Training machines are provided by:
60
+
61
+ 花儿不哭 https://space.bilibili.com/5760446
62
+
63
+
64
+ TERMS OF REDISTRIBUTIONS:
65
+
66
+ 1. Do not sell this vocoder, or charge any fees from redistributing it, as prohibited by
67
+ the license.
68
+ 2. Include a copy of the CC BY-NC-SA 4.0 license, or a link referring to it.
69
+ 3. Include a copy of this notice, or any other notices informing that this vocoder is
70
+ provided by the OpenVPI Team, that this vocoder is licensed under CC BY-NC-SA 4.0, and
71
+ with a complete acknowledgement list as shown above.
72
+ 4. If you fine-tuned or modified the weights, leave a notice about what has been changed.
73
+ 5. (Optional) Leave a link to the official release page of the vocoder, and tell users
74
+ that other versions and future updates of this vocoder can be obtained from the website.
pretrain/nsf_hifigan/NOTICE.zh-CN.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger 社区声码器 ---
2
+
3
+ 架构:NSF-HiFiGAN
4
+ 发布日期:2022-12-11
5
+
6
+ 超参数:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ 注意事项:
16
+
17
+ [DiffSinger 社区声码器企划](https://openvpi.github.io/vocoders/) 中的所有模型权重,
18
+ 包括此目录下的模型权重,均由 [OpenVPI Team](https://github.com/openvpi/) 提供,并基于
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)
20
+ 进行许可。
21
+
22
+
23
+ 致谢:
24
+
25
+ 此声码器的训练数据由以下组织、社团和个人提供并许可:
26
+
27
+ 孙飒 https://www.qfssr.cn
28
+ 赤松_Akamatsu https://www.zhibin.club
29
+ 乐威 https://www.zhibin.club
30
+ 伯添 https://space.bilibili.com/24087011
31
+ 雲宇光 https://space.bilibili.com/660675050
32
+ 橙子言 https://space.bilibili.com/318486464
33
+ 人衣大人 https://space.bilibili.com/2270344
34
+ 玖蝶 https://space.bilibili.com/676771003
35
+ Yuuko
36
+ 白夜零BYL https://space.bilibili.com/1605040503
37
+ 嗷天 https://space.bilibili.com/5675252
38
+ 洛泠羽 https://space.bilibili.com/347373318
39
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
40
+ 幽寂 https://space.bilibili.com/478860
41
+ 恶魔王女 https://space.bilibili.com/2475098
42
+ AlexYHX 芮晴
43
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
44
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
45
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
46
+ 1262917464
47
+ 炜阳
48
+ 叶卡yolka
49
+ 幸の夏 https://space.bilibili.com/1017297686
50
+ 暮色未量 https://space.bilibili.com/272904686
51
+ 晓寞sama https://space.bilibili.com/3463394
52
+ 没头绪的节操君
53
+ 串串BunC https://space.bilibili.com/95817834
54
+ 落雨 https://space.bilibili.com/1292427
55
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
56
+ 声闻计划 https://space.bilibili.com/392812269
57
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
58
+ 不伊子
59
+
60
+ 训练算力的提供者如下:
61
+
62
+ 花儿不哭 https://space.bilibili.com/5760446
63
+
64
+
65
+ 二次分发条款:
66
+
67
+ 1. 请勿售卖此声码器或从其二次分发过程中收取任何费用,因为此类行为受到许可证的禁止。
68
+ 2. 请在二次分发文件中包含一份 CC BY-NC-SA 4.0 许可证的副本或指向该许可证的链接。
69
+ 3. 请在二次分发文件中包含这份声明,或以其他形式声明此声码器由 OpenVPI Team 提供并基于 CC BY-NC-SA 4.0 许可,
70
+ 并附带上述完整的致谢名单。
71
+ 4. 如果您微调或修改了权重,请留下一份关于其受到了何种修改的说明。
72
+ 5.(可选)留下一份指向此声码器的官方发布页面的链接,并告知使用者可从该网站获取此声码器的其他版本和未来的更新。
pretrain/nsf_hifigan/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 4,
4
+ "batch_size": 10,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [ 8, 8, 2, 2, 2],
12
+ "upsample_kernel_sizes": [16,16, 4, 4, 4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+ "discriminator_periods": [3, 5, 7, 11, 17, 23, 37],
17
+
18
+ "segment_size": 16384,
19
+ "num_mels": 128,
20
+ "num_freq": 1025,
21
+ "n_fft" : 2048,
22
+ "hop_size": 512,
23
+ "win_size": 2048,
24
+
25
+ "sampling_rate": 44100,
26
+
27
+ "fmin": 40,
28
+ "fmax": 16000,
29
+ "fmax_for_loss": null,
30
+
31
+ "num_workers": 16,
32
+
33
+ "dist_config": {
34
+ "dist_backend": "nccl",
35
+ "dist_url": "tcp://localhost:54321",
36
+ "world_size": 1
37
+ }
38
+ }
pretrain/nsf_hifigan/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
3
+ size 56825430
pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here ADDED
File without changes
pretrain/put_hubert_ckpt_here ADDED
File without changes
pretrain/rmvpe.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d49bd662038808878c9d7420e0f583f506fe69086cc384f0da88f0b3a4e1115
3
+ size 368492925
raw/put_raw_wav_here ADDED
File without changes