yl12053 commited on
Commit
ae80945
1 Parent(s): 5f63ddc
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pretrain/nsf_hifigan/model filter=lfs diff=lfs merge=lfs -text
pretrain/checkpoint_best_legacy_500.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d936ec5a566776fc392e69ad8b630d14eb588111233fe313436e200a7b187b
3
+ size 1330114945
pretrain/hubert-soft-0d54a1f4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82e7d079df05fe3aa535f6f7d42d309bdae1d2a53324e2b2386c56721f4f649
3
+ size 378435957
pretrain/medium.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1
3
+ size 1528008539
pretrain/meta.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def download_dict():
2
+ return {
3
+ "vec768l12": {
4
+ "url": "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
5
+ "output": "./pretrain/checkpoint_best_legacy_500.pt"
6
+ },
7
+ "vec256l9": {
8
+ "url": "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
9
+ "output": "./pretrain/checkpoint_best_legacy_500.pt"
10
+ },
11
+ "hubertsoft": {
12
+ "url": "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
13
+ "output": "./pretrain/hubert-soft-0d54a1f4.pt"
14
+ },
15
+ "whisper-ppg": {
16
+ "url": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
17
+ "output": "./pretrain/medium.pt"
18
+ }
19
+ }
20
+
21
+
22
+ def get_speech_encoder(config_path="configs/config.json"):
23
+ import json
24
+
25
+ with open(config_path, "r") as f:
26
+ data = f.read()
27
+ config = json.loads(data)
28
+ speech_encoder = config["model"]["speech_encoder"]
29
+ dict = download_dict()
30
+
31
+ return dict[speech_encoder]["url"], dict[speech_encoder]["output"]
pretrain/nsf_hifigan/NOTICE.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger Community Vocoder ---
2
+
3
+ ARCHITECTURE: NSF-HiFiGAN
4
+ RELEASE DATE: 2022-12-11
5
+
6
+ HYPER PARAMETERS:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ NOTICE:
16
+
17
+ All model weights in the [DiffSinger Community Vocoder Project](https://openvpi.github.io/vocoders/), including
18
+ model weights in this directory, are provided by the [OpenVPI Team](https://github.com/openvpi/), under the
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
20
+
21
+
22
+ ACKNOWLEDGEMENTS:
23
+
24
+ Training data of this vocoder is provided and permitted by the following organizations, societies and individuals:
25
+
26
+ 孙飒 https://www.qfssr.cn
27
+ 赤松_Akamatsu https://www.zhibin.club
28
+ 乐威 https://www.zhibin.club
29
+ 伯添 https://space.bilibili.com/24087011
30
+ 雲宇光 https://space.bilibili.com/660675050
31
+ 橙子言 https://space.bilibili.com/318486464
32
+ 人衣大人 https://space.bilibili.com/2270344
33
+ 玖蝶 https://space.bilibili.com/676771003
34
+ Yuuko
35
+ 白夜零BYL https://space.bilibili.com/1605040503
36
+ 嗷天 https://space.bilibili.com/5675252
37
+ 洛泠羽 https://space.bilibili.com/347373318
38
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
39
+ 幽寂 https://space.bilibili.com/478860
40
+ 恶魔王女 https://space.bilibili.com/2475098
41
+ AlexYHX 芮晴
42
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
43
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
44
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
45
+ 1262917464
46
+ 炜阳
47
+ 叶卡yolka
48
+ 幸の夏 https://space.bilibili.com/1017297686
49
+ 暮色未量 https://space.bilibili.com/272904686
50
+ 晓寞sama https://space.bilibili.com/3463394
51
+ 没头绪的节操君
52
+ 串串BunC https://space.bilibili.com/95817834
53
+ 落雨 https://space.bilibili.com/1292427
54
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
55
+ 声闻计划 https://space.bilibili.com/392812269
56
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
57
+ 不伊子
58
+
59
+ Training machines are provided by:
60
+
61
+ 花儿不哭 https://space.bilibili.com/5760446
62
+
63
+
64
+ TERMS OF REDISTRIBUTIONS:
65
+
66
+ 1. Do not sell this vocoder, or charge any fees from redistributing it, as prohibited by
67
+ the license.
68
+ 2. Include a copy of the CC BY-NC-SA 4.0 license, or a link referring to it.
69
+ 3. Include a copy of this notice, or any other notices informing that this vocoder is
70
+ provided by the OpenVPI Team, that this vocoder is licensed under CC BY-NC-SA 4.0, and
71
+ with a complete acknowledgement list as shown above.
72
+ 4. If you fine-tuned or modified the weights, leave a notice about what has been changed.
73
+ 5. (Optional) Leave a link to the official release page of the vocoder, and tell users
74
+ that other versions and future updates of this vocoder can be obtained from the website.
pretrain/nsf_hifigan/NOTICE.zh-CN.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger 社区声码器 ---
2
+
3
+ 架构:NSF-HiFiGAN
4
+ 发布日期:2022-12-11
5
+
6
+ 超参数:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ 注意事项:
16
+
17
+ [DiffSinger 社区声码器企划](https://openvpi.github.io/vocoders/) 中的所有模型权重,
18
+ 包括此目录下的模型权重,均由 [OpenVPI Team](https://github.com/openvpi/) 提供,并基于
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)
20
+ 进行许可。
21
+
22
+
23
+ 致谢:
24
+
25
+ 此声码器的训练数据由以下组织、社团和个人提供并许可:
26
+
27
+ 孙飒 https://www.qfssr.cn
28
+ 赤松_Akamatsu https://www.zhibin.club
29
+ 乐威 https://www.zhibin.club
30
+ 伯添 https://space.bilibili.com/24087011
31
+ 雲宇光 https://space.bilibili.com/660675050
32
+ 橙子言 https://space.bilibili.com/318486464
33
+ 人衣大人 https://space.bilibili.com/2270344
34
+ 玖蝶 https://space.bilibili.com/676771003
35
+ Yuuko
36
+ 白夜零BYL https://space.bilibili.com/1605040503
37
+ 嗷天 https://space.bilibili.com/5675252
38
+ 洛泠羽 https://space.bilibili.com/347373318
39
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
40
+ 幽寂 https://space.bilibili.com/478860
41
+ 恶魔王女 https://space.bilibili.com/2475098
42
+ AlexYHX 芮晴
43
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
44
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
45
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
46
+ 1262917464
47
+ 炜阳
48
+ 叶卡yolka
49
+ 幸の夏 https://space.bilibili.com/1017297686
50
+ 暮色未量 https://space.bilibili.com/272904686
51
+ 晓寞sama https://space.bilibili.com/3463394
52
+ 没头绪的节操君
53
+ 串串BunC https://space.bilibili.com/95817834
54
+ 落雨 https://space.bilibili.com/1292427
55
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
56
+ 声闻计划 https://space.bilibili.com/392812269
57
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
58
+ 不伊子
59
+
60
+ 训练算力的提供者如下:
61
+
62
+ 花儿不哭 https://space.bilibili.com/5760446
63
+
64
+
65
+ 二次分发条款:
66
+
67
+ 1. 请勿售卖此声码器或从其二次分发过程中收取任何费用,因为此类行为受到许可证的禁止。
68
+ 2. 请在二次分发文件中包含一份 CC BY-NC-SA 4.0 许可证的副本或指向该许可证的链接。
69
+ 3. 请在二次分发文件中包含这份声明,或以其他形式声明此声码器由 OpenVPI Team 提供并基于 CC BY-NC-SA 4.0 许可,
70
+ 并附带上述完整的致谢名单。
71
+ 4. 如果您微调或修改了权重,请留下一份关于其受到了何种修改的说明。
72
+ 5.(可选)留下一份指向此声码器的官方发布页面的链接,并告知使用者可从该网站获取此声码器的其他版本和未来的更新。
pretrain/nsf_hifigan/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 4,
4
+ "batch_size": 10,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [ 8, 8, 2, 2, 2],
12
+ "upsample_kernel_sizes": [16,16, 4, 4, 4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+ "discriminator_periods": [3, 5, 7, 11, 17, 23, 37],
17
+
18
+ "segment_size": 16384,
19
+ "num_mels": 128,
20
+ "num_freq": 1025,
21
+ "n_fft" : 2048,
22
+ "hop_size": 512,
23
+ "win_size": 2048,
24
+
25
+ "sampling_rate": 44100,
26
+
27
+ "fmin": 40,
28
+ "fmax": 16000,
29
+ "fmax_for_loss": null,
30
+
31
+ "num_workers": 16,
32
+
33
+ "dist_config": {
34
+ "dist_backend": "nccl",
35
+ "dist_url": "tcp://localhost:54321",
36
+ "world_size": 1
37
+ }
38
+ }
pretrain/nsf_hifigan/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
3
+ size 56825430
pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here ADDED
File without changes
pretrain/put_hubert_ckpt_here ADDED
File without changes