Spaces:
Sleeping
Sleeping
aoxiang1221
committed on
Commit
•
9912004
1
Parent(s):
85ce65e
update
Browse files- __pycache__/compress_model.cpython-310.pyc +0 -0
- __pycache__/models.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- cluster/__pycache__/__init__.cpython-310.pyc +0 -0
- configs/config.json +0 -0
- dataset_raw/wav_structure.txt +20 -0
- diffusion/__pycache__/__init__.cpython-310.pyc +0 -0
- diffusion/__pycache__/diffusion.cpython-310.pyc +0 -0
- diffusion/__pycache__/unit2mel.cpython-310.pyc +0 -0
- diffusion/__pycache__/vocoder.cpython-310.pyc +0 -0
- diffusion/__pycache__/wavenet.cpython-310.pyc +0 -0
- edgetts/__pycache__/tts_voices.cpython-310.pyc +0 -0
- filelists/test.txt +4 -0
- filelists/train.txt +15 -0
- filelists/val.txt +4 -0
- inference/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/__pycache__/infer_tool.cpython-310.pyc +0 -0
- inference/__pycache__/slicer.cpython-310.pyc +0 -0
- logs/44k/diffusion/put_diffusion_pretrained_model_here +0 -0
- logs/44k/put_pretrained_model_here +0 -0
- modules/F0Predictor/__pycache__/F0Predictor.cpython-310.pyc +0 -0
- modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-310.pyc +0 -0
- modules/F0Predictor/__pycache__/__init__.cpython-310.pyc +0 -0
- modules/__pycache__/DSConv.cpython-310.pyc +0 -0
- modules/__pycache__/__init__.cpython-310.pyc +0 -0
- modules/__pycache__/attentions.cpython-310.pyc +0 -0
- modules/__pycache__/commons.cpython-310.pyc +0 -0
- modules/__pycache__/modules.cpython-310.pyc +0 -0
- pretrain/__init__.py +0 -0
- pretrain/checkpoint_best_legacy_500.pt +3 -0
- pretrain/meta.py +39 -0
- pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here +0 -0
- pretrain/put_hubert_ckpt_here +0 -0
- raw/put_raw_wav_here +0 -0
- trained/nahida/nahida.json +96 -0
- trained/nahida/nahida_G_40000.pth +3 -0
- vdecoder/__pycache__/__init__.cpython-310.pyc +0 -0
- vdecoder/hifigan/__pycache__/env.cpython-310.pyc +0 -0
- vdecoder/hifigan/__pycache__/models.cpython-310.pyc +0 -0
- vdecoder/hifigan/__pycache__/utils.cpython-310.pyc +0 -0
- vdecoder/nsf_hifigan/__pycache__/env.cpython-310.pyc +0 -0
- vdecoder/nsf_hifigan/__pycache__/models.cpython-310.pyc +0 -0
- vdecoder/nsf_hifigan/__pycache__/nvSTFT.cpython-310.pyc +0 -0
- vdecoder/nsf_hifigan/__pycache__/utils.cpython-310.pyc +0 -0
- vencoder/__pycache__/ContentVec256L9.cpython-310.pyc +0 -0
- vencoder/__pycache__/__init__.cpython-310.pyc +0 -0
- vencoder/__pycache__/encoder.cpython-310.pyc +0 -0
__pycache__/compress_model.cpython-310.pyc
ADDED
Binary file (2.22 kB). View file
|
|
__pycache__/models.cpython-310.pyc
ADDED
Binary file (14.7 kB). View file
|
|
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (20.5 kB). View file
|
|
cluster/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (1.06 kB). View file
|
|
configs/config.json
ADDED
File without changes
|
dataset_raw/wav_structure.txt
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
数据集准备
|
2 |
+
|
3 |
+
raw
|
4 |
+
├───speaker0
|
5 |
+
│ ├───xxx1-xxx1.wav
|
6 |
+
│ ├───...
|
7 |
+
│ └───Lxx-0xx8.wav
|
8 |
+
└───speaker1
|
9 |
+
├───xx2-0xxx2.wav
|
10 |
+
├───...
|
11 |
+
└───xxx7-xxx007.wav
|
12 |
+
|
13 |
+
此外还需要编辑config.json
|
14 |
+
|
15 |
+
"n_speakers": 10
|
16 |
+
|
17 |
+
"spk":{
|
18 |
+
"speaker0": 0,
|
19 |
+
"speaker1": 1,
|
20 |
+
}
|
diffusion/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (132 Bytes). View file
|
|
diffusion/__pycache__/diffusion.cpython-310.pyc
ADDED
Binary file (11.3 kB). View file
|
|
diffusion/__pycache__/unit2mel.cpython-310.pyc
ADDED
Binary file (4.9 kB). View file
|
|
diffusion/__pycache__/vocoder.cpython-310.pyc
ADDED
Binary file (3.52 kB). View file
|
|
diffusion/__pycache__/wavenet.cpython-310.pyc
ADDED
Binary file (3.83 kB). View file
|
|
edgetts/__pycache__/tts_voices.cpython-310.pyc
ADDED
Binary file (11.3 kB). View file
|
|
filelists/test.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
./dataset/44k/taffy/000562.wav
|
2 |
+
./dataset/44k/nyaru/000011.wav
|
3 |
+
./dataset/44k/nyaru/000008.wav
|
4 |
+
./dataset/44k/taffy/000563.wav
|
filelists/train.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
./dataset/44k/taffy/000549.wav
|
2 |
+
./dataset/44k/nyaru/000004.wav
|
3 |
+
./dataset/44k/nyaru/000006.wav
|
4 |
+
./dataset/44k/taffy/000551.wav
|
5 |
+
./dataset/44k/nyaru/000009.wav
|
6 |
+
./dataset/44k/taffy/000561.wav
|
7 |
+
./dataset/44k/nyaru/000001.wav
|
8 |
+
./dataset/44k/taffy/000553.wav
|
9 |
+
./dataset/44k/nyaru/000002.wav
|
10 |
+
./dataset/44k/taffy/000560.wav
|
11 |
+
./dataset/44k/taffy/000557.wav
|
12 |
+
./dataset/44k/nyaru/000005.wav
|
13 |
+
./dataset/44k/taffy/000554.wav
|
14 |
+
./dataset/44k/taffy/000550.wav
|
15 |
+
./dataset/44k/taffy/000559.wav
|
filelists/val.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
./dataset/44k/nyaru/000003.wav
|
2 |
+
./dataset/44k/nyaru/000007.wav
|
3 |
+
./dataset/44k/taffy/000558.wav
|
4 |
+
./dataset/44k/taffy/000556.wav
|
inference/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (132 Bytes). View file
|
|
inference/__pycache__/infer_tool.cpython-310.pyc
ADDED
Binary file (15.4 kB). View file
|
|
inference/__pycache__/slicer.cpython-310.pyc
ADDED
Binary file (3.85 kB). View file
|
|
logs/44k/diffusion/put_diffusion_pretrained_model_here
ADDED
File without changes
|
logs/44k/put_pretrained_model_here
ADDED
File without changes
|
modules/F0Predictor/__pycache__/F0Predictor.cpython-310.pyc
ADDED
Binary file (838 Bytes). View file
|
|
modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-310.pyc
ADDED
Binary file (2.46 kB). View file
|
|
modules/F0Predictor/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (142 Bytes). View file
|
|
modules/__pycache__/DSConv.cpython-310.pyc
ADDED
Binary file (2.98 kB). View file
|
|
modules/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (130 Bytes). View file
|
|
modules/__pycache__/attentions.cpython-310.pyc
ADDED
Binary file (11.1 kB). View file
|
|
modules/__pycache__/commons.cpython-310.pyc
ADDED
Binary file (6.39 kB). View file
|
|
modules/__pycache__/modules.cpython-310.pyc
ADDED
Binary file (9.9 kB). View file
|
|
pretrain/__init__.py
ADDED
File without changes
|
pretrain/checkpoint_best_legacy_500.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f54b40fd2802423a5643779c4861af1e9ee9c1564dc9d32f54f20b5ffba7db96
|
3 |
+
size 189507909
|
pretrain/meta.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def download_dict():
    """Return the download table for the supported speech encoders.

    The result maps each speech-encoder name to a dict holding the
    checkpoint's ``"url"`` and the local ``"output"`` path. A new dict is
    built on every call, so callers may mutate the result freely.
    """
    # One row per encoder: (name, download URL, local output path).
    # "vec768l12" and "vec256l9" intentionally list the same URL and path.
    checkpoints = (
        (
            "vec768l12",
            "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
            "./pretrain/checkpoint_best_legacy_500.pt",
        ),
        (
            "vec256l9",
            "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr",
            "./pretrain/checkpoint_best_legacy_500.pt",
        ),
        (
            "hubertsoft",
            "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
            "./pretrain/hubert-soft-0d54a1f4.pt",
        ),
        (
            "whisper-ppg-small",
            "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
            "./pretrain/small.pt",
        ),
        (
            "whisper-ppg",
            "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
            "./pretrain/medium.pt",
        ),
        (
            "whisper-ppg-large",
            "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
            "./pretrain/large-v2.pt",
        ),
    )
    return {
        name: {"url": url, "output": output}
        for name, url, output in checkpoints
    }
|
28 |
+
|
29 |
+
|
30 |
+
def get_speech_encoder(config_path="configs/config.json"):
    """Return the download URL and output path of the configured speech encoder.

    Reads the JSON file at ``config_path``, takes the value stored under
    ``config["model"]["speech_encoder"]`` and looks it up in the table
    returned by ``download_dict()``.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        A ``(url, output)`` tuple taken from the matching table entry.

    Raises:
        OSError: If ``config_path`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the config has no ``model``/``speech_encoder`` entry, or
            names an encoder that ``download_dict()`` does not list.
    """
    import json

    # JSON text is UTF-8; do not rely on the platform's locale encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    speech_encoder = config["model"]["speech_encoder"]
    encoders = download_dict()
    try:
        entry = encoders[speech_encoder]
    except KeyError:
        # Same exception type as a plain lookup, but say what went wrong.
        raise KeyError(
            f"unknown speech_encoder {speech_encoder!r} in {config_path!r}; "
            f"expected one of {sorted(encoders)}"
        ) from None

    return entry["url"], entry["output"]
|
pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here
ADDED
File without changes
|
pretrain/put_hubert_ckpt_here
ADDED
File without changes
|
raw/put_raw_wav_here
ADDED
File without changes
|
trained/nahida/nahida.json
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 800,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.00015,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 12,
|
14 |
+
"fp16_run": false,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 10240,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0,
|
21 |
+
"use_sr": true,
|
22 |
+
"max_speclen": 512,
|
23 |
+
"port": "8001",
|
24 |
+
"keep_ckpts": 3,
|
25 |
+
"all_in_mem": false
|
26 |
+
},
|
27 |
+
"data": {
|
28 |
+
"training_files": "filelists/train.txt",
|
29 |
+
"validation_files": "filelists/val.txt",
|
30 |
+
"max_wav_value": 32768.0,
|
31 |
+
"sampling_rate": 44100,
|
32 |
+
"filter_length": 2048,
|
33 |
+
"hop_length": 512,
|
34 |
+
"win_length": 2048,
|
35 |
+
"n_mel_channels": 80,
|
36 |
+
"mel_fmin": 0.0,
|
37 |
+
"mel_fmax": 22050
|
38 |
+
},
|
39 |
+
"model": {
|
40 |
+
"inter_channels": 192,
|
41 |
+
"hidden_channels": 192,
|
42 |
+
"filter_channels": 768,
|
43 |
+
"n_heads": 2,
|
44 |
+
"n_layers": 6,
|
45 |
+
"kernel_size": 3,
|
46 |
+
"p_dropout": 0.1,
|
47 |
+
"resblock": "1",
|
48 |
+
"resblock_kernel_sizes": [
|
49 |
+
3,
|
50 |
+
7,
|
51 |
+
11
|
52 |
+
],
|
53 |
+
"resblock_dilation_sizes": [
|
54 |
+
[
|
55 |
+
1,
|
56 |
+
3,
|
57 |
+
5
|
58 |
+
],
|
59 |
+
[
|
60 |
+
1,
|
61 |
+
3,
|
62 |
+
5
|
63 |
+
],
|
64 |
+
[
|
65 |
+
1,
|
66 |
+
3,
|
67 |
+
5
|
68 |
+
]
|
69 |
+
],
|
70 |
+
"upsample_rates": [
|
71 |
+
8,
|
72 |
+
8,
|
73 |
+
2,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4,
|
83 |
+
4
|
84 |
+
],
|
85 |
+
"n_layers_q": 3,
|
86 |
+
"use_spectral_norm": false,
|
87 |
+
"gin_channels": 256,
|
88 |
+
"ssl_dim": 256,
|
89 |
+
"n_speakers": 1,
|
90 |
+
"speech_encoder": "vec256l9",
|
91 |
+
"speaker_embedding": false
|
92 |
+
},
|
93 |
+
"spk": {
|
94 |
+
"nahida": 0
|
95 |
+
}
|
96 |
+
}
|
trained/nahida/nahida_G_40000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fec9506f8a762516e5321bcaedc72b6891d61a631e82ac596fe4e16e45b4652a
|
3 |
+
size 542178077
|
vdecoder/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (131 Bytes). View file
|
|
vdecoder/hifigan/__pycache__/env.cpython-310.pyc
ADDED
Binary file (804 Bytes). View file
|
|
vdecoder/hifigan/__pycache__/models.cpython-310.pyc
ADDED
Binary file (16.3 kB). View file
|
|
vdecoder/hifigan/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.3 kB). View file
|
|
vdecoder/nsf_hifigan/__pycache__/env.cpython-310.pyc
ADDED
Binary file (808 Bytes). View file
|
|
vdecoder/nsf_hifigan/__pycache__/models.cpython-310.pyc
ADDED
Binary file (14.2 kB). View file
|
|
vdecoder/nsf_hifigan/__pycache__/nvSTFT.cpython-310.pyc
ADDED
Binary file (4.26 kB). View file
|
|
vdecoder/nsf_hifigan/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.35 kB). View file
|
|
vencoder/__pycache__/ContentVec256L9.cpython-310.pyc
ADDED
Binary file (1.55 kB). View file
|
|
vencoder/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (131 Bytes). View file
|
|
vencoder/__pycache__/encoder.cpython-310.pyc
ADDED
Binary file (757 Bytes). View file
|
|