Spaces:
Runtime error
Runtime error
candlend
committed on
Commit
•
799ff6a
1
Parent(s):
c4c115b
fix deploy env
Browse files
- app.py +6 -4
- requirements.txt +3 -2
- sovits/hubert_model.py +2 -2
- sovits/sovits_inferencer.py +3 -3
- vits/vits_inferencer.py +1 -1
app.py
CHANGED
@@ -1,16 +1,18 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from vits.vits_inferencer import VitsInferencer
|
3 |
from sovits.sovits_inferencer import SovitsInferencer
|
4 |
|
5 |
app = gr.Blocks()
|
6 |
with app:
|
7 |
-
with open("header.html", "r") as f:
|
8 |
gr.HTML(f.read())
|
9 |
with gr.Tabs():
|
10 |
with gr.TabItem("语音合成"):
|
11 |
-
vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json")
|
12 |
vits_inferencer.render()
|
13 |
with gr.TabItem("声线转换(开发中)"):
|
14 |
-
sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json")
|
15 |
sovits_inferencer.render()
|
16 |
-
app.launch()
|
|
|
1 |
+
#encoding=utf-8
|
2 |
+
from base64 import encode
|
3 |
import gradio as gr
|
4 |
from vits.vits_inferencer import VitsInferencer
|
5 |
from sovits.sovits_inferencer import SovitsInferencer
|
6 |
|
7 |
app = gr.Blocks()
|
8 |
with app:
|
9 |
+
with open("header.html", "r", encoding="utf-8") as f:
|
10 |
gr.HTML(f.read())
|
11 |
with gr.Tabs():
|
12 |
with gr.TabItem("语音合成"):
|
13 |
+
vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json", device="cuda")
|
14 |
vits_inferencer.render()
|
15 |
with gr.TabItem("声线转换(开发中)"):
|
16 |
+
sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json", device="cuda")
|
17 |
sovits_inferencer.render()
|
18 |
+
app.launch(server_name='0.0.0.0')
|
requirements.txt
CHANGED
@@ -15,10 +15,11 @@ ko-pron==1.3
|
|
15 |
inflect==6.0.0
|
16 |
eng-to-ipa==0.0.2
|
17 |
num-thai==0.0.5
|
18 |
-
opencc==1.1.
|
19 |
scikit-maad
|
20 |
torch
|
21 |
torchvision
|
22 |
torchaudio
|
23 |
numpy
|
24 |
-
pyworld
|
|
|
|
15 |
inflect==6.0.0
|
16 |
eng-to-ipa==0.0.2
|
17 |
num-thai==0.0.5
|
18 |
+
opencc==1.1.1
|
19 |
scikit-maad
|
20 |
torch
|
21 |
torchvision
|
22 |
torchaudio
|
23 |
numpy
|
24 |
+
pyworld
|
25 |
+
gradio
|
sovits/hubert_model.py
CHANGED
@@ -214,8 +214,8 @@ def hubert_soft(
|
|
214 |
Args:
|
215 |
path (str): path of a pretrained model
|
216 |
"""
|
217 |
-
|
218 |
-
dev = torch.device("cpu")
|
219 |
hubert = HubertSoft()
|
220 |
checkpoint = torch.load(path)
|
221 |
consume_prefix_in_state_dict_if_present(checkpoint, "module.")
|
|
|
214 |
Args:
|
215 |
path (str): path of a pretrained model
|
216 |
"""
|
217 |
+
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
218 |
+
# dev = torch.device("cpu")
|
219 |
hubert = HubertSoft()
|
220 |
checkpoint = torch.load(path)
|
221 |
consume_prefix_in_state_dict_if_present(checkpoint, "module.")
|
sovits/sovits_inferencer.py
CHANGED
@@ -13,7 +13,7 @@ class SovitsInferencer:
|
|
13 |
self.device = torch.device(device)
|
14 |
self.hps = utils.get_hparams_from_file(hps_path)
|
15 |
self.model_path = self.get_latest_model_path()
|
16 |
-
self.svc = infer_tool.Svc(self.model_path, hps_path)
|
17 |
|
18 |
def get_latest_model_path(self):
|
19 |
model_dir_path = os.path.join(ROOT_PATH, "models")
|
@@ -34,9 +34,9 @@ class SovitsInferencer:
|
|
34 |
|
35 |
o_audio, out_sr = self.svc.infer(0, tran, audio_path)
|
36 |
out_path = f"./out_temp.wav"
|
37 |
-
soundfile.write(out_path, o_audio, self.svc.target_sample)
|
38 |
mistake, var = self.svc.calc_error(audio_path, out_path, tran)
|
39 |
-
return f"半音偏差:{mistake}\n半音方差:{var}", (self.hps.data.sampling_rate, o_audio.numpy())
|
40 |
|
41 |
def render(self):
|
42 |
gr.Markdown("""
|
|
|
13 |
self.device = torch.device(device)
|
14 |
self.hps = utils.get_hparams_from_file(hps_path)
|
15 |
self.model_path = self.get_latest_model_path()
|
16 |
+
self.svc = infer_tool.Svc(self.model_path, hps_path, device=device)
|
17 |
|
18 |
def get_latest_model_path(self):
|
19 |
model_dir_path = os.path.join(ROOT_PATH, "models")
|
|
|
34 |
|
35 |
o_audio, out_sr = self.svc.infer(0, tran, audio_path)
|
36 |
out_path = f"./out_temp.wav"
|
37 |
+
soundfile.write(out_path, o_audio.cpu(), self.svc.target_sample)
|
38 |
mistake, var = self.svc.calc_error(audio_path, out_path, tran)
|
39 |
+
return f"半音偏差:{mistake}\n半音方差:{var}", (self.hps.data.sampling_rate, o_audio.cpu().numpy())
|
40 |
|
41 |
def render(self):
|
42 |
gr.Markdown("""
|
vits/vits_inferencer.py
CHANGED
@@ -51,7 +51,7 @@ class VitsInferencer:
|
|
51 |
with torch.no_grad():
|
52 |
x_tst = stn_tst.unsqueeze(0).to(self.device)
|
53 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(self.device)
|
54 |
-
audio = self.models[mode].infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().numpy()
|
55 |
return (self.hps.data.sampling_rate, audio)
|
56 |
|
57 |
def change_mode(self, mode):
|
|
|
51 |
with torch.no_grad():
|
52 |
x_tst = stn_tst.unsqueeze(0).to(self.device)
|
53 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(self.device)
|
54 |
+
audio = self.models[mode].infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().cpu().numpy()
|
55 |
return (self.hps.data.sampling_rate, audio)
|
56 |
|
57 |
def change_mode(self, mode):
|