candlend committed
Commit 799ff6a
1 Parent(s): c4c115b

fix deploy env
app.py CHANGED
@@ -1,16 +1,18 @@
+#encoding=utf-8
+from base64 import encode
 import gradio as gr
 from vits.vits_inferencer import VitsInferencer
 from sovits.sovits_inferencer import SovitsInferencer
 
 app = gr.Blocks()
 with app:
-    with open("header.html", "r") as f:
+    with open("header.html", "r", encoding="utf-8") as f:
         gr.HTML(f.read())
     with gr.Tabs():
         with gr.TabItem("语音合成"):
-            vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json")
+            vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json", device="cuda")
             vits_inferencer.render()
         with gr.TabItem("声线转换(开发中)"):
-            sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json")
+            sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json", device="cuda")
             sovits_inferencer.render()
-app.launch()
+app.launch(server_name='0.0.0.0')
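This commit hardcodes device="cuda" for both inferencers, which assumes the deploy target always has a GPU (the new `from base64 import encode` line also appears unused; `base64.encode` operates on file objects and is unrelated to text encoding). A minimal sketch of a safer device choice, mirroring the fallback used later in hubert_model.py; the `select_device` helper is hypothetical, not part of this repo:

import torch

def select_device(preferred: str = "cuda") -> str:
    """Hypothetical helper: fall back to CPU when CUDA is unavailable."""
    if preferred == "cuda" and not torch.cuda.is_available():
        return "cpu"
    return preferred

# device = select_device("cuda")
# vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json", device=device)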
requirements.txt CHANGED
@@ -15,10 +15,11 @@ ko-pron==1.3
 inflect==6.0.0
 eng-to-ipa==0.0.2
 num-thai==0.0.5
-opencc==1.1.4
+opencc==1.1.1
 scikit-maad
 torch
 torchvision
 torchaudio
 numpy
-pyworld
+pyworld
+gradio
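The opencc pin is downgraded to 1.1.1, presumably because 1.1.4 did not install cleanly in the deployment environment (an assumption; the commit message only says "fix deploy env"), and gradio becomes an explicit dependency. A quick sketch for confirming which versions actually resolved at runtime:

# Sanity-check sketch: print the installed versions of the touched packages.
import importlib.metadata as md

for pkg in ("opencc", "gradio"):
    print(pkg, md.version(pkg))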
sovits/hubert_model.py CHANGED
@@ -214,8 +214,8 @@ def hubert_soft(
     Args:
         path (str): path of a pretrained model
     """
-    # dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    dev = torch.device("cpu")
+    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # dev = torch.device("cpu")
     hubert = HubertSoft()
     checkpoint = torch.load(path)
     consume_prefix_in_state_dict_if_present(checkpoint, "module.")
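One thing the commit leaves untouched: `torch.load(path)` with no `map_location` will fail on a CPU-only host if the checkpoint was serialized from GPU tensors. A sketch of the usual guard, not part of this commit (the checkpoint path is illustrative):

import torch

# Sketch: load the checkpoint onto the device selected above, so a
# GPU-saved checkpoint still deserializes on a CPU-only machine.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = torch.load("models/hubert-soft.pt", map_location=dev)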
sovits/sovits_inferencer.py CHANGED
@@ -13,7 +13,7 @@ class SovitsInferencer:
         self.device = torch.device(device)
         self.hps = utils.get_hparams_from_file(hps_path)
         self.model_path = self.get_latest_model_path()
-        self.svc = infer_tool.Svc(self.model_path, hps_path)
+        self.svc = infer_tool.Svc(self.model_path, hps_path, device=device)
 
     def get_latest_model_path(self):
         model_dir_path = os.path.join(ROOT_PATH, "models")
@@ -34,9 +34,9 @@ class SovitsInferencer:
 
         o_audio, out_sr = self.svc.infer(0, tran, audio_path)
         out_path = f"./out_temp.wav"
-        soundfile.write(out_path, o_audio, self.svc.target_sample)
+        soundfile.write(out_path, o_audio.cpu(), self.svc.target_sample)
         mistake, var = self.svc.calc_error(audio_path, out_path, tran)
-        return f"半音偏差:{mistake}\n半音方差:{var}", (self.hps.data.sampling_rate, o_audio.numpy())
+        return f"半音偏差:{mistake}\n半音方差:{var}", (self.hps.data.sampling_rate, o_audio.cpu().numpy())
 
     def render(self):
         gr.Markdown("""
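The recurring fix in this file: once inference runs on CUDA, the output tensor lives in GPU memory, and both `soundfile.write` and `.numpy()` need it in host memory. A self-contained illustration of the failure and the fix (standalone sketch, not repo code):

import torch

audio = torch.randn(16000)            # stand-in for model output
if torch.cuda.is_available():
    audio = audio.cuda()

# On a CUDA tensor, audio.numpy() raises:
#   TypeError: can't convert cuda:0 device type tensor to numpy.
# Tensor.cpu() copies it back to host memory first.
samples = audio.detach().cpu().numpy()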
vits/vits_inferencer.py CHANGED
@@ -51,7 +51,7 @@ class VitsInferencer:
         with torch.no_grad():
             x_tst = stn_tst.unsqueeze(0).to(self.device)
             x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(self.device)
-            audio = self.models[mode].infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().numpy()
+            audio = self.models[mode].infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().cpu().numpy()
         return (self.hps.data.sampling_rate, audio)
 
     def change_mode(self, mode):
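Both inferencers return `(sampling_rate, audio)` because that tuple is the NumPy form a `gr.Audio` output component accepts. A self-contained sketch of the same wiring, with an illustrative tone generator standing in for the model:

import gradio as gr
import numpy as np

def synth(freq: float):
    # Illustrative stand-in for VitsInferencer.infer: a 1-second sine tone.
    sr = 22050
    t = np.linspace(0, 1, sr, endpoint=False)
    audio = 0.5 * np.sin(2 * np.pi * freq * t).astype(np.float32)
    return (sr, audio)  # the (rate, ndarray) pair gr.Audio expects

demo = gr.Interface(fn=synth, inputs=gr.Number(value=440), outputs=gr.Audio())
# demo.launch(server_name="0.0.0.0")  # same bind-all launch as app.py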