candlend committed on
Commit 51a465c
1 Parent(s): b8d387b

vc_transform desc

app.py CHANGED
@@ -15,9 +15,7 @@ with app:
         gr.HTML(f.read())
     with gr.Tabs():
         with gr.TabItem("语音合成"):
-            vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json")
             vits_inferencer.render()
-        with gr.TabItem("声线转换(开发中)"):
-            sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json")
+        with gr.TabItem("声线转换"):
             sovits_inferencer.render()
 app.launch(server_name='0.0.0.0')
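
In the new version the two inferencer objects are no longer constructed inside their tabs, yet vits_inferencer.render() and sovits_inferencer.render() still run, so both objects must already exist when the Tabs block is built. A minimal sketch of the presumed layout; the constructor arguments come from the removed lines, while the import paths and the placement outside the Tabs block are assumptions, since that part of app.py is not shown in this hunk:

import gradio as gr

from vits.vits_inferencer import VitsInferencer        # import path assumed
from sovits.sovits_inferencer import SovitsInferencer  # import path assumed

# Constructed once before the UI is declared (placement assumed; only the
# arguments are taken from the removed diff lines).
vits_inferencer = VitsInferencer("vits/configs/hoshimi_base.json")
sovits_inferencer = SovitsInferencer("sovits/configs/hoshimi_base.json")

app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("语音合成"):
            vits_inferencer.render()
        with gr.TabItem("声线转换"):
            sovits_inferencer.render()
app.launch(server_name='0.0.0.0')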
sovits/inference/infer_tool.py CHANGED
@@ -145,12 +145,11 @@ def mkdir(paths: list):
 
 class Svc(object):
     def __init__(self, net_g_path, config_path, hubert_path="hubert/hubert-soft-0d54a1f4.pt",
-                 onnx=False):
+                 onnx=False, device="cpu"):
         self.onnx = onnx
         self.net_g_path = net_g_path
         self.hubert_path = hubert_path
-        # self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.dev = torch.device("cpu")
+        self.dev = torch.device(device)
         self.net_g_ms = None
         self.hps_ms = utils.get_hparams_from_file(config_path)
         self.target_sample = self.hps_ms.data.sampling_rate
@@ -160,9 +159,7 @@ class Svc(object):
             self.speakers[sid] = spk
         self.spk2id = self.hps_ms.spk
         # 加载hubert
-        self.hubert_soft = hubert_model.hubert_soft(hubert_path)
-        # if torch.cuda.is_available():
-        #     self.hubert_soft = self.hubert_soft.cuda()
+        self.hubert_soft = hubert_model.hubert_soft(hubert_path).to(device)
         self.load_model()
 
     def load_model(self):
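
The effect of the infer_tool.py change is that the target device is chosen by the caller instead of being hard-coded to CPU: self.dev and the hubert encoder both follow the new device argument, which still defaults to "cpu". A hypothetical usage sketch, not part of this commit; the checkpoint path and the module import location are placeholders:

import torch

from sovits.inference import infer_tool  # module path assumed

# Pick a GPU when one is available, otherwise fall back to the CPU default.
device = "cuda" if torch.cuda.is_available() else "cpu"

svc = infer_tool.Svc(
    "sovits/models/G_latest.pth",             # placeholder checkpoint path
    "sovits/configs/hoshimi_base.json",       # config path as used in app.py
    "sovits/hubert/hubert-soft-0d54a1f4.pt",  # hubert weights as in the diff
    device=device,
)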
sovits/sovits_inferencer.py CHANGED
@@ -16,7 +16,7 @@ class SovitsInferencer:
         self.device = torch.device(device)
         self.hps = utils.get_hparams_from_file(hps_path)
         self.model_path = self.get_latest_model_path()
-        self.svc = infer_tool.Svc(self.model_path, hps_path, "sovits/hubert/hubert-soft-0d54a1f4.pt", device=device)
+        self.svc = infer_tool.Svc(self.model_path, hps_path, "sovits/hubert/hubert-soft-0d54a1f4.pt")
 
     def get_latest_model_path(self):
         model_dir_path = os.path.join(SOVITS_ROOT_PATH, "models")
@@ -54,9 +54,8 @@ class SovitsInferencer:
             _audio = out_audio.cpu().numpy()
             audio.extend(list(_audio))
         out_path = f"./out_temp.wav"
-        soundfile.write(out_path, o_audio, self.svc.target_sample)
-        mistake, var = self.svc.calc_error(audio_path, out_path, tran)
-        return f"半音偏差:{mistake}\n半音方差:{var}", (self.hps.data.sampling_rate, o_audio.numpy())
+        soundfile.write(out_path, audio, self.svc.target_sample)
+        return "转换成功", (self.hps.data.sampling_rate, np.array(audio, dtype=np.float32))
 
     def render(self):
         gr.Markdown("""
@@ -66,7 +65,7 @@ class SovitsInferencer:
             record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
             upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)", type="filepath",
                                     elem_id="audio_inputs")
-            vc_transform = gr.Number(label="升降半音(整数,可以正负,半音数量,升高八度就是12,若原声是男声可以设为9左右)", value=0)
+            vc_transform = gr.Number(label="升降半音(整数,可以正负,半音数量,升高八度就是12,若原声是男声需要设大点)", value=0)
             slice_db = gr.Number(label="过滤分贝(默认-40,嘈杂的音频可以-30,干声保留呼吸可以-50)", value=-40)
             vc_submit = gr.Button("转换", variant="primary")
             out_message = gr.Textbox(label="Output Message")
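
After the sovits_inferencer.py change, the conversion handler writes the concatenated samples to out_temp.wav and returns a status string plus a (sampling_rate, numpy_array) pair, which is the tuple form a gr.Audio output component accepts. A hedged sketch of how that return value could be wired up inside render(); the handler and output component names are illustrative, only the return shape comes from the diff:

vc_output = gr.Audio(label="输出音频")  # hypothetical output component
vc_submit.click(
    self.vc_fn,                         # handler name assumed
    inputs=[record_input, upload_input, vc_transform, slice_db],
    outputs=[out_message, vc_output],   # status text + (sample_rate, ndarray) audio
)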