# vits-hoshimi / sovits/sovits_inferencer.py
# Hosting-page metadata captured with the file (not part of the program):
#   author: candlend
#   last commit: "fix deploy env" (799ff6a)
#   size: 2.66 kB
import os
import tempfile

import gradio as gr
import soundfile
import torch

import utils
import infer_tool
from sovits import ROOT_PATH
class SovitsInferencer:
    """Gradio front end for singing-voice conversion with ``infer_tool.Svc``.

    The instance loads the hyper-parameters from ``hps_path``, picks the
    checkpoint returned by ``utils.latest_checkpoint_path`` for
    ``<ROOT_PATH>/models/G_*.pth`` and exposes:

    * ``infer``  - the callback that converts one audio clip, and
    * ``render`` - the function that builds the Gradio widgets and wires
      them to ``infer``.
    """

    # Longest accepted input clip, in seconds. The rejection message in
    # ``infer`` spells this number out literally, so keep the two in sync.
    # NOTE(review): the text shown by ``render`` advertises a 45 s limit
    # while this check allows 60 s - confirm which one is intended.
    MAX_DURATION_SECONDS = 60

    def __init__(self, hps_path, device="cpu"):
        """Load the configuration and the most recent generator checkpoint.

        Args:
            hps_path: Path of the hyper-parameter file; passed to both
                ``utils.get_hparams_from_file`` and ``infer_tool.Svc``.
            device: Device specifier accepted by ``torch.device``
                (defaults to ``"cpu"``).
        """
        print("init")
        self.device = torch.device(device)
        self.hps = utils.get_hparams_from_file(hps_path)
        self.model_path = self.get_latest_model_path()
        self.svc = infer_tool.Svc(self.model_path, hps_path, device=device)

    def get_latest_model_path(self):
        """Return the latest ``G_*.pth`` checkpoint under ``<ROOT_PATH>/models``.

        The choice of "latest" is delegated to
        ``utils.latest_checkpoint_path``.
        """
        model_dir_path = os.path.join(ROOT_PATH, "models")
        return utils.latest_checkpoint_path(model_dir_path, "G_*.pth")

    def infer(self, audio_record, audio_upload, tran):
        """Convert one audio clip and report the pitch error of the result.

        Args:
            audio_record: File path of the microphone recording, or ``None``.
            audio_upload: File path of the uploaded file, or ``None``.
                When both inputs are given the upload wins.
            tran: Pitch shift forwarded unchanged to ``Svc.infer`` and
                ``Svc.calc_error`` (the UI describes it as semitones).

        Returns:
            A ``(message, audio)`` pair. ``audio`` is ``None`` when the
            request is rejected (no input, or clip longer than
            ``MAX_DURATION_SECONDS``); otherwise it is
            ``(sampling_rate, samples)`` with ``samples`` a NumPy array.
        """
        if audio_upload is not None:
            audio_path = audio_upload
        elif audio_record is not None:
            audio_path = audio_record
        else:
            return "你需要上传wav文件或使用网页内置的录音!", None

        audio, sampling_rate = self.svc.format_wav(audio_path)
        # shape[1] is used as the sample count, i.e. the audio is expected
        # to be laid out as (channels, samples) - TODO confirm in format_wav.
        duration = audio.shape[1] / sampling_rate
        if duration > self.MAX_DURATION_SECONDS:
            return "请上传小于60s的音频,需要转换长音频请使用colab", None

        # First argument is fixed to 0 (presumably the speaker id; the UI
        # does not expose a speaker selector). The second return value is
        # not needed here.
        o_audio, _ = self.svc.infer(0, tran, audio_path)
        # Move to host memory and convert once; reused for the file and
        # for the value handed back to Gradio.
        samples = o_audio.cpu().numpy()

        # calc_error needs the result as a file on disk. A unique temporary
        # file is used instead of a fixed "./out_temp.wav" so that
        # concurrent requests cannot overwrite each other's output between
        # the write and the read.
        fd, out_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)  # soundfile reopens the file by path
        try:
            soundfile.write(out_path, samples, self.svc.target_sample)
            mistake, var = self.svc.calc_error(audio_path, out_path, tran)
        finally:
            if os.path.exists(out_path):
                os.remove(out_path)

        return (
            f"半音偏差:{mistake}\n半音方差:{var}",
            (self.hps.data.sampling_rate, samples),
        )

    def render(self):
        """Create the Gradio components and bind the button to ``infer``.

        Builds the description text, the two audio inputs (microphone and
        upload), the pitch-shift number field, the submit button and the
        two outputs (message text box and audio player).
        """
        gr.Markdown("""
未完成,效果有待提升\n
该模型适合**歌声**的声线转换,目前仅支持**45s以内**、**无伴奏**、**单声道**的**wav或mp3格式**文件
""")
        record_input = gr.Audio(
            source="microphone",
            label="录制你的声音",
            type="filepath",
            elem_id="audio_inputs",
        )
        upload_input = gr.Audio(
            source="upload",
            label="上传音频(长度小于45秒)",
            type="filepath",
            elem_id="audio_inputs",
        )
        # No speaker selector is shown; ``infer`` always passes 0 as the
        # first argument of ``Svc.infer``.
        vc_transform = gr.Number(
            label="升降半音(整数,可以正负,半音数量,升高八度就是12)",
            value=0,
        )
        vc_submit = gr.Button("转换", variant="primary")
        out_message = gr.Textbox(label="Output Message")
        out_audio = gr.Audio(label="Output Audio")
        # Input order must match the parameter order of ``infer``.
        vc_submit.click(
            self.infer,
            [record_input, upload_input, vc_transform],
            [out_message, out_audio],
        )