import os
import tempfile

import gradio as gr
import soundfile
import torch

import infer_tool
import utils
from sovits import ROOT_PATH


class SovitsInferencer:
    """Gradio front-end around ``infer_tool.Svc`` for voice conversion.

    On construction the newest ``G_*.pth`` checkpoint found in
    ``ROOT_PATH/models`` is loaded. ``render`` builds the UI widgets and
    wires the submit button to ``infer``.
    """

    def __init__(self, hps_path, device="cpu"):
        """Load hyper-parameters and the latest generator checkpoint.

        Args:
            hps_path: Path of the hyper-parameter file; passed to
                ``utils.get_hparams_from_file`` and to ``infer_tool.Svc``.
            device: Torch device string, forwarded to ``infer_tool.Svc``.
        """
        print("init")
        self.device = torch.device(device)
        self.hps = utils.get_hparams_from_file(hps_path)
        self.model_path = self.get_latest_model_path()
        self.svc = infer_tool.Svc(self.model_path, hps_path, device=device)

    def get_latest_model_path(self):
        """Return the newest ``G_*.pth`` checkpoint in ``ROOT_PATH/models``."""
        model_dir_path = os.path.join(ROOT_PATH, "models")
        return utils.latest_checkpoint_path(model_dir_path, "G_*.pth")

    def infer(self, audio_record, audio_upload, tran):
        """Convert one audio clip and report the pitch error.

        Args:
            audio_record: File path of the microphone recording, or ``None``.
            audio_upload: File path of the uploaded file, or ``None``.
                Takes precedence over ``audio_record`` when both are given.
            tran: Pitch shift; forwarded unchanged to ``Svc.infer`` and
                ``Svc.calc_error``.

        Returns:
            A ``(message, audio)`` tuple. On success ``audio`` is
            ``(sampling_rate, samples)``; when no input was supplied or
            the clip is too long, ``audio`` is ``None`` and ``message``
            explains why.
        """
        if audio_upload is not None:
            audio_path = audio_upload
        elif audio_record is not None:
            audio_path = audio_record
        else:
            return "你需要上传wav文件或使用网页内置的录音!", None

        audio, sampling_rate = self.svc.format_wav(audio_path)
        duration = audio.shape[1] / sampling_rate
        # NOTE(review): the UI text in render() advertises a 45 s limit
        # while 60 s is enforced here -- confirm which one is intended.
        if duration > 60:
            return "请上传小于60s的音频,需要转换长音频请使用colab", None

        # First argument is presumably the speaker id (fixed to 0 because
        # the speaker selector in render() is disabled) -- TODO confirm.
        o_audio, _ = self.svc.infer(0, tran, audio_path)
        samples = o_audio.cpu().numpy()

        # A per-request temporary directory keeps concurrent requests from
        # overwriting each other's intermediate file and removes it
        # afterwards (previously a fixed "./out_temp.wav" in the CWD).
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_path = os.path.join(tmp_dir, "out_temp.wav")
            soundfile.write(out_path, samples, self.svc.target_sample)
            mistake, var = self.svc.calc_error(audio_path, out_path, tran)

        message = f"半音偏差:{mistake}\n半音方差:{var}"
        return message, (self.hps.data.sampling_rate, samples)

    def render(self):
        """Create the Gradio widgets and bind the submit button to ``infer``.

        Presumably has to be called inside an active ``gr.Blocks``
        context -- verify against the caller.
        """
        gr.Markdown(""" 未完成,效果有待提升\n 该模型适合**歌声**的声线转换,目前仅支持**45s以内**、**无伴奏**、**单声道**的**wav或mp3格式**文件 """)
        record_input = gr.Audio(
            source="microphone",
            label="录制你的声音",
            type="filepath",
            elem_id="audio_inputs",
        )
        upload_input = gr.Audio(
            source="upload",
            label="上传音频(长度小于45秒)",
            type="filepath",
            elem_id="audio_inputs",
        )
        # A speaker-selection input is intentionally not exposed; infer()
        # always passes 0 as the first argument of Svc.infer.
        vc_transform = gr.Number(
            label="升降半音(整数,可以正负,半音数量,升高八度就是12)",
            value=0,
        )
        vc_submit = gr.Button("转换", variant="primary")
        out_message = gr.Textbox(label="Output Message")
        out_audio = gr.Audio(label="Output Audio")
        vc_submit.click(
            self.infer,
            [record_input, upload_input, vc_transform],
            [out_message, out_audio],
        )