import gradio as gr
import soundfile
import torch

import infer_tool

# NOTE(review): appears unused in this file; kept in case external code reads it.
convert_cnt = [0]

dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "83_epochs.pth"
config_name = "nyarumul.json"

# Load the sovits generator, the HuBERT content encoder and the f0 feature
# extractor, plus the hyper-parameters parsed from the config file.
net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(
    model_name, f"configs/{config_name}")

# Get config parameters: output sampling rate defined by the model config.
target_sample = hps_ms.data.sampling_rate

# Speaker display name -> speaker id expected by the model.
spk_dict = {
    "猫雷2.0": 0,
    "云灏": 2,
    "即霜": 3,
    "奕兰秋": 4
}


def vc_fn(sid, audio_record, audio_upload, tran):
    """Run one voice conversion for the Gradio UI.

    Args:
        sid: speaker display name; must be a key of ``spk_dict``.
        audio_record: filepath of the in-browser recording, or ``None``.
        audio_upload: filepath of the uploaded audio, or ``None``.
        tran: pitch shift in semitones (may be negative).

    Returns:
        A 3-tuple matching the click handler's output components:
        (status message, (sample_rate, audio) or ``None``, image update or ``None``).
    """
    print(sid)
    # An uploaded file takes priority over the microphone recording.
    if audio_upload is not None:
        audio_path = audio_upload
    elif audio_record is not None:
        audio_path = audio_record
    else:
        # BUGFIX: the click handler has three outputs, so every return path
        # must yield three values — the original error branches returned two,
        # which breaks Gradio's output unpacking.
        return "你需要上传wav文件或使用网页内置的录音!", None, None

    audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
    duration = audio.shape[0] / sampling_rate
    if duration > 60:
        return "请上传小于60s的音频,需要转换长音频请使用colab", None, None

    o_audio, _out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran,
                                        net_g_ms, hubert_soft, feature_input)
    out_path = "./out_temp.wav"
    # NOTE(review): written at target_sample rather than the rate returned by
    # infer(); presumably they match — confirm in infer_tool.
    soundfile.write(out_path, o_audio, target_sample)
    # Renders the f0 comparison plot; read back below as "temp.jpg".
    infer_tool.f0_plt(audio_path, out_path, tran, hubert_soft, feature_input)
    mistake, var = infer_tool.calc_error(audio_path, out_path, tran, feature_input)
    return (f"半音偏差:{mistake}\n半音方差:{var}",
            (target_sample, o_audio),
            gr.Image.update("temp.jpg"))


app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            gr.Markdown(value="""
                本模型为sovits_f0(含AI猫雷2.0音色),支持**60s以内**的**无伴奏**wav、mp3(单声道)格式,或使用**网页内置**的录音(二选一)
                转换效果取决于源音频语气、节奏是否与目标音色相近,以及音域是否超出目标音色音域范围
                猫雷音色低音音域效果不佳,如转换男声歌声,建议变调升 **6-10key**
                该模型的 [github仓库链接](https://github.com/innnky/so-vits-svc),如果想自己制作并训练模型可以访问这个 [github仓库](https://github.com/IceKyrin/sovits_guide)
                """)
            speaker_id = gr.Dropdown(label="音色",
                                     choices=['猫雷2.0', '云灏', '即霜', "奕兰秋"],
                                     value="猫雷2.0")
            record_input = gr.Audio(source="microphone", label="录制你的声音",
                                    type="filepath", elem_id="audio_inputs")
            upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)",
                                    type="filepath", elem_id="audio_inputs")
            vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)",
                                     value=0)
            vc_submit = gr.Button("转换", variant="primary")
            out_audio = gr.Audio(label="Output Audio")
            gr.Markdown(value="""
                输出信息为音高平均偏差半音数量,体现转换音频的跑调情况(一般平均小于0.5个半音)
                f0曲线可以直观的显示跑调情况,蓝色为输入音高,橙色为合成音频的音高
                若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
                """)
            out_message = gr.Textbox(label="跑调误差信息")
            gr.Markdown(value="""f0曲线可以直观的显示跑调情况,蓝色为输入音高,橙色为合成音频的音高
                若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
                """)
            f0_image = gr.Image(label="f0曲线")
            # Three outputs — every return path of vc_fn yields three values.
            vc_submit.click(vc_fn,
                            [speaker_id, record_input, upload_input, vc_transform],
                            [out_message, out_audio, f0_image])
        with gr.TabItem("使用说明"):
            gr.Markdown(value="""
                0、合集:https://github.com/IceKyrin/sovits_guide/blob/main/README.md
                1、仅支持sovit_f0(sovits2.0)模型
                2、自行下载hubert-soft-0d54a1f4.pt改名为hubert.pt放置于pth文件夹下(已经下好了)
                https://github.com/bshall/hubert/releases/tag/v0.1
                3、pth文件夹下放置sovits2.0的模型
                4、与模型配套的xxx.json,需有speaker项——人物列表
                5、放无伴奏的音频、或网页内置录音,不要放奇奇怪怪的格式
                6、仅供交流使用,不对用户行为负责
                """)

app.launch()