import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
import tempfile
import shutil
import requests
from pathlib import Path

###################################################
# NOTE: import order is preserved on purpose -- the wildcard imports below may
# rebind names, so reordering could change which definitions win.
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth, get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *
import io

# Registry of available voices: display name -> checkpoint and config paths.
spk_dict = {
    "雷电将军": {
        "model_name": './models/genshin/raiden.ckpt',
        "config_name": './models/genshin/config.yaml',
    }
}
print(spk_dict)

# A single model is loaded once at import time and shared by every request.
project_name = "Unnamed"
model_path = spk_dict['雷电将军']['model_name']
config_path = spk_dict['雷电将军']['config_name']
hubert_gpu = False
svc_model = Svc(project_name, config_path, hubert_gpu, model_path)
print(svc_model)


def vc_fn(sid, audio_record, audio_upload, tran, pndm_speedup=20):
    """Convert an input recording with the loaded model and plot its f0 curves.

    Args:
        sid: Speaker name selected in the UI. It is only printed; the module
            loads a single model, so it does not select anything.
        audio_record: File path of the microphone recording, or None.
        audio_upload: File path of the uploaded audio, or None. Takes
            precedence over ``audio_record`` when both are given.
        tran: Pitch shift in semitones (may be negative). None is treated
            as 0.
        pndm_speedup: Speedup factor forwarded to ``run_clip`` as ``acc``.
            None is treated as 20.

    Returns:
        A 3-tuple matching the three output components wired to this handler:
        a status message, ``(sample_rate, waveform)`` for the audio player
        (or None), and an image update pointing at the saved f0 plot (or None).
    """
    print(sid)
    if audio_upload is not None:
        audio_path = audio_upload
    elif audio_record is not None:
        audio_path = audio_record
    else:
        # Three values are required: the click handler has three outputs, and
        # Gradio rejects a handler result with too few values.
        return "你需要上传wav文件或使用网页内置的录音!", None, None

    # A cleared gr.Number field arrives as None; fall back to the UI defaults
    # instead of raising TypeError from int(None).
    key = int(tran) if tran is not None else 0
    pndm_speedup = int(pndm_speedup) if pndm_speedup is not None else 20
    print('model loaded')

    # NOTE(review): the output paths below are fixed, so overlapping requests
    # would overwrite each other's files -- confirm how the app is served.
    wav_gen = './output.wav'
    plot_path = './temp.png'

    f0_tst, f0_pred, _ = run_clip(
        svc_model,
        file_path=audio_path,
        key=key,
        acc=pndm_speedup,
        use_crepe=True,
        use_pe=True,
        thre=0.1,
        use_gt_mel=False,
        add_noise_step=500,
        project_name=project_name,
        out_path=wav_gen,
    )

    # sr=None keeps the sample rate stored in the file rather than resampling
    # to librosa's 22050 Hz default.
    audio, sr = librosa.load(wav_gen, sr=None)
    f0_gen, _ = get_pitch_parselmouth(
        *svc_model.vocoder.wav2spec(wav_gen), hparams
    )

    # Zero entries become NaN so matplotlib leaves gaps instead of drawing
    # lines down to 0.
    f0_tst[f0_tst == 0] = np.nan  # ground truth f0
    f0_pred[f0_pred == 0] = np.nan  # f0 pe predicted
    f0_gen[f0_gen == 0] = np.nan  # f0 generated

    fig = plt.figure(figsize=[15, 5])
    try:
        # Draw on this figure's own axes rather than pyplot's implicit
        # "current figure".
        ax = fig.add_subplot(111)
        curves = (
            (f0_tst, 'black', "f0_tst"),
            (f0_pred, 'orange', "f0_pred"),
            (f0_gen, 'red', "f0_gen"),
        )
        for f0_curve, colour, label in curves:
            ax.plot(
                np.arange(0, len(f0_curve)), f0_curve, color=colour, label=label
            )

        # Dotted horizontal reference lines at fixed musical pitches.
        reference_notes = (
            ('C4', "blue"),
            ('G4', "green"),
            ('C5', "orange"),
            ('F#5', "red"),
        )
        for note, colour in reference_notes:
            ax.axhline(librosa.note_to_hz(note), ls=":", c=colour, label=note)

        ax.legend()
        fig.savefig(plot_path)
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running server accumulates one figure per request.
        plt.close(fig)

    return "Success", (sr, audio), gr.Image.update("temp.png")


app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            gr.Markdown(
                value=(
                    " 本模型基于diffsvc训练,使用雷电将军语音数据,模型与数据都来源于"
                    "[Erythrocyte/Diff-SVC_Genshin_Datasets]"
                    "(https://huggingface.co/datasets/Erythrocyte/Diff-SVC_Genshin_Datasets), "
                    "本页面支持**60s以内**的**无伴奏**wav格式,或使用**网页内置**的录音(二选一) "
                    "转换效果取决于源音频语气、节奏是否与目标音色相近,以及音域是否超出目标音色音域范围 "
                )
            )
            speaker_id = gr.Dropdown(
                label="音色", choices=['雷电将军'], value="雷电将军"
            )
            record_input = gr.Audio(
                source="microphone",
                label="录制你的声音",
                type="filepath",
                elem_id="audio_inputs",
            )
            upload_input = gr.Audio(
                source="upload",
                label="上传音频(长度小于60秒)",
                type="filepath",
                elem_id="audio_inputs",
            )
            vc_transform = gr.Number(
                label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0
            )
            vc_speedup = gr.Number(label="加速倍数", value=20)
            vc_submit = gr.Button("转换", variant="primary")
            out_audio = gr.Audio(label="Output Audio")
            gr.Markdown(value=" 无用信息 ")
            out_message = gr.Textbox(label="Output")
            gr.Markdown(value="f0曲线可以直观的显示跑调情况: ")
            f0_image = gr.Image(label="f0曲线")
            # vc_fn returns exactly one value per output component below.
            vc_submit.click(
                vc_fn,
                [speaker_id, record_input, upload_input, vc_transform, vc_speedup],
                [out_message, out_audio, f0_image],
            )

app.launch()