Fushimi / app.py
chilge's picture
Update app.py
398192e
raw
history blame contribute delete
No virus
2.38 kB
import io
import gradio as gr
import librosa
import numpy as np
import soundfile
import torch
from inference.infer_tool import Svc
import logging
# Only let numba log records of level WARNING and above through.
logging.getLogger("numba").setLevel(logging.WARNING)

# Checkpoint and configuration paths handed to the Svc constructor.
model_name = "logs/32k/gx.pth"
config_name = "configs/config.json"

# Built once at import time; vc_fn reuses this single instance for every call.
svc_model = Svc(model_name, config_name)

# Speaker label shown in the UI -> speaker id passed to svc_model.infer.
sid_map = {"伏见弓弦": "fushimi"}
def vc_fn(sid, input_audio, vc_transform):
    """Convert an uploaded clip to the selected speaker's voice.

    Args:
        sid: Speaker selected in the UI. A key of ``sid_map`` is translated to
            its mapped id; any other value is passed to the model unchanged,
            so an internal speaker id is accepted as well.
        input_audio: ``(sampling_rate, samples)`` pair, or ``None`` when no
            audio was supplied. ``samples`` is a NumPy array whose first axis
            is time; a second axis, if present, is treated as channels.
        vc_transform: Pitch-shift value forwarded to ``svc_model.infer``.

    Returns:
        A ``(message, audio)`` tuple. ``audio`` is ``(32000, samples)`` on
        success and ``None`` when the request is rejected.
    """
    if input_audio is None:
        return "You need to upload an audio", None

    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 45:
        return "请上传小于45s的音频,需要转换长音频请本地进行转换", None

    if np.issubdtype(audio.dtype, np.integer):
        # Scale integer PCM by the dtype's maximum so samples land in [-1, 1].
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        # np.iinfo raises ValueError for non-integer dtypes. Floating-point
        # samples are presumably already normalised -- TODO confirm upstream.
        audio = audio.astype(np.float32)

    if audio.ndim > 1:
        # librosa.to_mono expects (channels, samples); input is (samples, channels).
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    print(audio.shape)

    # Hand the 16 kHz clip to the model as an in-memory WAV file.
    wav_buffer = io.BytesIO()
    soundfile.write(wav_buffer, audio, 16000, format="wav")
    wav_buffer.seek(0)

    # Fall back to the raw value so an internal id does not raise KeyError.
    speaker = sid_map.get(sid, sid)

    # The second value returned by infer is not used; the output is always
    # labelled as 32000 Hz.
    out_audio, _ = svc_model.infer(speaker, vc_transform, wav_buffer)
    return "Success", (32000, out_audio.cpu().numpy())
# Gradio UI: a single "Basic" tab that feeds the speaker, the uploaded clip
# and the pitch shift into vc_fn and shows the returned message and audio.
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            gr.Markdown(value="""
i7000如果要在本地使用该demo,请使用git lfs clone 该仓库,安装requirements.txt后运行app.py即可
项目改写基于 https://huggingface.co/spaces/innnky/nyaru-svc-3.0
本地合成可以删除26、27两行代码以解除合成45s长度限制""")
            # The initial value must be one of `choices`; these labels are the
            # keys vc_fn looks up in sid_map.
            sid = gr.Dropdown(label="音色", choices=["伏见弓弦"], value="伏见弓弦")
            vc_input3 = gr.Audio(label="上传音频(长度小于45秒)")
            vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
            vc_submit = gr.Button("转换", variant="primary")
            vc_output1 = gr.Textbox(label="Output Message")
            vc_output2 = gr.Audio(label="Output Audio")
            # Inputs and outputs follow vc_fn's parameter and return order.
            vc_submit.click(vc_fn, [sid, vc_input3, vc_transform], [vc_output1, vc_output2])

# Start the web server for the interface defined above.
app.launch()