# Hugging Face file-viewer residue (not part of the program):
# kohrisatou-infinity's picture
# Upload 49 files
# 6bb8521
# raw
# history blame contribute delete
# No virus
# 2.71 kB
import io
import gradio as gr
import librosa
import numpy as np
import soundfile
import torch
from inference.infer_tool import Svc
import logging
# Raise the "numba" logger threshold so messages below WARNING are dropped.
logging.getLogger("numba").setLevel(logging.WARNING)

# Checkpoint and configuration paths handed to the Svc constructor.
model_name = "logs/32k/G_220000.pth"
config_name = "configs/config.json"

# Single shared model instance, created once at import time and used by vc_fn.
svc_model = Svc(model_name, config_name)

# Maps the speaker name chosen in the UI to the speaker id passed to the model.
sid_map = dict(KIP_01="KIP_01")
def _pcm_to_float32(audio):
    """Return *audio* as float32, scaling integer samples by their dtype maximum."""
    if np.issubdtype(audio.dtype, np.integer):
        # Integer PCM: divide by the largest positive value of the dtype.
        return (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    # np.iinfo raises ValueError for non-integer dtypes, so only cast here.
    # NOTE(review): assumes float input is already in a sensible amplitude
    # range — confirm against what the audio widget delivers.
    return audio.astype(np.float32)


def vc_fn(sid, input_audio, vc_transform):
    """Convert an uploaded clip with the shared ``svc_model``.

    Args:
        sid: Speaker name; must be a key of ``sid_map``.
        input_audio: ``None`` when nothing was uploaded, otherwise a
            ``(sampling_rate, audio)`` pair where ``audio`` is a NumPy array
            whose first axis is the sample index.
        vc_transform: Pitch-shift value forwarded unchanged to
            ``svc_model.infer``.

    Returns:
        ``(message, None)`` when the input is missing or longer than 45
        seconds, otherwise ``("Success", (32000, samples))``.

    Raises:
        KeyError: If ``sid`` is not present in ``sid_map``.
    """
    if input_audio is None:
        return "You need to upload an audio", None

    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 45:
        return "请上传小于 45s 的音频,如需要转换长音频,请在本地进行转换", None

    audio = _pcm_to_float32(audio)
    if len(audio.shape) > 1:
        # Multi-channel input is (samples, channels); to_mono gets the transpose.
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    print(audio.shape)

    # The model is fed an in-memory WAV at 16 kHz instead of a file on disk.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, audio, 16000, format="wav")
    out_wav_path.seek(0)

    sid = sid_map[sid]
    # NOTE(review): the second value returned by infer is ignored and the
    # output rate is hard-coded to 32000 — confirm this matches the model.
    out_audio, out_sr = svc_model.infer(sid, vc_transform, out_wav_path)
    _audio = out_audio.cpu().numpy()
    return "Success", (32000, _audio)
# Build the single-tab Gradio UI: intro text, speaker selector, audio upload,
# pitch-shift input, a convert button, and the two outputs filled by vc_fn.
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            # The link on the second line had a space between "]" and "(",
            # which stops it rendering as a Markdown link; the space is removed.
            # NOTE(review): the line numbers quoted in the last sentence of
            # this text may not match the current file — confirm.
            gr.Markdown(value="""
欢迎体验首个对外配布的 @冰糖IO SoVITS 3.0 模型
在使用此模型前请阅读 [Kohrisatou INFINITY PROTOTYPE-01 (beta) 使用协议](https://huggingface.co/spaces/kohrisatou-infinity/KIP_01_beta/blob/main/terms.md)
冰糖IO @ bilibili:[点击关注](https://space.bilibili.com/198297)
如果要在本地使用该 Demo,请 git lfs clone 该仓库,安装 requirements.txt 后运行 app.py
项目改写基于 https://huggingface.co/spaces/innnky/nyaru-svc-3.0
本地合成可以删除 26、27 两行代码以解除合成 45s 长度限制""")
            sid = gr.Dropdown(label="音色", choices=["KIP_01"], value="KIP_01")
            vc_input3 = gr.Audio(label="上传音频(长度小于 45 秒)")
            vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是 12)", value=0)
            vc_submit = gr.Button("转换", variant="primary")
            vc_output1 = gr.Textbox(label="Output Message")
            vc_output2 = gr.Audio(label="Output Audio")
        # Registered inside the Blocks context: inputs map to vc_fn's
        # parameters, outputs to its (message, audio) return pair.
        vc_submit.click(vc_fn, [sid, vc_input3, vc_transform], [vc_output1, vc_output2])

# Start the web server for the assembled app.
app.launch()