Spaces:
Sleeping
Sleeping
File size: 3,999 Bytes
b6912cc e97653e b6912cc fd90cb6 b6912cc 1bf2ff1 cd57111 b6912cc fd90cb6 b6912cc bad4855 b6912cc cd57111 b69732f 9a7bb7f b69732f 77e45ac b69732f cec2e08 b69732f b6912cc cec2e08 e692813 bad4855 b6912cc 272fda6 0ca786b b6912cc fd90cb6 5142fdd b6912cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import io
import os
# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt")
import gradio as gr
import librosa
import numpy as np
import soundfile
from inference.infer_tool import Svc
import logging
# Silence noisy third-party loggers so the console shows only app output.
logging.getLogger('numba').setLevel(logging.WARNING)
logging.getLogger('markdown_it').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)

# Load the so-vits-svc model once at startup (generator checkpoint, config,
# and optional k-means cluster model for timbre blending).
config_path = "configs/config.json"
# Fix: pass config_path instead of re-hardcoding the same literal.
model = Svc("logs/44k/G_90400.pth", config_path, cluster_model_path="logs/44k/kmeans_10000.pt")
def vc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale):
    """Convert an uploaded audio clip to the selected voice.

    Args:
        sid: speaker/voice id chosen in the dropdown.
        input_audio: ``(sampling_rate, np.ndarray)`` tuple from ``gr.Audio``,
            or ``None`` if nothing was uploaded.
        vc_transform: pitch shift in semitones (may be negative).
        auto_f0: enable automatic f0 prediction (speech only).
        cluster_ratio: cluster-model blend ratio in [0, 1]; 0 disables it.
        slice_db: silence-slicing threshold in dB.
        noise_scale: inference noise scale.

    Returns:
        Tuple of (status message, ``(sample_rate, audio)`` or ``None``).
    """
    if input_audio is None:
        return "没有上传待处理的音频哦", None
    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    # NOTE(review): this limit is effectively disabled (10**19 s) even though
    # the UI text promises a 100 s cap — restore `duration > 100` to enforce it.
    if duration > 10000000000000000000:
        return "请上传小于100s的音频,需要转换长音频请本地进行转换", None
    # Normalize integer PCM to [-1, 1] float32. The original applied np.iinfo
    # unconditionally, which raises ValueError on float-dtype input.
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        audio = audio.astype(np.float32)
    # Down-mix multi-channel audio to mono (librosa expects channels-first).
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    # The content encoder operates at 16 kHz.
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    out_wav_path = "temp.wav"
    soundfile.write(out_wav_path, audio, 16000, format="wav")
    _audio = model.slice_inference(out_wav_path, sid, vc_transform, slice_db,
                                   cluster_ratio, auto_f0, noise_scale)
    # NOTE(review): output rate is hard-coded to 44100; presumably matches the
    # model config ("logs/44k") — confirm against configs/config.json.
    return "转换完成", (44100, _audio)
# Build the Gradio UI: one tab with usage notes, upload + conversion knobs,
# and the text/audio outputs, then launch the web app.
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("一个窗口awa"):
            # User-facing usage notes (Chinese); rendered as markdown.
            gr.Markdown(value="""
香风智乃sovits4.0 在线demo 小孩子不懂事做着玩的
备注:
1. 上传音频必须为`.mp3`或者`.wav`格式 `单声道` `44100采样率`。
2. 音频文件应`小于100s`转换大于100s可以在AU/AudioLab中切片逐一上传。
3. 使用男性音频可以考虑使用 升降调+4或+6/开启f0预测,使用女性音频可以不做调整。
4. 在线版服务器为2核16G免费版,转换效率较慢请耐心等待。
5. 使用该模型请标注作者 **模型训练/数据集:INT16**
6. 语音模型转换出的音频请勿用于商业化,若有侵犯您的权利,请联系**leenight2016@outlook.com**
模型作者b站@INT16 关注喵https://space.bilibili.com/133434728
Modified/Kangluted by LeeNight in 23.4.9
""")
            # Speaker ids come from the loaded model; default to the first one.
            spks = list(model.spk2id.keys())
            sid = gr.Dropdown(label="音色", choices=spks, value=spks[0])
            vc_input3 = gr.Audio(label="上传音频(长度小于100秒)")
            # Conversion parameters, passed straight through to vc_fn.
            vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
            cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0)
            auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False)
            slice_db = gr.Number(label="切片阈值", value=-40)
            noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
            vc_submit = gr.Button("转换", variant="primary")
            vc_output1 = gr.Textbox(label="输出结果")
            vc_output2 = gr.Audio(label="输出音频")
            # Wire the button; the input list must match vc_fn's parameter order.
            vc_submit.click(vc_fn, [sid, vc_input3, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale], [vc_output1, vc_output2])
app.launch()
|