Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b — infer-web.py
Browse files- infer-web.py +193 -0
infer-web.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Web UI entry point: voice-conversion inference plus UVR5 vocal/instrument
# separation, served through gradio.
import torch, pdb, os, traceback, sys, warnings, shutil

now_dir = os.getcwd()
sys.path.append(now_dir)  # make project-local modules importable from the repo root
# Use a fresh TEMP directory under the repo for intermediate/upload files.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)  # fixed seed so repeated conversions are reproducible
from infer_pack.models import SynthesizerTrnMs256NSF as SynthesizerTrn256
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
import librosa
import logging
from vc_infer_pipeline import VC
import soundfile as sf
from config import is_half, device  # fix: `is_half` was imported twice
from infer_uvr5 import _audio_pre_
logging.getLogger('numba').setLevel(logging.WARNING)

# Load the HuBERT content encoder once at startup; it is shared by every conversion.
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"], suffix="",)
hubert_model = models[0]
hubert_model = hubert_model.to(device)
if(is_half): hubert_model = hubert_model.half()
else: hubert_model = hubert_model.float()
hubert_model.eval()


# Model directories: voice-conversion checkpoints (.pt) and UVR5 weights (.pth).
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
names = [name.replace(".pt", "") for name in os.listdir(weight_root)]
uvr5_names = [name.replace(".pth", "") for name in os.listdir(weight_uvr5_root)]
|
| 36 |
+
|
| 37 |
+
def get_vc(sid):
    """Load the voice checkpoint for speaker id `sid` and build its pipeline.

    Returns (dv, tgt_sr, net_g, vc): the speaker embedding, target sample
    rate, the synthesizer network (on `device`), and a VC pipeline instance.
    """
    checkpoint_path = "%s/%s.pt" % (weight_root, sid)
    cpt = torch.load(checkpoint_path, map_location="cpu")
    dv = cpt["dv"]
    tgt_sr = cpt["config"][-1]  # last config entry is the output sample rate
    net_g = SynthesizerTrn256(*cpt["config"], is_half=is_half)
    net_g.load_state_dict(cpt["weight"], strict=True)
    net_g.eval().to(device)
    # Match the global precision setting.
    net_g = net_g.half() if is_half else net_g.float()
    vc = VC(tgt_sr, device, is_half)
    return dv, tgt_sr, net_g, vc
|
| 49 |
+
|
| 50 |
+
def vc_single(sid, input_audio, f0_up_key, f0_file):
    """Convert one audio clip with the selected voice model.

    Args:
        sid: speaker/model id (str), or an already-loaded
            [dv, tgt_sr, net_g, vc] sequence as passed by vc_multi.
        input_audio: filepath (str) or a (sampling_rate, int16 ndarray) pair
            as produced by gradio's Audio component.
        f0_up_key: pitch shift in semitones (+12 = up one octave).
        f0_file: optional file with one pitch value per line, overriding F0.

    Returns:
        ("Success", (tgt_sr, audio)) on success, or (error text, (None, None)).
    """
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    # Pre-bind so the cleanup in `finally` can never hit an unbound name.
    # Previously a failure before these were assigned (e.g. sf.read raising)
    # made `del net_g,dv,vc` raise NameError, masking the real error return.
    dv = net_g = vc = None
    try:
        if type(input_audio) == str:
            print("processing %s" % input_audio)
            audio, sampling_rate = sf.read(input_audio)
        else:
            sampling_rate, audio = input_audio
            audio = audio.astype("float32") / 32768  # int16 PCM -> float32 in [-1, 1)
        if type(sid) == str:
            dv, tgt_sr, net_g, vc = get_vc(sid)
        else:
            dv, tgt_sr, net_g, vc = sid
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.transpose(1, 0))
        if sampling_rate != 16000:
            # The HuBERT content encoder expects 16 kHz input.
            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
        times = [0, 0, 0]
        audio_opt = vc.pipeline(hubert_model, net_g, dv, audio, times, f0_up_key, f0_file=f0_file)
        print(times)
        return "Success", (tgt_sr, audio_opt)
    except Exception:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
    finally:
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()
|
| 78 |
+
|
| 79 |
+
def vc_multi(sid, dir_path, opt_root, paths, f0_up_key):
    """Batch-convert a folder (or uploaded file list) and write WAVs to opt_root.

    A non-empty dir_path takes priority over the uploaded `paths`.
    Returns a newline-joined per-file status report, or a traceback string
    if setup (model load / directory creation) fails.
    """
    # Pre-bind so the cleanup in `finally` is always safe; previously a
    # get_vc failure made `del net_g,dv,vc` raise NameError from `finally`.
    dv = net_g = vc = None
    try:
        # Strip stray spaces that beginners often copy along with the path.
        dir_path = dir_path.strip(" ")
        opt_root = opt_root.strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        dv, tgt_sr, net_g, vc = get_vc(sid)  # load the model once for the whole batch
        try:
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except Exception:
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        for path in paths:
            info, opt = vc_single([dv, tgt_sr, net_g, vc], path, f0_up_key, f0_file=None)
            if info == "Success":
                try:
                    tgt_sr, audio_opt = opt
                    wavfile.write("%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt)
                except Exception:
                    info = traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
        return "\n".join(infos)
    except Exception:
        return traceback.format_exc()
    finally:
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()
|
| 108 |
+
|
| 109 |
+
def uvr(model_name, inp_root, save_root_vocal, save_root_ins):
    """Batch vocal/instrument separation over a folder using a UVR5 model.

    Returns a newline-joined per-file status report; failures are reported
    inline per file (or as a single traceback entry) rather than raised.
    """
    report = []
    try:
        # Strip stray spaces that beginners often copy along with the path.
        inp_root = inp_root.strip(" ")
        save_root_vocal = save_root_vocal.strip(" ")
        save_root_ins = save_root_ins.strip(" ")
        model_path = os.path.join(weight_uvr5_root, model_name + ".pth")
        pre_fun = _audio_pre_(model_path=model_path, device=device, is_half=is_half)
        for name in os.listdir(inp_root):
            inp_path = os.path.join(inp_root, name)
            base = os.path.basename(inp_path)
            try:
                pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
            except:
                report.append("%s->%s" % (base, traceback.format_exc()))
            else:
                report.append("%s->Success" % (base))
    except:
        report.append(traceback.format_exc())
    finally:
        # Best-effort release of the separation model before clearing the cache;
        # pre_fun may be unbound if construction failed above.
        try:
            del pre_fun.model
            del pre_fun
        except:
            traceback.print_exc()
        print("clean_empty_cache")
        torch.cuda.empty_cache()
    return "\n".join(report)
|
| 134 |
+
|
| 135 |
+
# Gradio front-end: an "inference" tab (single-clip + batch voice conversion)
# and a "data processing" tab (UVR5 vocal/instrument separation).
# NOTE(review): labels and notices are user-facing Chinese strings and are
# deliberately left untouched; indentation reconstructed from a mangled source.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("推理"):  # "Inference"
            with gr.Group():
                # Disclaimer plus pitch-shift guidance (+12 for M->F, -12 for F->M).
                gr.Markdown(value="""
使用软件者、传播软件导出的声音者自负全责。如不认可该条款,则不能使用/引用软件包内所有代码和文件。<br>
目前仅开放白菜音色,后续将扩展为本地训练推理工具,用户可训练自己的音色进行社区共享。<br>
男转女推荐+12key,女转男推荐-12key,如果音域爆炸导致音色失真也可以自己调整到合适音域
""")
                with gr.Row():
                    with gr.Column():
                        sid0 = gr.Dropdown(label="音色", choices=names)
                        vc_transform0 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        f0_file = gr.File(label="F0曲线文件,可选,一行一个音高,代替默认F0及升降调")
                        input_audio0 = gr.Audio(label="上传音频")
                        but0=gr.Button("转换", variant="primary")
                    with gr.Column():
                        vc_output1 = gr.Textbox(label="输出信息")
                        vc_output2 = gr.Audio(label="输出音频")
                # Single-clip conversion -> (status text, converted audio).
                but0.click(vc_single, [sid0, input_audio0, vc_transform0,f0_file], [vc_output1, vc_output2])
            with gr.Group():
                # Batch conversion: a folder path takes priority over uploaded files.
                gr.Markdown(value="""
批量转换,上传多个音频文件,在指定文件夹(默认opt)下输出转换的音频。<br>
合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
""")
                with gr.Row():
                    with gr.Column():
                        sid1 = gr.Dropdown(label="音色", choices=names)
                        vc_transform1 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        opt_input = gr.Textbox(label="指定输出文件夹",value="opt")
                    with gr.Column():
                        dir_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                    but1=gr.Button("转换", variant="primary")
                    vc_output3 = gr.Textbox(label="输出信息")
                    # Batch conversion -> newline-joined per-file report.
                    but1.click(vc_multi, [sid1, dir_input,opt_input,inputs, vc_transform1], [vc_output3])

        with gr.TabItem("数据处理"):  # "Data processing"
            with gr.Group():
                # UVR5 separation: HP2 for plain vocals, HP5 when harmonies are present.
                gr.Markdown(value="""
人声伴奏分离批量处理,使用UVR5模型。<br>
不带和声用HP2,带和声且提取的人声不需要和声用HP5<br>
合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
""")
                with gr.Row():
                    with gr.Column():
                        dir_wav_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        wav_inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                    with gr.Column():
                        model_choose = gr.Dropdown(label="模型", choices=uvr5_names)
                        opt_vocal_root = gr.Textbox(label="指定输出人声文件夹",value="opt")
                        opt_ins_root = gr.Textbox(label="指定输出乐器文件夹",value="opt")
                    but2=gr.Button("转换", variant="primary")
                    vc_output4 = gr.Textbox(label="输出信息")
                    but2.click(uvr, [model_choose, dir_wav_input,opt_vocal_root,opt_ins_root], [vc_output4])
        with gr.TabItem("训练-待开放"):pass  # "Training — coming soon"

# Bind to 0.0.0.0 instead to serve on the LAN.
# app.launch(server_name="0.0.0.0",server_port=7860)
app.launch(server_name="127.0.0.1",server_port=7860)
|