bodhisativa committed on
Commit
0361ff4
·
verified ·
1 Parent(s): 7b1d2ba

Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b — infer-web.py

Browse files
Files changed (1) hide show
  1. infer-web.py +193 -0
infer-web.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime bootstrap. NOTE: the environment setup below must run BEFORE the
# gradio/fairseq imports further down — they read TEMP (and sys.path) at
# import time, so do not reorder these imports above the setup.
import torch, pdb, os, traceback, sys, warnings, shutil

now_dir = os.getcwd()
sys.path.append(now_dir)
# Fresh per-run temp dir so stale uploads from a previous run are purged.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)  # fixed seed for reproducible inference

from infer_pack.models import SynthesizerTrnMs256NSF as SynthesizerTrn256
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
import librosa
import logging
from vc_infer_pipeline import VC
import soundfile as sf
from config import is_half, device  # fix: `is_half` was imported twice
from infer_uvr5 import _audio_pre_

logging.getLogger('numba').setLevel(logging.WARNING)
21
# Load the HuBERT content encoder; it is the shared feature extractor that
# vc_single feeds into the synthesizer pipeline.
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    ["hubert_base.pt"],
    suffix="",
)
hubert_model = models[0].to(device)
hubert_model = hubert_model.half() if is_half else hubert_model.float()
hubert_model.eval()
28
+
29
+
30
# Directories holding voice-conversion checkpoints and UVR5 separation models.
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"

# Dropdown choices: checkpoint file names with their extensions stripped.
names = [name.replace(".pt", "") for name in os.listdir(weight_root)]
uvr5_names = [name.replace(".pth", "") for name in os.listdir(weight_uvr5_root)]
36
+
37
def get_vc(sid):
    """Load the checkpoint for speaker `sid` and build an inference pipeline.

    Returns a (dv, tgt_sr, net_g, vc) tuple: speaker embedding, target sample
    rate, generator network on `device`, and the VC pipeline object.
    """
    checkpoint_path = "%s/%s.pt" % (weight_root, sid)
    cpt = torch.load(checkpoint_path, map_location="cpu")
    speaker_embed = cpt["dv"]
    # Target sample rate is stored as the last entry of the model config.
    target_sr = cpt["config"][-1]
    net_g = SynthesizerTrn256(*cpt["config"], is_half=is_half)
    net_g.load_state_dict(cpt["weight"], strict=True)
    net_g.eval().to(device)
    net_g = net_g.half() if is_half else net_g.float()
    pipeline = VC(target_sr, device, is_half)
    return speaker_embed, target_sr, net_g, pipeline
49
+
50
def vc_single(sid, input_audio, f0_up_key, f0_file):
    """Convert one audio clip to the target voice.

    Parameters:
        sid: speaker name (str, resolved via get_vc), or an already-loaded
            (dv, tgt_sr, net_g, vc) sequence supplied by a batch caller.
        input_audio: a file path, or a (sampling_rate, int16 ndarray) pair as
            produced by gradio's Audio component.
        f0_up_key: pitch shift in semitones (coerced to int).
        f0_file: optional F0 curve file replacing the extracted pitch.

    Returns:
        ("Success", (tgt_sr, audio)) on success, otherwise
        (traceback text, (None, None)).
    """
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    # Fix: pre-bind so the cleanup in `finally` cannot hit an unbound local
    # (the original raised NameError on `del` when an exception fired before
    # the model was loaded, masking the error return).
    net_g = dv = vc = None
    try:
        if type(input_audio) == str:
            print("processing %s" % input_audio)
            audio, sampling_rate = sf.read(input_audio)
        else:
            sampling_rate, audio = input_audio
            audio = audio.astype("float32") / 32768  # int16 -> float32 in [-1, 1)
        if type(sid) == str:
            dv, tgt_sr, net_g, vc = get_vc(sid)
        else:
            dv, tgt_sr, net_g, vc = sid
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.transpose(1, 0))
        if sampling_rate != 16000:
            # The HuBERT feature extractor expects 16 kHz mono input.
            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
        times = [0, 0, 0]  # per-stage timing accumulator filled by the pipeline
        audio_opt = vc.pipeline(hubert_model, net_g, dv, audio, times, f0_up_key, f0_file=f0_file)
        print(times)
        return "Success", (tgt_sr, audio_opt)
    except Exception:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
    finally:
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()
78
+
79
def vc_multi(sid, dir_path, opt_root, paths, f0_up_key):
    """Batch-convert audio files and write the results as WAVs into opt_root.

    Parameters:
        sid: speaker name passed to get_vc.
        dir_path: input folder; when non-empty it takes priority over `paths`.
        opt_root: output folder (created if missing).
        paths: gradio file objects used when `dir_path` is empty.
        f0_up_key: pitch shift in semitones.

    Returns one status line per input file, joined with newlines, or a
    traceback string if setup failed.
    """
    # Fix: pre-bind so the `del` in `finally` is safe even when get_vc raised
    # (the original hit NameError there and masked the traceback return).
    net_g = dv = vc = None
    try:
        dir_path = dir_path.strip(" ")  # users often paste paths with stray spaces
        opt_root = opt_root.strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        dv, tgt_sr, net_g, vc = get_vc(sid)
        try:
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except Exception:
            # Folder listing failed; fall back to the uploaded file objects.
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        for path in paths:
            # Reuse the loaded model for every file instead of reloading it.
            info, opt = vc_single([dv, tgt_sr, net_g, vc], path, f0_up_key, f0_file=None)
            if info == "Success":
                try:
                    out_sr, audio_opt = opt
                    wavfile.write("%s/%s" % (opt_root, os.path.basename(path)), out_sr, audio_opt)
                except Exception:
                    info = traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
        return "\n".join(infos)
    except Exception:
        return traceback.format_exc()
    finally:
        print("clean_empty_cache")
        del net_g, dv, vc
        torch.cuda.empty_cache()
108
+
109
def uvr(model_name, inp_root, save_root_vocal, save_root_ins):
    """Batch vocal/instrument separation with a UVR5 model.

    Parameters:
        model_name: UVR5 weight name (resolved under weight_uvr5_root).
        inp_root: folder of input audio files.
        save_root_vocal: output folder for the vocal stems.
        save_root_ins: output folder for the instrumental stems.

    Returns one status line per input file, joined with newlines.
    """
    infos = []
    pre_fun = None  # pre-bind so cleanup in `finally` is safe on early failure
    try:
        inp_root = inp_root.strip(" ")  # users often paste paths with stray spaces
        save_root_vocal = save_root_vocal.strip(" ")
        save_root_ins = save_root_ins.strip(" ")
        pre_fun = _audio_pre_(
            model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
            device=device,
            is_half=is_half,
        )
        for name in os.listdir(inp_root):
            inp_path = os.path.join(inp_root, name)
            try:
                pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
                infos.append("%s->Success" % (os.path.basename(inp_path)))
            except Exception:
                # Keep going: report the failure for this file and continue.
                infos.append("%s->%s" % (os.path.basename(inp_path), traceback.format_exc()))
    except Exception:
        infos.append(traceback.format_exc())
    finally:
        try:
            del pre_fun.model
            del pre_fun
        except Exception:
            traceback.print_exc()
        print("clean_empty_cache")
        torch.cuda.empty_cache()
    # Fix: `return` moved out of `finally` so it can no longer silently
    # suppress an in-flight exception.
    return "\n".join(infos)
134
+
135
# Gradio web UI.
# Tab 1 ("推理" / inference): single-clip conversion with optional F0 curve
# file, plus batch conversion over a folder or an uploaded file list.
# Tab 2 ("数据处理" / data processing): batch vocal/instrument separation via
# the UVR5 models listed in `uvr5_names`.
# Tab 3: training, placeholder for a future release.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("推理"):
            with gr.Group():
                # Disclaimer + pitch guidance (markdown shown above the controls).
                gr.Markdown(value="""
使用软件者、传播软件导出的声音者自负全责。如不认可该条款,则不能使用/引用软件包内所有代码和文件。<br>
目前仅开放白菜音色,后续将扩展为本地训练推理工具,用户可训练自己的音色进行社区共享。<br>
男转女推荐+12key,女转男推荐-12key,如果音域爆炸导致音色失真也可以自己调整到合适音域
""")
                with gr.Row():
                    with gr.Column():
                        # Voice checkpoint picker (names come from weights/).
                        sid0 = gr.Dropdown(label="音色", choices=names)
                        vc_transform0 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        f0_file = gr.File(label="F0曲线文件,可选,一行一个音高,代替默认F0及升降调")
                        input_audio0 = gr.Audio(label="上传音频")
                        but0=gr.Button("转换", variant="primary")
                    with gr.Column():
                        vc_output1 = gr.Textbox(label="输出信息")
                        vc_output2 = gr.Audio(label="输出音频")
                # Single-clip conversion -> vc_single(sid, audio, transpose, f0 file).
                but0.click(vc_single, [sid0, input_audio0, vc_transform0,f0_file], [vc_output1, vc_output2])
            with gr.Group():
                gr.Markdown(value="""
批量转换,上传多个音频文件,在指定文件夹(默认opt)下输出转换的音频。<br>
合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
""")
                with gr.Row():
                    with gr.Column():
                        sid1 = gr.Dropdown(label="音色", choices=names)
                        vc_transform1 = gr.Number(label="变调(整数,半音数量,升八度12降八度-12)", value=12)
                        opt_input = gr.Textbox(label="指定输出文件夹",value="opt")
                    with gr.Column():
                        dir_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                        but1=gr.Button("转换", variant="primary")
                        vc_output3 = gr.Textbox(label="输出信息")
                # Batch conversion; the folder path takes priority over uploads.
                but1.click(vc_multi, [sid1, dir_input,opt_input,inputs, vc_transform1], [vc_output3])

        with gr.TabItem("数据处理"):
            with gr.Group():
                gr.Markdown(value="""
人声伴奏分离批量处理,使用UVR5模型。<br>
不带和声用HP2,带和声且提取的人声不需要和声用HP5<br>
合格的文件夹路径格式举例:E:\codes\py39\\vits_vc_gpu\白鹭霜华测试样例(去文件管理器地址栏拷就行了)
""")
                with gr.Row():
                    with gr.Column():
                        dir_wav_input = gr.Textbox(label="输入待处理音频文件夹路径")
                        wav_inputs = gr.File(file_count="multiple", label="也可批量输入音频文件,二选一,优先读文件夹")
                    with gr.Column():
                        model_choose = gr.Dropdown(label="模型", choices=uvr5_names)
                        opt_vocal_root = gr.Textbox(label="指定输出人声文件夹",value="opt")
                        opt_ins_root = gr.Textbox(label="指定输出乐器文件夹",value="opt")
                        but2=gr.Button("转换", variant="primary")
                        vc_output4 = gr.Textbox(label="输出信息")
                # UVR5 separation over every file in the input folder.
                but2.click(uvr, [model_choose, dir_wav_input,opt_vocal_root,opt_ins_root], [vc_output4])
        # Training is not available yet.
        with gr.TabItem("训练-待开放"):pass

# Bind to localhost only; switch to 0.0.0.0 to expose the UI on the network.
# app.launch(server_name="0.0.0.0",server_port=7860)
app.launch(server_name="127.0.0.1",server_port=7860)