Marcis committed on
Commit
ecfefe5
·
verified ·
1 Parent(s): 4661443

Criar app.py

Browse files
Files changed (1) hide show
  1. app.py +319 -0
app.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+
4
+ # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
5
+ import gradio as gr
6
+ import librosa
7
+ import numpy as np
8
+ import logging
9
+ from fairseq import checkpoint_utils
10
+ from vc_infer_pipeline import VC
11
+ import traceback
12
+ from config import Config
13
+ from lib.infer_pack.models import (
14
+ SynthesizerTrnMs256NSFsid,
15
+ SynthesizerTrnMs256NSFsid_nono,
16
+ SynthesizerTrnMs768NSFsid,
17
+ SynthesizerTrnMs768NSFsid_nono,
18
+ )
19
+ from i18n import I18nAuto
20
+
21
# Raise the threshold of a few third-party loggers to WARNING so their
# lower-level messages do not flood the console.
for noisy_logger in ("numba", "markdown_it", "urllib3", "matplotlib"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

# Translation helper used to wrap the UI labels further down.
i18n = I18nAuto()
i18n.print()

# Runtime configuration; the code below reads `device` and `is_half` from it.
config = Config()

weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
index_root = "logs"

# Stays None until load_hubert() populates it on the first conversion.
hubert_model = None

# Every "*.pth" file directly inside the weights folder is offered as a voice.
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# Collect "*.index" files anywhere under the logs folder, skipping any whose
# file name contains "trained".
index_paths = [
    "%s/%s" % (folder, filename)
    for folder, _subdirs, filenames in os.walk(index_root, topdown=False)
    for filename in filenames
    if filename.endswith(".index") and "trained" not in filename
]
44
+
45
+
46
def _build_synthesizer(checkpoint):
    """Instantiate the synthesizer class that matches a loaded checkpoint.

    The class family is chosen from the checkpoint's ``"version"`` entry
    ("v1" or "v2", defaulting to "v1") and the variant from its ``"f0"``
    entry (defaulting to 1).

    Args:
        checkpoint: Mapping loaded from a ``.pth`` weight file. Its
            ``"config"`` sequence is unpacked into the constructor.

    Returns:
        A newly constructed synthesizer; no weights are loaded here.

    Raises:
        ValueError: If ``"version"`` is neither "v1" nor "v2". Previously
            this case silently left the model variable unassigned.
    """
    version_tag = checkpoint.get("version", "v1")
    if version_tag == "v1":
        f0_cls = SynthesizerTrnMs256NSFsid
        no_f0_cls = SynthesizerTrnMs256NSFsid_nono
    elif version_tag == "v2":
        f0_cls = SynthesizerTrnMs768NSFsid
        no_f0_cls = SynthesizerTrnMs768NSFsid_nono
    else:
        raise ValueError("Unsupported model version: %r" % (version_tag,))

    if checkpoint.get("f0", 1) == 1:
        return f0_cls(*checkpoint["config"], is_half=config.is_half)
    return no_f0_cls(*checkpoint["config"])


def get_vc(sid):
    """Load the voice model chosen in the UI, or unload on an empty choice.

    On a non-empty ``sid`` the checkpoint ``weights/<sid>`` is read, the
    matching synthesizer is built and moved to ``config.device``, and the
    module-level state (``cpt``, ``tgt_sr``, ``version``, ``net_g``, ``vc``,
    ``n_spk``) is refreshed. On an empty ``sid`` the loaded models are
    dropped and the CUDA cache is emptied.

    Args:
        sid: File name of the selected ``.pth`` weight, or ``""`` / ``[]``
            when nothing is selected.

    Returns:
        A Gradio update dict for the speaker-id slider: hidden when nothing
        is loaded, otherwise visible with ``maximum`` set to the number of
        speakers in the checkpoint.

    Raises:
        ValueError: If the checkpoint declares an unsupported version.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model

    if sid == "" or sid == []:
        # Because of polling, an empty selection can arrive repeatedly; only
        # tear down when a model was actually switched to "no model".
        if hubert_model is not None:
            print("clean_empty_cache")
            # Rebinding to None releases the references and, unlike `del`,
            # cannot fail when one of the globals was never assigned.
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Original author's note: without the rebuild-and-discard dance
            # below, the memory is not released cleanly.
            loaded = globals().get("cpt")
            if loaded is not None:
                version = loaded.get("version", "v1")
                net_g = _build_synthesizer(loaded)
            del loaded
            # Leave both names bound (to None) rather than deleted so later
            # readers see a well-defined "nothing loaded" state.
            net_g = cpt = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE(review): torch.load unpickles the file, which can execute
    # arbitrary code; only place trusted checkpoints in the weights folder.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    version = cpt.get("version", "v1")
    net_g = _build_synthesizer(cpt)
    # Drop the enc_q submodule before loading weights (strict=False below
    # tolerates the resulting key mismatch).
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    net_g = net_g.half() if config.is_half else net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return {"visible": True, "maximum": n_spk, "__type__": "update"}
105
+
106
+
107
def load_hubert():
    """Load ``hubert_base.pt`` and store it in the global ``hubert_model``.

    The first model of the loaded ensemble is moved to ``config.device``,
    cast to half or full precision according to ``config.is_half``, and put
    into eval mode.
    """
    global hubert_model
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"], suffix=""
    )
    hubert_model = ensemble[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
120
+
121
+
122
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """Convert one uploaded audio clip with the currently loaded voice model.

    Args:
        sid: Speaker id forwarded to the conversion pipeline.
        input_audio_path: ``(sample_rate, samples)`` pair from the audio
            widget, or ``None`` when nothing was uploaded. Samples are
            scaled by 1/32768 (presumably int16 PCM; confirm with the
            widget configuration).
        f0_up_key: Pitch shift; coerced to ``int``.
        f0_file: Optional F0 curve file forwarded to the pipeline.
        f0_method: Name of the pitch extraction method.
        file_index: Manually entered index path; when empty, ``file_index2``
            is used instead.
        file_index2: Index path chosen from the dropdown (may be ``None``).
        index_rate: Forwarded to the pipeline.
        filter_radius: Forwarded to the pipeline.
        resample_sr: Requested output sample rate; values below 16000 keep
            the model's own rate.
        rms_mix_rate: Forwarded to the pipeline.
        protect: Forwarded to the pipeline.

    Returns:
        ``(message, audio)``. ``audio`` is ``(sample_rate, samples)`` on
        success, ``None`` when no input was given, and ``(None, None)`` when
        the conversion raised; in that case ``message`` is the traceback.
    """
    # Only reads of module-level state happen here (tgt_sr, net_g, vc, cpt,
    # version, hubert_model), so no `global` declaration is required.
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        source_sr, samples = input_audio_path[0], input_audio_path[1]
        audio = samples / 32768.0
        if len(audio.shape) == 2:
            # Multi-channel input: average the channels down to mono.
            audio = np.mean(audio, -1)
        audio = librosa.resample(audio, orig_sr=source_sr, target_sr=16000)
        # Scale down only when the peak would exceed 0.95 of full scale.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        # Timing slots reported in the success message; presumably filled in
        # by the pipeline (npy, f0, infer) -- confirm in vc_infer_pipeline.
        times = [0, 0, 0]
        if hubert_model is None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        if file_index != "":
            # Guard against novice mistakes: strip stray quotes/whitespace
            # and swap "trained" for "added" automatically.
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
        else:
            # The dropdown yields None when nothing is selected; normalise
            # to "" so os.path.exists() below does not raise TypeError.
            file_index = file_index2 or ""
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            f0_file=f0_file,
        )
        # Keep the output rate local. Writing it back to the global tgt_sr
        # would make every later conversion treat the resampled rate as the
        # model's native rate.
        output_sr = tgt_sr
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            output_sr = resample_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (output_sr, audio_opt)
    except Exception:
        # UI boundary: surface the traceback to the user instead of crashing,
        # while letting KeyboardInterrupt/SystemExit propagate.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
206
+
207
+
208
# Gradio front end: one tab with a voice selector, the conversion settings,
# and a button that runs vc_single.
# NOTE(review): the source this was recovered from had lost its indentation,
# so the extent of each gr.Column() below is a best guess -- confirm layout.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("在线demo"):
            # Whitespace inside the original triple-quoted literal could not
            # be recovered; only the visible text is reproduced.
            gr.Markdown(value="\nRVC 在线demo\n")

            # Voice selector, filled with the discovered weight files.
            sid = gr.Dropdown(choices=sorted(names), label=i18n("推理音色"))
            with gr.Column():
                # Speaker-id slider; hidden until get_vc reveals it.
                spk_item = gr.Slider(
                    label=i18n("请选择说话人id"),
                    minimum=0,
                    maximum=2333,
                    step=1,
                    value=0,
                    interactive=True,
                    visible=False,
                )
            # Changing the selected voice loads (or unloads) the model.
            sid.change(fn=get_vc, inputs=[sid], outputs=[spk_item])

            gr.Markdown(
                value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
            )
            # Input audio upload.
            vc_input3 = gr.Audio(label="上传音频(长度小于90秒)")
            # Pitch shift.
            vc_transform0 = gr.Number(
                value=0,
                label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
            )
            # Pitch extraction method.
            f0method0 = gr.Radio(
                choices=["pm", "harvest", "crepe"],
                value="pm",
                label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
                interactive=True,
            )
            # Filter radius setting.
            filter_radius0 = gr.Slider(
                label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
                minimum=0,
                maximum=7,
                step=1,
                value=3,
                interactive=True,
            )
            with gr.Column():
                # Manual index path; hidden and read-only, so the dropdown
                # below is what users actually pick from.
                file_index1 = gr.Textbox(
                    value="",
                    label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                    visible=False,
                    interactive=False,
                )
            # Index files discovered at start-up.
            file_index2 = gr.Dropdown(
                choices=sorted(index_paths),
                label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                interactive=True,
            )
            # Index rate setting.
            index_rate1 = gr.Slider(
                label=i18n("检索特征占比"),
                minimum=0,
                maximum=1,
                value=0.88,
                interactive=True,
            )
            # Output resampling rate; the label states 0 means no resampling.
            resample_sr0 = gr.Slider(
                label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                minimum=0,
                maximum=48000,
                step=1,
                value=0,
                interactive=True,
            )
            # Volume envelope mix rate.
            rms_mix_rate0 = gr.Slider(
                label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                minimum=0,
                maximum=1,
                value=1,
                interactive=True,
            )
            # Protection setting.
            protect0 = gr.Slider(
                label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
                minimum=0,
                maximum=0.5,
                step=0.01,
                value=0.33,
                interactive=True,
            )
            # Optional F0 curve file.
            f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))

            # Convert button and the two outputs: status text and audio.
            but0 = gr.Button(i18n("转换"), variant="primary")
            vc_output1 = gr.Textbox(label=i18n("输出信息"))
            vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))

            # Order must match vc_single's positional parameters.
            conversion_inputs = [
                spk_item,
                vc_input3,
                vc_transform0,
                f0_file,
                f0method0,
                file_index1,
                file_index2,
                index_rate1,
                filter_radius0,
                resample_sr0,
                rms_mix_rate0,
                protect0,
            ]
            conversion_outputs = [vc_output1, vc_output2]
            but0.click(vc_single, conversion_inputs, conversion_outputs)


app.launch()