NeoPy committed on
Commit
ac40bf5
·
verified ·
1 Parent(s): 02a2474

Create inference.py

Browse files
Files changed (1) hide show
  1. app/tabs/inference.py +301 -0
app/tabs/inference.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from original import *
3
+
4
+
5
def infer_tabs():
    """Build the "模型推理" (model inference) tab and wire up its events.

    The tab holds a voice dropdown with refresh/unload buttons, plus two
    sub-tabs: single-file conversion (``vc.vc_single``) and batch conversion
    (``vc.vc_multi``). Changing the selected voice calls ``vc.get_vc``.

    ``i18n``, ``names``, ``index_paths``, ``config``, ``vc``, ``clean`` and
    ``change_choices`` are not defined here; they are expected to come from
    ``from original import *`` at the top of the file.

    NOTE(review): presumably this must be called while a ``gr.Blocks`` /
    ``gr.Tabs`` context is open -- confirm against the caller.
    NOTE(review): the layout nesting below was restored from a listing whose
    indentation was lost -- confirm against the rendered UI.

    Returns:
        None. The components are registered with the active Gradio context.
    """
    # "crepe" is only offered when config.dml is falsy. The list is computed
    # once and copied per widget so both Radio groups always agree.
    # NOTE(review): assumes config.dml is a plain bool -- confirm.
    if config.dml:
        f0_choices = ["pm", "harvest", "rmvpe"]
    else:
        f0_choices = ["pm", "harvest", "crepe", "rmvpe"]

    with gr.TabItem(i18n("模型推理")):
        # --- Voice selection shared by both sub-tabs ------------------------
        with gr.Row():
            sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
            with gr.Column():
                refresh_button = gr.Button(
                    i18n("刷新音色列表和索引路径"), variant="primary"
                )
                clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
            # Speaker id slider starts hidden; it is one of the outputs of
            # the sid0.change handler registered at the end of this function.
            spk_item = gr.Slider(
                minimum=0,
                maximum=2333,
                step=1,
                label=i18n("请选择说话人id"),
                value=0,
                visible=False,
                interactive=True,
            )
            clean_button.click(
                fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
            )

        # --- Single-file inference ------------------------------------------
        with gr.TabItem(i18n("单次推理")):
            with gr.Group():
                with gr.Row():
                    with gr.Column():
                        vc_transform0 = gr.Number(
                            label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
                            value=0,
                        )
                        input_audio0 = gr.Textbox(
                            label=i18n(
                                "输入待处理音频文件路径(默认是正确格式示例)"
                            ),
                            placeholder="C:\\Users\\Desktop\\audio_example.wav",
                        )
                        file_index1 = gr.Textbox(
                            label=i18n(
                                "特征检索库文件路径,为空则使用下拉的选择结果"
                            ),
                            placeholder="C:\\Users\\Desktop\\model_example.index",
                            interactive=True,
                        )
                        file_index2 = gr.Dropdown(
                            label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                            choices=sorted(index_paths),
                            interactive=True,
                        )
                        f0method0 = gr.Radio(
                            label=i18n(
                                "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                            ),
                            choices=list(f0_choices),
                            value="rmvpe",
                            interactive=True,
                        )

                    with gr.Column():
                        resample_sr0 = gr.Slider(
                            minimum=0,
                            maximum=48000,
                            label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                            value=0,
                            step=1,
                            interactive=True,
                        )
                        rms_mix_rate0 = gr.Slider(
                            minimum=0,
                            maximum=1,
                            label=i18n(
                                "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
                            ),
                            value=0.25,
                            interactive=True,
                        )
                        protect0 = gr.Slider(
                            minimum=0,
                            maximum=0.5,
                            label=i18n(
                                "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                            ),
                            value=0.33,
                            step=0.01,
                            interactive=True,
                        )
                        filter_radius0 = gr.Slider(
                            minimum=0,
                            maximum=7,
                            label=i18n(
                                ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
                            ),
                            value=3,
                            step=1,
                            interactive=True,
                        )
                        index_rate1 = gr.Slider(
                            minimum=0,
                            maximum=1,
                            label=i18n("检索特征占比"),
                            value=0.75,
                            interactive=True,
                        )
                        # Optional F0 curve upload; created hidden.
                        f0_file = gr.File(
                            label=i18n(
                                "F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
                            ),
                            visible=False,
                        )

                        # First refresh handler: updates the voice list and
                        # the single-inference index dropdown.
                        refresh_button.click(
                            fn=change_choices,
                            inputs=[],
                            outputs=[sid0, file_index2],
                            api_name="infer_refresh",
                        )

            with gr.Group():
                with gr.Column():
                    but0 = gr.Button(i18n("转换"), variant="primary")
                    with gr.Row():
                        vc_output1 = gr.Textbox(label=i18n("输出信息"))
                        vc_output2 = gr.Audio(
                            label=i18n("输出音频(右下角三个点,点了可以下载)")
                        )

                    # The order of this list is the positional argument
                    # order handed to vc.vc_single.
                    but0.click(
                        vc.vc_single,
                        [
                            spk_item,
                            input_audio0,
                            vc_transform0,
                            f0_file,
                            f0method0,
                            file_index1,
                            file_index2,
                            index_rate1,
                            filter_radius0,
                            resample_sr0,
                            rms_mix_rate0,
                            protect0,
                        ],
                        [vc_output1, vc_output2],
                        api_name="infer_convert",
                    )

        # --- Batch inference ------------------------------------------------
        with gr.TabItem(i18n("批量推理")):
            gr.Markdown(
                value=i18n(
                    "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. "
                )
            )
            with gr.Row():
                with gr.Column():
                    vc_transform1 = gr.Number(
                        label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
                        value=0,
                    )
                    opt_input = gr.Textbox(
                        label=i18n("指定输出文件夹"), value="opt"
                    )
                    file_index3 = gr.Textbox(
                        label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                        value="",
                        interactive=True,
                    )
                    file_index4 = gr.Dropdown(
                        label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                        choices=sorted(index_paths),
                        interactive=True,
                    )
                    f0method1 = gr.Radio(
                        label=i18n(
                            "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                        ),
                        choices=list(f0_choices),
                        value="rmvpe",
                        interactive=True,
                    )
                    format1 = gr.Radio(
                        label=i18n("导出文件格式"),
                        choices=["wav", "flac", "mp3", "m4a"],
                        value="wav",
                        interactive=True,
                    )

                    # Second refresh handler on the same button: only the
                    # second element returned by change_choices() is routed
                    # to the batch index dropdown.
                    refresh_button.click(
                        fn=lambda: change_choices()[1],
                        inputs=[],
                        outputs=file_index4,
                        api_name="infer_refresh_batch",
                    )

                with gr.Column():
                    resample_sr1 = gr.Slider(
                        minimum=0,
                        maximum=48000,
                        label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                        value=0,
                        step=1,
                        interactive=True,
                    )
                    rms_mix_rate1 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n(
                            "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
                        ),
                        value=1,
                        interactive=True,
                    )
                    protect1 = gr.Slider(
                        minimum=0,
                        maximum=0.5,
                        label=i18n(
                            "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                        ),
                        value=0.33,
                        step=0.01,
                        interactive=True,
                    )
                    filter_radius1 = gr.Slider(
                        minimum=0,
                        maximum=7,
                        label=i18n(
                            ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
                        ),
                        value=3,
                        step=1,
                        interactive=True,
                    )
                    index_rate2 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("检索特征占比"),
                        value=1,
                        interactive=True,
                    )

            with gr.Row():
                dir_input = gr.Textbox(
                    label=i18n(
                        "输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"
                    ),
                    placeholder="C:\\Users\\Desktop\\input_vocal_dir",
                )
                # Named batch_files (not "inputs") so it cannot be confused
                # with the inputs= keyword of the event listeners.
                batch_files = gr.File(
                    file_count="multiple",
                    label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
                )

            with gr.Row():
                but1 = gr.Button(i18n("转换"), variant="primary")
                vc_output3 = gr.Textbox(label=i18n("输出信息"))

                # The order of this list is the positional argument order
                # handed to vc.vc_multi.
                but1.click(
                    vc.vc_multi,
                    [
                        spk_item,
                        dir_input,
                        opt_input,
                        batch_files,
                        vc_transform1,
                        f0method1,
                        file_index3,
                        file_index4,
                        index_rate2,
                        filter_radius1,
                        resample_sr1,
                        rms_mix_rate1,
                        protect1,
                        format1,
                    ],
                    [vc_output3],
                    api_name="infer_convert_batch",
                )

            # Selecting another voice calls vc.get_vc, whose results are
            # routed to the speaker slider, both protect sliders and both
            # index dropdowns.
            sid0.change(
                fn=vc.get_vc,
                inputs=[sid0, protect0, protect1],
                outputs=[spk_item, protect0, protect1, file_index2, file_index4],
                api_name="infer_change_voice",
            )