TheStinger committed
Commit: e71a85c
Parent: 62d6c2d

Update app.py

Files changed (1)
app.py +508 -504
app.py CHANGED
@@ -1,504 +1,508 @@
 import gradio as gr
 import requests
 import random
 import os
 import zipfile
 import librosa
 import time
 from infer_rvc_python import BaseLoader
 from pydub import AudioSegment
 from tts_voice import tts_order_voice
 import edge_tts
 import tempfile
 from audio_separator.separator import Separator
 import model_handler
 import psutil
 import cpuinfo

 language_dict = tts_order_voice

 async def text_to_speech_edge(text, language_code):
     voice = language_dict[language_code]
     communicate = edge_tts.Communicate(text, voice)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
         tmp_path = tmp_file.name

     await communicate.save(tmp_path)

     return tmp_path

 try:
     import spaces
     spaces_status = True
 except ImportError:
     spaces_status = False

 separator = Separator()
-converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None) # <- yeah so like this handles rvc
+converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)

 global pth_file
 global index_file

 pth_file = "model.pth"
 index_file = "model.index"

 #CONFIGS
 TEMP_DIR = "temp"
 MODEL_PREFIX = "model"
 PITCH_ALGO_OPT = [
     "pm",
     "harvest",
     "crepe",
     "rmvpe",
     "rmvpe+",
 ]
 UVR_5_MODELS = [
     {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
     {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
     {"model_name": "Kim Vocal 2", "checkpoint": "Kim_Vocal_2.onnx"},
     {"model_name": "5_HP-Karaoke", "checkpoint": "5_HP-Karaoke-UVR.pth"},
     {"model_name": "UVR-DeNoise by FoxJoy", "checkpoint": "UVR-DeNoise.pth"},
     {"model_name": "UVR-DeEcho-DeReverb by FoxJoy", "checkpoint": "UVR-DeEcho-DeReverb.pth"},
 ]
 MODELS = [
     {"model": "model.pth", "index": "model.index", "model_name": "Test Model"},
 ]

 os.makedirs(TEMP_DIR, exist_ok=True)

 def unzip_file(file):
     filename = os.path.basename(file).split(".")[0]
     with zipfile.ZipFile(file, 'r') as zip_ref:
         zip_ref.extractall(os.path.join(TEMP_DIR, filename))
     return True


 def progress_bar(total, current):
     return "[" + "=" * int(current / total * 20) + ">" + " " * (20 - int(current / total * 20)) + "] " + str(int(current / total * 100)) + "%"

 def download_from_url(url, name=None):
     if name is None:
         raise ValueError("The model name must be provided")
     if "/blob/" in url:
         url = url.replace("/blob/", "/resolve/")
     if "huggingface" not in url:
         return ["The URL must be from huggingface", "Failed", "Failed"]
     filename = os.path.join(TEMP_DIR, MODEL_PREFIX + str(random.randint(1, 1000)) + ".zip")
     response = requests.get(url)
     total = int(response.headers.get('content-length', 0))
     if total > 500000000:

         return ["The file is too large. You can only download files up to 500 MB in size.", "Failed", "Failed"]
     current = 0
     with open(filename, "wb") as f:
         for data in response.iter_content(chunk_size=4096):
             f.write(data)
             current += len(data)
             print(progress_bar(total, current), end="\r") #



     try:
         unzip_file(filename)
     except Exception as e:
         return ["Failed to unzip the file", "Failed", "Failed"]
     unzipped_dir = os.path.join(TEMP_DIR, os.path.basename(filename).split(".")[0])
     pth_files = []
     index_files = []
     for root, dirs, files in os.walk(unzipped_dir):
         for file in files:
             if file.endswith(".pth"):
                 pth_files.append(os.path.join(root, file))
             elif file.endswith(".index"):
                 index_files.append(os.path.join(root, file))

     print(pth_files, index_files)
     global pth_file
     global index_file
     pth_file = pth_files[0]
     index_file = index_files[0]

     print(pth_file)
     print(index_file)

     MODELS.append({"model": pth_file, "index": index_file, "model_name": name})
     return ["Downloaded as " + name, pth_files[0], index_files[0]]

 def inference(audio, model_name):
     output_data = inf_handler(audio, model_name)
     vocals = output_data[0]
     inst = output_data[1]

     return vocals, inst

 if spaces_status:
     @spaces.GPU()
     def convert_now(audio_files, random_tag, converter):
         return converter(
             audio_files,
             random_tag,
             overwrite=False,
             parallel_workers=8
         )


 else:
     def convert_now(audio_files, random_tag, converter):
         return converter(
             audio_files,
             random_tag,
             overwrite=False,
             parallel_workers=8
         )

 def calculate_remaining_time(epochs, seconds_per_epoch):
     total_seconds = epochs * seconds_per_epoch

     hours = total_seconds // 3600
     minutes = (total_seconds % 3600) // 60
     seconds = total_seconds % 60

     if hours == 0:
         return f"{int(minutes)} minutes"
     elif hours == 1:
         return f"{int(hours)} hour and {int(minutes)} minutes"
     else:
         return f"{int(hours)} hours and {int(minutes)} minutes"

 def inf_handler(audio, model_name):
     model_found = False
     for model_info in UVR_5_MODELS:
         if model_info["model_name"] == model_name:
             separator.load_model(model_info["checkpoint"])
             model_found = True
             break
     if not model_found:
         separator.load_model()
     output_files = separator.separate(audio)
     vocals = output_files[0]
     inst = output_files[1]
     return vocals, inst


 def run(
     model,
     audio_files,
     pitch_alg,
     pitch_lvl,
     index_inf,
     r_m_f,
     e_r,
     c_b_p,
 ):
     if not audio_files:
         raise ValueError("The audio pls")

     if isinstance(audio_files, str):
         audio_files = [audio_files]

     try:
         duration_base = librosa.get_duration(filename=audio_files[0])
         print("Duration:", duration_base)
     except Exception as e:
         print(e)

     random_tag = "USER_"+str(random.randint(10000000, 99999999))

     file_m = model
     print("File model:", file_m)

     # get from MODELS
     for model in MODELS:
         if model["model_name"] == file_m:
             print(model)
             file_m = model["model"]
             file_index = model["index"]
             break

     if not file_m.endswith(".pth"):
         raise ValueError("The model file must be a .pth file")


     print("Random tag:", random_tag)
     print("File model:", file_m)
     print("Pitch algorithm:", pitch_alg)
     print("Pitch level:", pitch_lvl)
     print("File index:", file_index)
     print("Index influence:", index_inf)
     print("Respiration median filtering:", r_m_f)
     print("Envelope ratio:", e_r)

     converter.apply_conf(
         tag=random_tag,
         file_model=file_m,
         pitch_algo=pitch_alg,
         pitch_lvl=pitch_lvl,
         file_index=file_index,
         index_influence=index_inf,
         respiration_median_filtering=r_m_f,
         envelope_ratio=e_r,
         consonant_breath_protection=c_b_p,
         resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
     )
     time.sleep(0.1)

     result = convert_now(audio_files, random_tag, converter)
     print("Result:", result)

     return result[0]

 def upload_model(index_file, pth_file, model_name):
     pth_file = pth_file.name
     index_file = index_file.name
     MODELS.append({"model": pth_file, "index": index_file, "model_name": model_name})
     return "Uploaded!"

 with gr.Blocks(theme=gr.themes.Default(primary_hue="pink", secondary_hue="rose"), title="Ilaria RVC 💖") as demo:
     gr.Markdown("## Ilaria RVC 💖")
     with gr.Tab("Inference"):
         sound_gui = gr.Audio(value=None,type="filepath",autoplay=False,visible=True,)
         def update():
             print(MODELS)
             return gr.Dropdown(label="Model",choices=[model["model_name"] for model in MODELS],visible=True,interactive=True, value=MODELS[0]["model_name"],)
         with gr.Row():
             models_dropdown = gr.Dropdown(label="Model",choices=[model["model_name"] for model in MODELS],visible=True,interactive=True, value=MODELS[0]["model_name"],)
             refresh_button = gr.Button("Refresh Models")
             refresh_button.click(update, outputs=[models_dropdown])

         with gr.Accordion("Ilaria TTS", open=False):
             text_tts = gr.Textbox(label="Text", placeholder="Hello!", lines=3, interactive=True,)
             dropdown_tts = gr.Dropdown(label="Language and Model",choices=list(language_dict.keys()),interactive=True, value=list(language_dict.keys())[0])

             button_tts = gr.Button("Speak", variant="primary",)
             button_tts.click(text_to_speech_edge, inputs=[text_tts, dropdown_tts], outputs=[sound_gui])

         with gr.Accordion("Settings", open=False):
             pitch_algo_conf = gr.Dropdown(PITCH_ALGO_OPT,value=PITCH_ALGO_OPT[4],label="Pitch algorithm",visible=True,interactive=True,)
             pitch_lvl_conf = gr.Slider(label="Pitch level (lower -> 'male' while higher -> 'female')",minimum=-24,maximum=24,step=1,value=0,visible=True,interactive=True,)
             index_inf_conf = gr.Slider(minimum=0,maximum=1,label="Index influence -> How much accent is applied",value=0.75,)
             respiration_filter_conf = gr.Slider(minimum=0,maximum=7,label="Respiration median filtering",value=3,step=1,interactive=True,)
             envelope_ratio_conf = gr.Slider(minimum=0,maximum=1,label="Envelope ratio",value=0.25,interactive=True,)
             consonant_protec_conf = gr.Slider(minimum=0,maximum=0.5,label="Consonant breath protection",value=0.5,interactive=True,)

         button_conf = gr.Button("Convert",variant="primary",)
         output_conf = gr.Audio(type="filepath",label="Output",)

         button_conf.click(lambda :None, None, output_conf)
         button_conf.click(
             run,
             inputs=[
                 models_dropdown,
                 sound_gui,
                 pitch_algo_conf,
                 pitch_lvl_conf,
                 index_inf_conf,
                 respiration_filter_conf,
                 envelope_ratio_conf,
                 consonant_protec_conf,
             ],
             outputs=[output_conf],
         )


     with gr.Tab("Model Loader (Download and Upload)"):
         with gr.Accordion("Model Downloader", open=False):
             gr.Markdown(
                 "Download the model from the following URL and upload it here. (Huggingface RVC model)"
             )
             model = gr.Textbox(lines=1, label="Model URL")
             name = gr.Textbox(lines=1, label="Model Name", placeholder="Model Name")
             download_button = gr.Button("Download Model")
             status = gr.Textbox(lines=1, label="Status", placeholder="Waiting....", interactive=False)
             model_pth = gr.Textbox(lines=1, label="Model pth file", placeholder="Waiting....", interactive=False)
             index_pth = gr.Textbox(lines=1, label="Index pth file", placeholder="Waiting....", interactive=False)
             download_button.click(download_from_url, [model, name], outputs=[status, model_pth, index_pth])
         with gr.Accordion("Upload A Model", open=False):
             index_file_upload = gr.File(label="Index File (.index)")
             pth_file_upload = gr.File(label="Model File (.pth)")

             model_name = gr.Textbox(label="Model Name", placeholder="Model Name")
             upload_button = gr.Button("Upload Model")
             upload_status = gr.Textbox(lines=1, label="Status", placeholder="Waiting....", interactive=False)

             upload_button.click(upload_model, [index_file_upload, pth_file_upload, model_name], upload_status)


     with gr.Tab("Vocal Separator (UVR)"):
         gr.Markdown("Separate vocals and instruments from an audio file using UVR models. - This is only on CPU due to ZeroGPU being ZeroGPU :(")
         uvr5_audio_file = gr.Audio(label="Audio File",type="filepath")

         with gr.Row():
             uvr5_model = gr.Dropdown(label="Model", choices=[model["model_name"] for model in UVR_5_MODELS])
             uvr5_button = gr.Button("Separate Vocals", variant="primary",)

         uvr5_output_voc = gr.Audio(type="filepath", label="Output 1",)
         uvr5_output_inst = gr.Audio(type="filepath", label="Output 2",)

         uvr5_button.click(inference, [uvr5_audio_file, uvr5_model], [uvr5_output_voc, uvr5_output_inst])

     with gr.Tab("Extra"):
         with gr.Accordion("Model Information", open=False):
             def json_to_markdown_table(json_data):
                 table = "| Key | Value |\n| --- | --- |\n"
                 for key, value in json_data.items():
                     table += f"| {key} | {value} |\n"
                 return table
             def model_info(name):
                 for model in MODELS:
                     if model["model_name"] == name:
                         print(model["model"])
                         info = model_handler.model_info(model["model"])
                         info2 = {
                             "Model Name": model["model_name"],
                             "Model Config": info['config'],
                             "Epochs Trained": info['epochs'],
                             "Sample Rate": info['sr'],
                             "Pitch Guidance": info['f0'],
                             "Model Precision": info['size'],
                         }
                         return gr.Markdown(json_to_markdown_table(info2))

                 return "Model not found"
             def update():
                 print(MODELS)
                 return gr.Dropdown(label="Model", choices=[model["model_name"] for model in MODELS])
             with gr.Row():
                 model_info_dropdown = gr.Dropdown(label="Model", choices=[model["model_name"] for model in MODELS])
                 refresh_button = gr.Button("Refresh Models")
                 refresh_button.click(update, outputs=[model_info_dropdown])
             model_info_button = gr.Button("Get Model Information")
             model_info_output = gr.Textbox(value="Waiting...",label="Output", interactive=False)
             model_info_button.click(model_info, [model_info_dropdown], [model_info_output])



         with gr.Accordion("Training Time Calculator", open=False):
             with gr.Column():
                 epochs_input = gr.Number(label="Number of Epochs")
                 seconds_input = gr.Number(label="Seconds per Epoch")
                 calculate_button = gr.Button("Calculate Time Remaining")
                 remaining_time_output = gr.Textbox(label="Remaining Time", interactive=False)

                 calculate_button.click(calculate_remaining_time,inputs=[epochs_input, seconds_input],outputs=[remaining_time_output])

         with gr.Accordion("Model Fusion", open=False):
             with gr.Group():
                 def merge(ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0, version_2):
                     for model in MODELS:
                         if model["model_name"] == ckpt_a:
                             ckpt_a = model["model"]
                         if model["model_name"] == ckpt_b:
                             ckpt_b = model["model"]

                     path = model_handler.merge(ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0, version_2)
                     if path == "Fail to merge the models. The model architectures are not the same.":
                         return "Fail to merge the models. The model architectures are not the same."
                     else:
                         MODELS.append({"model": path, "index": None, "model_name": name_to_save0})
                         return "Merged, saved as " + name_to_save0

             gr.Markdown(value="Strongly suggested to use only very clean models.")
             with gr.Row():
                 def update():
                     print(MODELS)
                     return gr.Dropdown(label="Model A", choices=[model["model_name"] for model in MODELS]), gr.Dropdown(label="Model B", choices=[model["model_name"] for model in MODELS])
                 refresh_button_fusion = gr.Button("Refresh Models")
                 ckpt_a = gr.Dropdown(label="Model A", choices=[model["model_name"] for model in MODELS])
                 ckpt_b = gr.Dropdown(label="Model B", choices=[model["model_name"] for model in MODELS])
                 refresh_button_fusion.click(update, outputs=[ckpt_a, ckpt_b])
                 alpha_a = gr.Slider(
                     minimum=0,
                     maximum=1,
                     label="Weight of the first model over the second",
                     value=0.5,
                     interactive=True,
                 )
             with gr.Group():
                 with gr.Row():
                     sr_ = gr.Radio(
                         label="Sample rate of both models",
                         choices=["32k","40k", "48k"],
                         value="32k",
                         interactive=True,
                     )
                     if_f0_ = gr.Radio(
                         label="Pitch Guidance",
                         choices=["Yes", "Nah"],
                         value="Yes",
                         interactive=True,
                     )
                     info__ = gr.Textbox(
                         label="Add informations to the model",
                         value="",
                         max_lines=8,
                         interactive=True,
                         visible=False
                     )
                     name_to_save0 = gr.Textbox(
                         label="Final Model name",
                         value="",
                         max_lines=1,
                         interactive=True,
                     )
                     version_2 = gr.Radio(
                         label="Versions of the models",
                         choices=["v1", "v2"],
                         value="v2",
                         interactive=True,
                     )
             with gr.Group():
                 with gr.Row():
                     but6 = gr.Button("Fuse the two models", variant="primary")
                     info4 = gr.Textbox(label="Output", value="", max_lines=8)
                     but6.click(
                         merge,
                         [ckpt_a,ckpt_b,alpha_a,sr_,if_f0_,info__,name_to_save0,version_2,],info4,api_name="ckpt_merge",)

         with gr.Accordion("Model Quantization", open=False):
             gr.Markdown("Quantize the model to a lower precision. - soon™ or never™ 😎")

         with gr.Accordion("Debug", open=False):
             def json_to_markdown_table(json_data):
                 table = "| Key | Value |\n| --- | --- |\n"
                 for key, value in json_data.items():
                     table += f"| {key} | {value} |\n"
                 return table
             gr.Markdown("View the models that are currently loaded in the instance.")

             gr.Markdown(json_to_markdown_table({"Models": len(MODELS), "UVR Models": len(UVR_5_MODELS)}))

             gr.Markdown("View the current status of the instance.")
             status = {
                 "Status": "Running", # duh lol
                 "Models": len(MODELS),
                 "UVR Models": len(UVR_5_MODELS),
                 "CPU Usage": f"{psutil.cpu_percent()}%",
                 "RAM Usage": f"{psutil.virtual_memory().percent}%",
                 "CPU": f"{cpuinfo.get_cpu_info()['brand_raw']}",
                 "System Uptime": f"{round(time.time() - psutil.boot_time(), 2)} seconds",
                 "System Load Average": f"{psutil.getloadavg()}",
                 "====================": "====================",
                 "CPU Cores": psutil.cpu_count(),
                 "CPU Threads": psutil.cpu_count(logical=True),
                 "RAM Total": f"{round(psutil.virtual_memory().total / 1024**3, 2)} GB",
                 "RAM Used": f"{round(psutil.virtual_memory().used / 1024**3, 2)} GB",
                 "CPU Frequency": f"{psutil.cpu_freq().current} MHz",
                 "====================": "====================",
                 "GPU": "A100 - Do a request (Inference, you won't see it either way)",
             }
             gr.Markdown(json_to_markdown_table(status))

     with gr.Tab("Credits"):
         gr.Markdown(
             """
             Ilaria RVC made by [Ilaria](https://huggingface.co/TheStinger) suport her on [ko-fi](https://ko-fi.com/ilariaowo)

             The Inference code is made by [r3gm](https://huggingface.co/r3gm) (his module helped form this space 💖)

             made with ❤️ by [mikus](https://github.com/cappuch) - made the ui!

             ## In loving memory of JLabDX 🕊️
             """
         )
+    with gr.Tab(("")):
+        gr.Markdown('''
+        ![ilaria](https://i.ytimg.com/vi/5PWqt2Wg-us/maxresdefault.jpg)
+        ''')

-demo.queue(api_open=False).launch(show_api=False) # idk ilaria if you want or dont want to
+demo.queue(api_open=False).launch(show_api=False)
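
For reference, the pattern the added lines use is a tab with an empty label whose only content is a Markdown image. A minimal standalone sketch of that same pattern (not part of the commit; it assumes only that gradio is installed):

import gradio as gr

with gr.Blocks() as demo:
    # A tab whose label is the empty string; gr.Tab(("")) in the commit
    # is equivalent, since ("") is just the string "" in parentheses.
    with gr.Tab(""):
        # gr.Markdown renders standard Markdown, so an image embed works here.
        gr.Markdown("![ilaria](https://i.ytimg.com/vi/5PWqt2Wg-us/maxresdefault.jpg)")

demo.launch()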