Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +54 -169
  3. pre-requirements.txt +0 -2
  4. requirements.txt +3 -8
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.43.1
8
  app_file: app.py
9
  license: mit
10
  pinned: true
 
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.28.3
8
  app_file: app.py
9
  license: mit
10
  pinned: true
app.py CHANGED
@@ -6,7 +6,7 @@ import random
6
  import logging
7
  import time
8
  import soundfile as sf
9
- from infer_rvc_python.main import download_manager, load_hu_bert, Config
10
  import zipfile
11
  import edge_tts
12
  import asyncio
@@ -21,54 +21,14 @@ import numpy as np
21
  import urllib.request
22
  import shutil
23
  import threading
24
- import argparse
25
- import sys
26
-
27
- parser = argparse.ArgumentParser(description="Run the app with optional sharing")
28
- parser.add_argument(
29
- '--share',
30
- action='store_true',
31
- help='Enable sharing mode'
32
- )
33
- parser.add_argument(
34
- '--theme',
35
- type=str,
36
- default="aliabid94/new-theme",
37
- help='Set the theme (default: aliabid94/new-theme)'
38
- )
39
- args = parser.parse_args()
40
-
41
- IS_COLAB = True if ('google.colab' in sys.modules or args.share) else False
42
- IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
43
 
44
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
45
 
46
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
47
- converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
48
-
49
- test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
50
- test_names = ["model.pth", "model.index"]
51
-
52
- for url, filename in zip(test_model.split(", "), test_names):
53
- try:
54
- download_manager(
55
- url=url,
56
- path=".",
57
- extension="",
58
- overwrite=False,
59
- progress=True,
60
- )
61
- if not os.path.isfile(filename):
62
- raise FileNotFoundError
63
- except Exception:
64
- with open(filename, "wb") as f:
65
- pass
66
 
67
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
68
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
69
- RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
70
- theme = args.theme
71
- delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
72
 
73
  PITCH_ALGO_OPT = [
74
  "pm",
@@ -79,26 +39,6 @@ PITCH_ALGO_OPT = [
79
  ]
80
 
81
 
82
- async def get_voices_list(proxy=None):
83
- """Print all available voices."""
84
- from edge_tts import list_voices
85
- voices = await list_voices(proxy=proxy)
86
- voices = sorted(voices, key=lambda voice: voice["ShortName"])
87
-
88
- table = [
89
- {
90
- "ShortName": voice["ShortName"],
91
- "Gender": voice["Gender"],
92
- "ContentCategories": ", ".join(voice["VoiceTag"]["ContentCategories"]),
93
- "VoicePersonalities": ", ".join(voice["VoiceTag"]["VoicePersonalities"]),
94
- "FriendlyName": voice["FriendlyName"],
95
- }
96
- for voice in voices
97
- ]
98
-
99
- return table
100
-
101
-
102
  def find_files(directory):
103
  file_paths = []
104
  for filename in os.listdir(directory):
@@ -166,24 +106,19 @@ def find_my_model(a_, b_):
166
  return model, index
167
 
168
 
169
- def ensure_valid_file(url):
 
170
  if "huggingface" not in url:
171
  raise ValueError("Only downloads from Hugging Face are allowed")
172
 
173
  try:
174
- request = urllib.request.Request(url, method="HEAD")
175
- with urllib.request.urlopen(request) as response:
176
- content_length = response.headers.get("Content-Length")
177
-
178
- if content_length is None:
179
- raise ValueError("No Content-Length header found")
180
 
181
  file_size = int(content_length)
182
- # print("debug", url, file_size)
183
- if file_size > 900000000 and IS_ZERO_GPU:
184
- raise ValueError("The file is too large. Max allowed is 900 MB.")
185
-
186
- return file_size
187
 
188
  except Exception as e:
189
  raise e
@@ -195,13 +130,13 @@ def clear_files(directory):
195
  shutil.rmtree(directory)
196
 
197
 
198
- def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
199
 
200
  if not url_data:
201
  return None, None
202
 
203
  if "," in url_data:
204
- a_, b_ = url_data.split(",")
205
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
206
  else:
207
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
@@ -212,9 +147,12 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
212
  os.makedirs(directory, exist_ok=True)
213
 
214
  try:
 
 
 
 
215
  valid_url = [a_] if not b_ else [a_, b_]
216
  for link in valid_url:
217
- ensure_valid_file(link)
218
  download_manager(
219
  url=link,
220
  path=directory,
@@ -256,13 +194,13 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
256
  t.start()
257
 
258
 
259
- def add_audio_effects(audio_list, type_output):
260
  print("Audio effects")
261
 
262
  result = []
263
  for audio_path in audio_list:
264
  try:
265
- output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
266
 
267
  # Initialize audio effects plugins
268
  board = Pedalboard(
@@ -273,23 +211,13 @@ def add_audio_effects(audio_list, type_output):
273
  ]
274
  )
275
 
276
- # Temporary WAV to hold processed data before exporting
277
- temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
278
-
279
  with AudioFile(audio_path) as f:
280
- with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
 
281
  while f.tell() < f.frames:
282
  chunk = f.read(int(f.samplerate))
283
  effected = board(chunk, f.samplerate, reset=False)
284
  o.write(effected)
285
-
286
- # Convert with pydub to desired output type
287
- audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
288
- audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
289
-
290
- # Clean up temp file
291
- os.remove(temp_wav)
292
-
293
  result.append(output_path)
294
  except Exception as e:
295
  traceback.print_exc()
@@ -299,13 +227,13 @@ def add_audio_effects(audio_list, type_output):
299
  return result
300
 
301
 
302
- def apply_noisereduce(audio_list, type_output):
303
  # https://github.com/sa-if/Audio-Denoiser
304
  print("Noice reduce")
305
 
306
  result = []
307
  for audio_path in audio_list:
308
- out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
309
 
310
  try:
311
  # Load audio file
@@ -326,7 +254,7 @@ def apply_noisereduce(audio_list, type_output):
326
  )
327
 
328
  # Save reduced audio to file
329
- reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
330
  result.append(out_path)
331
 
332
  except Exception as e:
@@ -338,17 +266,13 @@ def apply_noisereduce(audio_list, type_output):
338
 
339
 
340
  @spaces.GPU()
341
- def convert_now(audio_files, random_tag, converter, type_output, steps):
342
- for step in range(steps):
343
- audio_files = converter(
344
- audio_files,
345
- random_tag,
346
- overwrite=False,
347
- parallel_workers=(2 if IS_COLAB else 8),
348
- type_output=type_output,
349
- )
350
-
351
- return audio_files
352
 
353
 
354
  def run(
@@ -363,8 +287,6 @@ def run(
363
  c_b_p,
364
  active_noise_reduce,
365
  audio_effects,
366
- type_output,
367
- steps,
368
  ):
369
  if not audio_files:
370
  raise ValueError("The audio pls")
@@ -394,17 +316,17 @@ def run(
394
  respiration_median_filtering=r_m_f,
395
  envelope_ratio=e_r,
396
  consonant_breath_protection=c_b_p,
397
- resample_sr=0,
398
  )
399
  time.sleep(0.1)
400
 
401
- result = convert_now(audio_files, random_tag, converter, type_output, steps)
402
 
403
  if active_noise_reduce:
404
- result = apply_noisereduce(result, type_output)
405
 
406
  if audio_effects:
407
- result = add_audio_effects(result, type_output)
408
 
409
  return result
410
 
@@ -563,30 +485,10 @@ def sound_gui():
563
  type="filepath",
564
  # format="mp3",
565
  autoplay=True,
566
- visible=True,
567
- interactive=False,
568
- elem_id="audio_tts",
569
- )
570
-
571
-
572
- def steps_conf():
573
- return gr.Slider(
574
- minimum=1,
575
- maximum=3,
576
- label="Steps",
577
- value=1,
578
- step=1,
579
- interactive=True,
580
  )
581
 
582
 
583
- def format_output_gui():
584
- return gr.Dropdown(
585
- label="Format output:",
586
- choices=["wav", "mp3", "flac"],
587
- value="wav",
588
- )
589
-
590
  def denoise_conf():
591
  return gr.Checkbox(
592
  False,
@@ -669,18 +571,9 @@ def show_components_down(value_active):
669
  visible=value_active
670
  )
671
 
672
- CSS = """
673
- #audio_tts {
674
- visibility: hidden; /* invisible but still takes space */
675
- height: 0px;
676
- width: 0px;
677
- max-width: 0px;
678
- max-height: 0px;
679
- }
680
- """
681
 
682
  def get_gui(theme):
683
- with gr.Blocks(theme=theme, css=CSS, fill_width=True, fill_height=False, delete_cache=delete_cache_time) as app:
684
  gr.Markdown(title)
685
  gr.Markdown(description)
686
 
@@ -715,7 +608,7 @@ def get_gui(theme):
715
 
716
  down_active_gui = down_active_conf()
717
  down_info = gr.Markdown(
718
- f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
719
  visible=False
720
  )
721
  with gr.Row():
@@ -741,20 +634,17 @@ def get_gui(theme):
741
  [model, indx]
742
  )
743
 
744
- with gr.Accordion(label="Advanced settings", open=False):
745
- algo = pitch_algo_conf()
746
- algo_lvl = pitch_lvl_conf()
747
- indx_inf = index_inf_conf()
748
- res_fc = respiration_filter_conf()
749
- envel_r = envelope_ratio_conf()
750
- const = consonant_protec_conf()
751
- steps_gui = steps_conf()
752
- format_out = format_output_gui()
753
- with gr.Row():
754
- with gr.Column():
755
- with gr.Row():
756
- denoise_gui = denoise_conf()
757
- effects_gui = effects_conf()
758
  button_base = button_conf()
759
  output_base = output_conf()
760
 
@@ -772,8 +662,6 @@ def get_gui(theme):
772
  const,
773
  denoise_gui,
774
  effects_gui,
775
- format_out,
776
- steps_gui,
777
  ],
778
  outputs=[output_base],
779
  )
@@ -830,17 +718,14 @@ def get_gui(theme):
830
  outputs=[output_base],
831
  cache_examples=False,
832
  )
833
- gr.Markdown(RESOURCES)
834
 
835
  return app
836
 
837
 
838
  if __name__ == "__main__":
839
- tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
840
- voices = sorted([
841
- (" - ".join(reversed(v["FriendlyName"].split("-"))).replace("Microsoft ", "").replace("Online (Natural)", f"({v['Gender']})").strip(), f"{v['ShortName']}-{v['Gender']}")
842
- for v in tts_voice_list
843
- ])
844
 
845
  app = get_gui(theme)
846
 
@@ -848,9 +733,9 @@ if __name__ == "__main__":
848
 
849
  app.launch(
850
  max_threads=40,
851
- share=IS_COLAB,
852
  show_error=True,
853
  quiet=False,
854
- debug=IS_COLAB,
855
- ssr_mode=False,
856
  )
 
6
  import logging
7
  import time
8
  import soundfile as sf
9
+ from infer_rvc_python.main import download_manager
10
  import zipfile
11
  import edge_tts
12
  import asyncio
 
21
  import urllib.request
22
  import shutil
23
  import threading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
26
 
27
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
30
+ description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately."
31
+ theme = "aliabid94/new-theme"
 
 
32
 
33
  PITCH_ALGO_OPT = [
34
  "pm",
 
39
  ]
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def find_files(directory):
43
  file_paths = []
44
  for filename in os.listdir(directory):
 
106
  return model, index
107
 
108
 
109
+ def get_file_size(url):
110
+
111
  if "huggingface" not in url:
112
  raise ValueError("Only downloads from Hugging Face are allowed")
113
 
114
  try:
115
+ with urllib.request.urlopen(url) as response:
116
+ info = response.info()
117
+ content_length = info.get("Content-Length")
 
 
 
118
 
119
  file_size = int(content_length)
120
+ if file_size > 500000000:
121
+ raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
 
 
 
122
 
123
  except Exception as e:
124
  raise e
 
130
  shutil.rmtree(directory)
131
 
132
 
133
+ def get_my_model(url_data):
134
 
135
  if not url_data:
136
  return None, None
137
 
138
  if "," in url_data:
139
+ a_, b_ = url_data.split()
140
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
141
  else:
142
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
 
147
  os.makedirs(directory, exist_ok=True)
148
 
149
  try:
150
+ get_file_size(a_)
151
+ if b_:
152
+ get_file_size(b_)
153
+
154
  valid_url = [a_] if not b_ else [a_, b_]
155
  for link in valid_url:
 
156
  download_manager(
157
  url=link,
158
  path=directory,
 
194
  t.start()
195
 
196
 
197
+ def add_audio_effects(audio_list):
198
  print("Audio effects")
199
 
200
  result = []
201
  for audio_path in audio_list:
202
  try:
203
+ output_path = f'{os.path.splitext(audio_path)[0]}_effects.wav'
204
 
205
  # Initialize audio effects plugins
206
  board = Pedalboard(
 
211
  ]
212
  )
213
 
 
 
 
214
  with AudioFile(audio_path) as f:
215
+ with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
216
+ # Read one second of audio at a time, until the file is empty:
217
  while f.tell() < f.frames:
218
  chunk = f.read(int(f.samplerate))
219
  effected = board(chunk, f.samplerate, reset=False)
220
  o.write(effected)
 
 
 
 
 
 
 
 
221
  result.append(output_path)
222
  except Exception as e:
223
  traceback.print_exc()
 
227
  return result
228
 
229
 
230
+ def apply_noisereduce(audio_list):
231
  # https://github.com/sa-if/Audio-Denoiser
232
  print("Noice reduce")
233
 
234
  result = []
235
  for audio_path in audio_list:
236
+ out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
237
 
238
  try:
239
  # Load audio file
 
254
  )
255
 
256
  # Save reduced audio to file
257
+ reduced_audio.export(out_path, format="wav")
258
  result.append(out_path)
259
 
260
  except Exception as e:
 
266
 
267
 
268
  @spaces.GPU()
269
+ def convert_now(audio_files, random_tag, converter):
270
+ return converter(
271
+ audio_files,
272
+ random_tag,
273
+ overwrite=False,
274
+ parallel_workers=8
275
+ )
 
 
 
 
276
 
277
 
278
  def run(
 
287
  c_b_p,
288
  active_noise_reduce,
289
  audio_effects,
 
 
290
  ):
291
  if not audio_files:
292
  raise ValueError("The audio pls")
 
316
  respiration_median_filtering=r_m_f,
317
  envelope_ratio=e_r,
318
  consonant_breath_protection=c_b_p,
319
+ resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
320
  )
321
  time.sleep(0.1)
322
 
323
+ result = convert_now(audio_files, random_tag, converter)
324
 
325
  if active_noise_reduce:
326
+ result = apply_noisereduce(result)
327
 
328
  if audio_effects:
329
+ result = add_audio_effects(result)
330
 
331
  return result
332
 
 
485
  type="filepath",
486
  # format="mp3",
487
  autoplay=True,
488
+ visible=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  )
490
 
491
 
 
 
 
 
 
 
 
492
  def denoise_conf():
493
  return gr.Checkbox(
494
  False,
 
571
  visible=value_active
572
  )
573
 
 
 
 
 
 
 
 
 
 
574
 
575
  def get_gui(theme):
576
+ with gr.Blocks(theme=theme, delete_cache=(3200, 3200)) as app:
577
  gr.Markdown(title)
578
  gr.Markdown(description)
579
 
 
608
 
609
  down_active_gui = down_active_conf()
610
  down_info = gr.Markdown(
611
+ "Provide a link to a zip file, like this one: `https://huggingface.co/mrmocciai/Models/resolve/main/Genshin%20Impact/ayaka-v2.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.index?download=true`",
612
  visible=False
613
  )
614
  with gr.Row():
 
634
  [model, indx]
635
  )
636
 
637
+ algo = pitch_algo_conf()
638
+ algo_lvl = pitch_lvl_conf()
639
+ indx_inf = index_inf_conf()
640
+ res_fc = respiration_filter_conf()
641
+ envel_r = envelope_ratio_conf()
642
+ const = consonant_protec_conf()
643
+ with gr.Row():
644
+ with gr.Column():
645
+ with gr.Row():
646
+ denoise_gui = denoise_conf()
647
+ effects_gui = effects_conf()
 
 
 
648
  button_base = button_conf()
649
  output_base = output_conf()
650
 
 
662
  const,
663
  denoise_gui,
664
  effects_gui,
 
 
665
  ],
666
  outputs=[output_base],
667
  )
 
718
  outputs=[output_base],
719
  cache_examples=False,
720
  )
 
721
 
722
  return app
723
 
724
 
725
  if __name__ == "__main__":
726
+
727
+ tts_voice_list = asyncio.new_event_loop().run_until_complete(edge_tts.list_voices())
728
+ voices = sorted([f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list])
 
 
729
 
730
  app = get_gui(theme)
731
 
 
733
 
734
  app.launch(
735
  max_threads=40,
736
+ share=False,
737
  show_error=True,
738
  quiet=False,
739
+ debug=False,
740
+ allowed_paths=["./downloads/"],
741
  )
pre-requirements.txt DELETED
@@ -1,2 +0,0 @@
1
- pip==23.0.1
2
- Setuptools<=80.6.0
 
 
 
requirements.txt CHANGED
@@ -1,11 +1,6 @@
1
- torch==2.5.1
2
- infer-rvc-python
3
  edge-tts
4
  pedalboard
5
  noisereduce
6
- numpy==1.23.5
7
- transformers<=4.48.3
8
- pydantic==2.10.6
9
- gradio==5.43.1
10
- spaces
11
- matplotlib-inline
 
1
+ torch==2.2.0
2
+ infer-rvc-python==1.1.0
3
  edge-tts
4
  pedalboard
5
  noisereduce
6
+ numpy==1.23.5