r3gm committed on
Commit
e5bb6b4
1 Parent(s): 49bcbba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -2
app.py CHANGED
@@ -8,6 +8,16 @@ import time
8
  import soundfile as sf
9
  from infer_rvc_python.main import download_manager
10
  import zipfile
 
 
 
 
 
 
 
 
 
 
11
 
12
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
13
 
@@ -93,6 +103,77 @@ def find_my_model(a_, b_):
93
  return model, index
94
 
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  @spaces.GPU()
97
  def convert_now(audio_files, random_tag, converter):
98
  return converter(
@@ -113,6 +194,8 @@ def run(
113
  r_m_f,
114
  e_r,
115
  c_b_p,
 
 
116
  ):
117
  if not audio_files:
118
  raise ValueError("The audio pls")
@@ -138,9 +221,17 @@ def run(
138
  consonant_breath_protection=c_b_p,
139
  resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
140
  )
141
- time.sleep(0.3)
142
 
143
- return convert_now(audio_files, random_tag, converter)
 
 
 
 
 
 
 
 
144
 
145
 
146
  def audio_conf():
@@ -245,12 +336,142 @@ def output_conf():
245
  )
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  def get_gui(theme):
249
  with gr.Blocks(theme=theme) as app:
250
  gr.Markdown(title)
251
  gr.Markdown(description)
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  aud = audio_conf()
 
 
 
 
 
 
 
 
254
  with gr.Column():
255
  with gr.Row():
256
  model = model_conf()
@@ -261,6 +482,11 @@ def get_gui(theme):
261
  res_fc = respiration_filter_conf()
262
  envel_r = envelope_ratio_conf()
263
  const = consonant_protec_conf()
 
 
 
 
 
264
  button_base = button_conf()
265
  output_base = output_conf()
266
 
@@ -276,6 +502,8 @@ def get_gui(theme):
276
  res_fc,
277
  envel_r,
278
  const,
 
 
279
  ],
280
  outputs=[output_base],
281
  )
@@ -339,6 +567,9 @@ def get_gui(theme):
339
 
340
  if __name__ == "__main__":
341
 
 
 
 
342
  app = get_gui(theme)
343
 
344
  app.queue(default_concurrency_limit=40)
 
8
  import soundfile as sf
9
  from infer_rvc_python.main import download_manager
10
  import zipfile
11
+ import edge_tts
12
+ import asyncio
13
+ import librosa
14
+ import traceback
15
+ import soundfile as sf
16
+ from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
17
+ from pedalboard.io import AudioFile
18
+ from pydub import AudioSegment
19
+ import noisereduce as nr
20
+ import numpy as np
21
 
22
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
23
 
 
103
  return model, index
104
 
105
 
106
def add_audio_effects(audio_list):
    """Apply a high-pass filter, compressor and light reverb to each file.

    For every path in ``audio_list`` the processed audio is written next to
    the input as ``<stem>_effects.wav``.

    Args:
        audio_list: Iterable of audio file paths.

    Returns:
        A list with one entry per input: the path of the processed file, or
        the original path unchanged when processing that file failed
        (best-effort; the error is printed, not raised).
    """
    print("Audio effects")

    result = []
    for audio_path in audio_list:
        try:
            output_path = f'{os.path.splitext(audio_path)[0]}_effects.wav'

            # Build a fresh board for each file: chunks are processed with
            # reset=False, so a shared board would presumably carry effect
            # state from one file into the next.
            board = Pedalboard(
                [
                    HighpassFilter(),
                    Compressor(ratio=4, threshold_db=-15),
                    Reverb(room_size=0.10, dry_level=0.8, wet_level=0.2, damping=0.7),
                ]
            )

            with AudioFile(audio_path) as f:
                with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
                    # Read one second of audio at a time, until the file is empty:
                    while f.tell() < f.frames:
                        chunk = f.read(int(f.samplerate))
                        effected = board(chunk, f.samplerate, reset=False)
                        o.write(effected)
            result.append(output_path)
        except Exception as e:
            # Keep the pipeline going: report the failure and pass the
            # unprocessed file through.
            traceback.print_exc()
            print(f"Error audio effects: {str(e)}")
            result.append(audio_path)

    return result
137
+
138
+
139
def apply_noisereduce(audio_list):
    """Run noise reduction on each audio file and save the result as WAV.

    For every path in ``audio_list`` the denoised audio is written next to
    the input as ``<stem>_noisereduce.wav``.

    Args:
        audio_list: Iterable of audio file paths.

    Returns:
        A list with one entry per input: the path of the denoised file, or
        the original path unchanged when processing that file failed
        (best-effort; the error is printed, not raised).
    """
    # https://github.com/sa-if/Audio-Denoiser
    print("Noice reduce")

    result = []
    for audio_path in audio_list:
        out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'

        try:
            # Load audio file
            audio = AudioSegment.from_file(audio_path)
            channels = audio.channels

            # Convert audio to numpy array
            samples = np.array(audio.get_array_of_samples())
            pcm_dtype = samples.dtype

            if channels > 1:
                # pydub returns interleaved samples (L, R, L, R, ...).
                # De-interleave into (channels, frames) so each channel is
                # denoised as its own signal instead of one scrambled mono one.
                samples = samples.reshape((-1, channels)).T

            # Reduce noise
            reduced_noise = np.asarray(
                nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
            )

            if channels > 1:
                # Back to (frames, channels) so the bytes are interleaved again.
                reduced_noise = reduced_noise.T

            if reduced_noise.dtype != pcm_dtype:
                # The raw bytes must match sample_width below; round and clip
                # rather than letting a float result be reinterpreted as PCM.
                limits = np.iinfo(pcm_dtype)
                reduced_noise = np.clip(
                    np.rint(reduced_noise), limits.min, limits.max
                ).astype(pcm_dtype)

            # Convert reduced noise signal back to audio
            reduced_audio = AudioSegment(
                np.ascontiguousarray(reduced_noise).tobytes(),
                frame_rate=audio.frame_rate,
                sample_width=audio.sample_width,
                channels=channels,
            )

            # Save reduced audio to file
            reduced_audio.export(out_path, format="wav")
            result.append(out_path)

        except Exception as e:
            # Keep the pipeline going: report the failure and pass the
            # unprocessed file through.
            traceback.print_exc()
            print(f"Error noisereduce: {str(e)}")
            result.append(audio_path)

    return result
175
+
176
+
177
  @spaces.GPU()
178
  def convert_now(audio_files, random_tag, converter):
179
  return converter(
 
194
  r_m_f,
195
  e_r,
196
  c_b_p,
197
+ active_noise_reduce,
198
+ audio_effects,
199
  ):
200
  if not audio_files:
201
  raise ValueError("The audio pls")
 
221
  consonant_breath_protection=c_b_p,
222
  resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
223
  )
224
+ time.sleep(0.1)
225
 
226
+ result = convert_now(audio_files, random_tag, converter)
227
+
228
+ if active_noise_reduce:
229
+ result = apply_noisereduce(result)
230
+
231
+ if audio_effects:
232
+ result = add_audio_effects(result)
233
+
234
+ return result
235
 
236
 
237
  def audio_conf():
 
336
  )
337
 
338
 
339
def active_tts_conf():
    """Create the "TTS" checkbox, unchecked by default and without a container."""
    tts_toggle = gr.Checkbox(value=False, label="TTS", container=False)
    return tts_toggle
346
+
347
+
348
def tts_voice_conf():
    """Create the initially hidden "tts voice" dropdown.

    The choices come from the module-level ``voices`` list, so that name
    must be defined before this function is called.
    """
    voice_dropdown = gr.Dropdown(
        choices=voices,
        value="en-US-EmmaMultilingualNeural-Female",
        label="tts voice",
        visible=False,
    )
    return voice_dropdown
355
+
356
+
357
def tts_text_conf():
    """Create the initially hidden three-line textbox for the TTS input text."""
    text_box = gr.Textbox(
        label="Text",
        placeholder="Write the text here...",
        value="",
        lines=3,
        visible=False,
    )
    return text_box
365
+
366
+
367
def tts_button_conf():
    """Create the initially hidden secondary "Process TTS" button."""
    process_button = gr.Button(
        value="Process TTS",
        visible=False,
        variant="secondary",
    )
    return process_button
373
+
374
+
375
def tts_play_conf():
    """Create the initially hidden "Play" checkbox, unchecked by default."""
    play_toggle = gr.Checkbox(
        value=False,
        label="Play",
        visible=False,
        container=False,
    )
    return play_toggle
383
+
384
+
385
def sound_gui():
    """Create the hidden, autoplaying audio component that takes a file path."""
    player = gr.Audio(
        type="filepath",
        value=None,
        visible=False,
        autoplay=True,
    )
    return player
393
+
394
+
395
def denoise_conf():
    """Create the visible "Denoise" checkbox, unchecked by default."""
    denoise_toggle = gr.Checkbox(
        value=False,
        label="Denoise",
        visible=True,
        container=False,
    )
    return denoise_toggle
403
+
404
+
405
def effects_conf():
    """Create the visible "Effects" checkbox, unchecked by default."""
    effects_toggle = gr.Checkbox(
        value=False,
        label="Effects",
        visible=True,
        container=False,
    )
    return effects_toggle
413
+
414
+
415
def infer_tts_audio(tts_voice, tts_text, play_tts):
    """Synthesize ``tts_text`` with edge-tts and save it as an MP3 file.

    The file is written to ``output/USER_<random 5 digits>/tts.mp3``.

    Args:
        tts_voice: Voice label in the ``<ShortName>-<Gender>`` form; the
            trailing ``-<Gender>`` part is stripped before calling edge-tts.
        tts_text: Text to synthesize.
        play_tts: When truthy, the path is also returned as the second
            element of the result.

    Returns:
        A ``([out_path], out_path_or_None)`` tuple; the second element is
        ``None`` when ``play_tts`` is falsy.

    Raises:
        ValueError: If no voice is selected or the text is empty/blank.
    """
    # Fail fast with a clear message, before any directory is created,
    # instead of letting the request fail inside edge-tts.
    if not tts_voice:
        raise ValueError("Please select a TTS voice")
    if not tts_text or not tts_text.strip():
        raise ValueError("Please write the text for TTS")

    out_dir = "output"
    folder_tts = "USER_" + str(random.randint(10000, 99999))

    # makedirs creates the parent "output" directory as well.
    target_dir = os.path.join(out_dir, folder_tts)
    os.makedirs(target_dir, exist_ok=True)
    out_path = os.path.join(target_dir, "tts.mp3")

    # Drop the trailing "-<Gender>" suffix to get the edge-tts voice name.
    voice_name = tts_voice.rsplit("-", 1)[0]

    asyncio.run(edge_tts.Communicate(tts_text, voice_name).save(out_path))

    return [out_path], (out_path if play_tts else None)
427
+
428
+
429
def show_components_tts(value_active):
    """Return four visibility updates, each set to ``value_active``.

    One update is produced per component listed as an output of the
    callback this function is wired to.
    """
    component_count = 4
    return tuple(
        gr.update(visible=value_active) for _ in range(component_count)
    )
439
+
440
+
441
  def get_gui(theme):
442
  with gr.Blocks(theme=theme) as app:
443
  gr.Markdown(title)
444
  gr.Markdown(description)
445
 
446
+ active_tts = active_tts_conf()
447
+ with gr.Row():
448
+ with gr.Column(scale=1):
449
+ tts_text = tts_text_conf()
450
+ with gr.Column(scale=2):
451
+ with gr.Row():
452
+ with gr.Column():
453
+ with gr.Row():
454
+ tts_voice = tts_voice_conf()
455
+ tts_active_play = tts_play_conf()
456
+
457
+ tts_button = tts_button_conf()
458
+ tts_play = sound_gui()
459
+
460
+ active_tts.change(
461
+ fn=show_components_tts,
462
+ inputs=[active_tts],
463
+ outputs=[tts_voice, tts_text, tts_button, tts_active_play],
464
+ )
465
+
466
  aud = audio_conf()
467
+ gr.HTML("<hr></h2>")
468
+
469
+ tts_button.click(
470
+ fn=infer_tts_audio,
471
+ inputs=[tts_voice, tts_text, tts_active_play],
472
+ outputs=[aud, tts_play],
473
+ )
474
+
475
  with gr.Column():
476
  with gr.Row():
477
  model = model_conf()
 
482
  res_fc = respiration_filter_conf()
483
  envel_r = envelope_ratio_conf()
484
  const = consonant_protec_conf()
485
+ with gr.Row():
486
+ with gr.Column():
487
+ with gr.Row():
488
+ denoise_gui = denoise_conf()
489
+ effects_gui = effects_conf()
490
  button_base = button_conf()
491
  output_base = output_conf()
492
 
 
502
  res_fc,
503
  envel_r,
504
  const,
505
+ denoise_gui,
506
+ effects_gui,
507
  ],
508
  outputs=[output_base],
509
  )
 
567
 
568
  if __name__ == "__main__":
569
 
570
+ tts_voice_list = asyncio.new_event_loop().run_until_complete(edge_tts.list_voices())
571
+ voices = sorted([f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list])
572
+
573
  app = get_gui(theme)
574
 
575
  app.queue(default_concurrency_limit=40)