r3gm committed on
Commit
273d4fa
·
verified ·
1 Parent(s): 14cb923

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +615 -39
app.py CHANGED
@@ -28,6 +28,10 @@ import spaces
28
  import gradio as gr
29
  import logging
30
  import time
 
 
 
 
31
 
32
  warnings.filterwarnings("ignore")
33
 
@@ -360,6 +364,7 @@ def run_mdx(
360
  m_threads=2,
361
  device_base="cuda",
362
  ):
 
363
  if device_base == "cuda":
364
  device = torch.device("cuda:0")
365
  processor_num = 0
@@ -436,6 +441,96 @@ def run_mdx(
436
  return main_filepath, invert_filepath
437
 
438
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  MDX_DOWNLOAD_LINK = "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/"
440
  UVR_MODELS = [
441
  "UVR-MDX-NET-Voc_FT.onnx",
@@ -514,7 +609,8 @@ def process_uvr_task(
514
 
515
  if only_voiceless:
516
  logger.info("Voiceless Track Separation...")
517
- return run_mdx(
 
518
  mdx_model_params,
519
  song_output_dir,
520
  os.path.join(mdxnet_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
@@ -526,6 +622,8 @@ def process_uvr_task(
526
  device_base=device_base,
527
  )
528
 
 
 
529
  logger.info("Vocal Track Isolation...")
530
  vocals_path, instrumentals_path = run_mdx(
531
  mdx_model_params,
@@ -554,11 +652,7 @@ def process_uvr_task(
554
  device_base=device_base,
555
  )
556
  except Exception as e:
557
- if "0:00:" in str(e):
558
- gr.Info("Waiting 60 seconds for GPU quota")
559
- time.sleep(56)
560
- random_sleep()
561
- backup_vocals_path, main_vocals_path = run_mdx(
562
  mdx_model_params,
563
  song_output_dir,
564
  os.path.join(mdxnet_models_dir, "UVR_MDXNET_KARA_2.onnx"),
@@ -568,8 +662,6 @@ def process_uvr_task(
568
  denoise=True,
569
  device_base=device_base,
570
  )
571
- else:
572
- raise e
573
  else:
574
  backup_vocals_path, main_vocals_path = None, vocals_path
575
 
@@ -590,11 +682,7 @@ def process_uvr_task(
590
  device_base=device_base,
591
  )
592
  except Exception as e:
593
- if "0:00:" in str(e):
594
- gr.Info("Waiting 60 seconds for GPU quota")
595
- time.sleep(56)
596
- random_sleep()
597
- _, vocals_dereverb_path = run_mdx(
598
  mdx_model_params,
599
  song_output_dir,
600
  os.path.join(mdxnet_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
@@ -604,8 +692,6 @@ def process_uvr_task(
604
  denoise=True,
605
  device_base=device_base,
606
  )
607
- else:
608
- raise e
609
  else:
610
  vocals_dereverb_path = main_vocals_path
611
 
@@ -618,51 +704,229 @@ def process_uvr_task(
618
  )
619
 
620
 
621
- def sound_separate(media_file, stem, main, dereverb):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  if not media_file:
624
- raise ValueError("The audio pls")
625
 
626
  if not stem:
627
- raise ValueError("Select vocal or background...")
628
 
629
  hash_audio = str(get_hash(media_file))
 
630
 
631
  outputs = []
632
 
633
  start_time = time.time()
634
-
635
  if stem == "vocal":
636
  try:
637
  _, _, _, _, vocal_audio = process_uvr_task(
638
  orig_song_path=media_file,
639
- song_id=hash_audio+"mdx",
640
  main_vocals=main,
641
  dereverb=dereverb,
642
  remove_files_output_dir=False,
643
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
  outputs.append(vocal_audio)
645
  except Exception as error:
646
- gr.Info(str(error))
647
  logger.error(str(error))
 
648
 
649
  if stem == "background":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
 
 
651
  background_audio, _ = process_uvr_task(
652
  orig_song_path=media_file,
653
- song_id=hash_audio+"voiceless",
654
  only_voiceless=True,
655
  remove_files_output_dir=False,
656
  )
657
- # copy_files(background_audio, ".")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  outputs.append(background_audio)
659
 
660
  end_time = time.time()
661
  execution_time = end_time - start_time
662
  logger.info(f"Execution time: {execution_time} seconds")
663
-
664
  if not outputs:
665
- raise Exception("Error in sound separate")
666
 
667
  return outputs
668
 
@@ -680,7 +944,7 @@ def stem_conf():
680
  return gr.Radio(
681
  choices=["vocal", "background"],
682
  value="vocal",
683
- label="Vocal",
684
  # info="",
685
  )
686
 
@@ -702,6 +966,255 @@ def dereverb_conf():
702
  )
703
 
704
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
  def button_conf():
706
  return gr.Button(
707
  "Inference",
@@ -717,11 +1230,20 @@ def output_conf():
717
  )
718
 
719
 
720
- def show_vocal_components(input_bool):
721
- param = True if input_bool == "vocal" else False
722
- return gr.update(visible=param), gr.update(
723
- visible=param
724
- )
 
 
 
 
 
 
 
 
 
725
 
726
 
727
  def get_gui(theme):
@@ -730,23 +1252,54 @@ def get_gui(theme):
730
  gr.Markdown(description)
731
 
732
  aud = audio_conf()
733
-
734
  with gr.Column():
735
  with gr.Row():
736
  stem_gui = stem_conf()
737
 
738
-
739
  with gr.Column():
740
  with gr.Row():
741
  main_gui = main_conf()
742
  dereverb_gui = dereverb_conf()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
743
 
744
- stem_gui.change(
745
- show_vocal_components,
746
- [stem_gui],
747
- [main_gui, dereverb_gui],
748
- )
749
-
750
  button_base = button_conf()
751
  output_base = output_conf()
752
 
@@ -757,6 +1310,15 @@ def get_gui(theme):
757
  stem_gui,
758
  main_gui,
759
  dereverb_gui,
 
 
 
 
 
 
 
 
 
760
  ],
761
  outputs=[output_base],
762
  )
@@ -768,6 +1330,11 @@ def get_gui(theme):
768
  "vocal",
769
  False,
770
  False,
 
 
 
 
 
771
  ],
772
  ],
773
  fn=sound_separate,
@@ -776,6 +1343,15 @@ def get_gui(theme):
776
  stem_gui,
777
  main_gui,
778
  dereverb_gui,
 
 
 
 
 
 
 
 
 
779
  ],
780
  outputs=[output_base],
781
  cache_examples=False,
 
28
  import gradio as gr
29
  import logging
30
  import time
31
+ import traceback
32
+ from pedalboard import Pedalboard, Reverb, Delay, Chorus, Compressor, Gain, HighpassFilter, LowpassFilter
33
+ from pedalboard.io import AudioFile
34
+ import numpy as np
35
 
36
  warnings.filterwarnings("ignore")
37
 
 
364
  m_threads=2,
365
  device_base="cuda",
366
  ):
367
+
368
  if device_base == "cuda":
369
  device = torch.device("cuda:0")
370
  processor_num = 0
 
441
  return main_filepath, invert_filepath
442
 
443
 
444
def run_mdx_beta(
    model_params,
    output_dir,
    model_path,
    filename,
    exclude_main=False,
    exclude_inversion=False,
    suffix=None,
    invert_suffix=None,
    denoise=False,
    keep_orig=True,
    m_threads=2,
    device_base="",
):
    """Run an MDX-Net model on the CPU and write the separated stems as WAV.

    Args:
        model_params: Mapping from model hash (as returned by
            ``MDX.get_hash``) to a dict with the keys ``mdx_dim_f_set``,
            ``mdx_dim_t_set``, ``mdx_n_fft_scale_set``, ``primary_stem`` and
            ``compensate``.
        output_dir: Directory that receives the generated WAV files.
        model_path: Path of the ONNX model to run.
        filename: Path of the audio file to separate.
        exclude_main: When true, the primary stem is not written.
        exclude_inversion: When true, the inverted (residual) stem is not
            written.
        suffix: Name used for the primary stem file; defaults to the model's
            stem name.
        invert_suffix: Name used for the inverted stem file; defaults to the
            ``stem_naming`` entry for the primary stem, or
            ``"<stem>_diff"`` when there is none.
        denoise: When true, the model is also run on the polarity-inverted
            signal and the two results are averaged.
        keep_orig: When false, ``filename`` is deleted after processing.
        m_threads: Accepted for signature compatibility only; the value is
            always replaced by one derived from the audio duration.
        device_base: Accepted for signature compatibility only; this function
            always builds the model on ``torch.device("cpu")``.

    Returns:
        Tuple ``(main_filepath, invert_filepath)``; an element is ``None``
        when the corresponding stem was excluded.

    Raises:
        ValueError: If ``model_params`` has no entry for the model's hash.
    """
    model_hash = MDX.get_hash(model_path)
    mp = model_params.get(model_hash)
    if mp is None:
        # Fail with a readable message instead of "'NoneType' is not subscriptable".
        raise ValueError(
            f"No MDX parameters found for model '{model_path}' (hash: {model_hash})"
        )

    device = torch.device("cpu")
    processor_num = -1  # NOTE(review): presumably selects the CPU provider in MDX - confirm
    model = MDXModel(
        device,
        dim_f=mp["mdx_dim_f_set"],
        dim_t=2 ** mp["mdx_dim_t_set"],
        n_fft=mp["mdx_n_fft_scale_set"],
        stem_name=mp["primary_stem"],
        compensation=mp["compensate"],
    )
    mdx_sess = MDX(model_path, model, processor=processor_num)

    wave, sr = librosa.load(filename, mono=False, sr=44100)

    # Derive the duration from the decoded samples instead of probing the
    # file a second time: under 60 s -> 1 thread, 60-120 s -> 8, longer -> 16.
    duration = wave.shape[-1] / sr
    if 60 <= duration <= 120:
        m_threads = 8
    elif duration > 120:
        m_threads = 16
    else:
        m_threads = 1
    logger.info(f"threads: {m_threads}")

    # Normalizing the input wave gives better output. A silent (or empty)
    # signal has no peak to normalize by; dividing by zero would fill the
    # buffer with NaN, so the scale is left untouched in that case.
    peak = float(np.max(np.abs(wave))) if wave.size else 0.0
    if peak == 0.0:
        peak = 1.0
    wave /= peak

    if denoise:
        # Average the model output for the signal and for its
        # polarity-inverted copy.
        wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (
            mdx_sess.process_wave(wave, m_threads)
        )
        wave_processed *= 0.5
    else:
        wave_processed = mdx_sess.process_wave(wave, m_threads)

    # Return both signals to the original peak so that the subtraction used
    # for the inverted stem operates on matching scales.
    wave_processed *= peak
    wave *= peak

    base_name = os.path.basename(os.path.splitext(filename)[0])
    stem_name = model.stem_name if suffix is None else suffix

    main_filepath = None
    if not exclude_main:
        main_filepath = os.path.join(output_dir, f"{base_name}_{stem_name}.wav")
        sf.write(main_filepath, wave_processed.T, sr)

    invert_filepath = None
    if not exclude_inversion:
        diff_stem_name = (
            stem_naming.get(stem_name) if invert_suffix is None else invert_suffix
        )
        if diff_stem_name is None:
            diff_stem_name = f"{stem_name}_diff"
        invert_filepath = os.path.join(
            output_dir, f"{base_name}_{diff_stem_name}.wav"
        )
        # Residual = input minus the compensated model output.
        sf.write(
            invert_filepath,
            (-wave_processed.T * model.compensation) + wave.T,
            sr,
        )

    if not keep_orig:
        os.remove(filename)

    del mdx_sess, wave_processed, wave
    gc.collect()
    torch.cuda.empty_cache()
    return main_filepath, invert_filepath
532
+
533
+
534
  MDX_DOWNLOAD_LINK = "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/"
535
  UVR_MODELS = [
536
  "UVR-MDX-NET-Voc_FT.onnx",
 
609
 
610
  if only_voiceless:
611
  logger.info("Voiceless Track Separation...")
612
+
613
+ process = run_mdx(
614
  mdx_model_params,
615
  song_output_dir,
616
  os.path.join(mdxnet_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
 
622
  device_base=device_base,
623
  )
624
 
625
+ return process
626
+
627
  logger.info("Vocal Track Isolation...")
628
  vocals_path, instrumentals_path = run_mdx(
629
  mdx_model_params,
 
652
  device_base=device_base,
653
  )
654
  except Exception as e:
655
+ backup_vocals_path, main_vocals_path = run_mdx_beta(
 
 
 
 
656
  mdx_model_params,
657
  song_output_dir,
658
  os.path.join(mdxnet_models_dir, "UVR_MDXNET_KARA_2.onnx"),
 
662
  denoise=True,
663
  device_base=device_base,
664
  )
 
 
665
  else:
666
  backup_vocals_path, main_vocals_path = None, vocals_path
667
 
 
682
  device_base=device_base,
683
  )
684
  except Exception as e:
685
+ _, vocals_dereverb_path = run_mdx_beta(
 
 
 
 
686
  mdx_model_params,
687
  song_output_dir,
688
  os.path.join(mdxnet_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
 
692
  denoise=True,
693
  device_base=device_base,
694
  )
 
 
695
  else:
696
  vocals_dereverb_path = main_vocals_path
697
 
 
704
  )
705
 
706
 
707
def add_vocal_effects(input_file, output_file, reverb_room_size=0.6, vocal_reverb_dryness=0.8, reverb_damping=0.6, reverb_wet_level=0.35,
                      delay_seconds=0.4, delay_mix=0.25,
                      compressor_threshold_db=-25, compressor_ratio=3.5, compressor_attack_ms=10, compressor_release_ms=60,
                      gain_db=3):
    """Apply the vocal effect chain to ``input_file`` and write ``output_file``.

    The chain is: high-pass filter (library default cutoff), reverb,
    compressor, an optional delay and an optional gain stage.

    Args:
        input_file: Audio file to read.
        output_file: Audio file to create; it gets the sample rate and
            channel count of ``input_file``.
        reverb_room_size: ``room_size`` passed to ``Reverb``.
        vocal_reverb_dryness: ``dry_level`` passed to ``Reverb``.
        reverb_damping: ``damping`` passed to ``Reverb``.
        reverb_wet_level: ``wet_level`` passed to ``Reverb``.
        delay_seconds: ``delay_seconds`` passed to ``Delay``.
        delay_mix: ``mix`` passed to ``Delay``. The delay stage is added when
            either ``delay_seconds`` or ``delay_mix`` is positive.
        compressor_threshold_db: ``threshold_db`` passed to ``Compressor``.
        compressor_ratio: ``ratio`` passed to ``Compressor``.
        compressor_attack_ms: ``attack_ms`` passed to ``Compressor``.
        compressor_release_ms: ``release_ms`` passed to ``Compressor``.
        gain_db: Gain in dB; the gain stage is skipped when this is falsy.

    Raises:
        ValueError: If ``input_file`` and ``output_file`` are the same path.
    """
    # Opening the output for writing truncates it, so writing onto the input
    # would destroy the audio before it has been read.
    paths_given = isinstance(input_file, (str, os.PathLike)) and isinstance(
        output_file, (str, os.PathLike)
    )
    if paths_given and os.path.abspath(input_file) == os.path.abspath(output_file):
        raise ValueError("input_file and output_file must be different files.")

    effects = [
        HighpassFilter(),
        Reverb(
            room_size=reverb_room_size,
            damping=reverb_damping,
            wet_level=reverb_wet_level,
            dry_level=vocal_reverb_dryness,
        ),
        Compressor(
            threshold_db=compressor_threshold_db,
            ratio=compressor_ratio,
            attack_ms=compressor_attack_ms,
            release_ms=compressor_release_ms,
        ),
    ]

    if delay_seconds > 0 or delay_mix > 0:
        effects.append(Delay(delay_seconds=delay_seconds, mix=delay_mix))
        logger.info("delay applied")

    if gain_db:
        effects.append(Gain(gain_db=gain_db))
        logger.info("added gain db")

    board = Pedalboard(effects)

    with AudioFile(input_file) as f:
        with AudioFile(output_file, 'w', f.samplerate, f.num_channels) as o:
            # Read one second of audio at a time, until the file is empty.
            # reset=False keeps the effect state between consecutive chunks.
            while f.tell() < f.frames:
                chunk = f.read(int(f.samplerate))
                effected = board(chunk, f.samplerate, reset=False)
                o.write(effected)
737
+
738
+
739
def add_instrumental_effects(input_file, output_file, highpass_freq=100, lowpass_freq=12000,
                             reverb_room_size=0.5, reverb_damping=0.5, reverb_wet_level=0.25,
                             compressor_threshold_db=-20, compressor_ratio=2.5, compressor_attack_ms=15, compressor_release_ms=80,
                             gain_db=2):
    """Apply the instrumental effect chain to ``input_file`` and write ``output_file``.

    The chain is: high-pass filter, low-pass filter, an optional reverb,
    compressor and an optional gain stage.

    Args:
        input_file: Audio file to read.
        output_file: Audio file to create; it gets the sample rate and
            channel count of ``input_file``.
        highpass_freq: Cutoff frequency in Hz of the high-pass filter.
        lowpass_freq: Cutoff frequency in Hz of the low-pass filter.
        reverb_room_size: ``room_size`` passed to ``Reverb``.
        reverb_damping: ``damping`` passed to ``Reverb``.
        reverb_wet_level: ``wet_level`` passed to ``Reverb``. The reverb
            stage is added when any of the three reverb values is positive.
        compressor_threshold_db: ``threshold_db`` passed to ``Compressor``.
        compressor_ratio: ``ratio`` passed to ``Compressor``.
        compressor_attack_ms: ``attack_ms`` passed to ``Compressor``.
        compressor_release_ms: ``release_ms`` passed to ``Compressor``.
        gain_db: Gain in dB; the gain stage is skipped when this is falsy.

    Raises:
        ValueError: If ``input_file`` and ``output_file`` are the same path.
    """
    # Opening the output for writing truncates it, so writing onto the input
    # would destroy the audio before it has been read.
    paths_given = isinstance(input_file, (str, os.PathLike)) and isinstance(
        output_file, (str, os.PathLike)
    )
    if paths_given and os.path.abspath(input_file) == os.path.abspath(output_file):
        raise ValueError("input_file and output_file must be different files.")

    effects = [
        HighpassFilter(cutoff_frequency_hz=highpass_freq),
        LowpassFilter(cutoff_frequency_hz=lowpass_freq),
    ]

    if reverb_room_size > 0 or reverb_damping > 0 or reverb_wet_level > 0:
        effects.append(
            Reverb(
                room_size=reverb_room_size,
                damping=reverb_damping,
                wet_level=reverb_wet_level,
            )
        )

    effects.append(
        Compressor(
            threshold_db=compressor_threshold_db,
            ratio=compressor_ratio,
            attack_ms=compressor_attack_ms,
            release_ms=compressor_release_ms,
        )
    )

    if gain_db:
        effects.append(Gain(gain_db=gain_db))

    board = Pedalboard(effects)

    with AudioFile(input_file) as f:
        with AudioFile(output_file, 'w', f.samplerate, f.num_channels) as o:
            # Read one second of audio at a time, until the file is empty.
            # reset=False keeps the effect state between consecutive chunks.
            while f.tell() < f.frames:
                chunk = f.read(int(f.samplerate))
                effected = board(chunk, f.samplerate, reset=False)
                o.write(effected)
766
+
767
+
768
def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, background_effects=True,
                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_wet_level=0.35,
                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5, vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
                   vocal_gain_db=4,
                   background_highpass_freq=120, background_lowpass_freq=11000,
                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5, background_compressor_attack_ms=15, background_compressor_release_ms=80,
                   background_gain_db=3):
    """Separate the requested stem from ``media_file`` and optionally add effects.

    NOTE(review): a second ``def sound_separate`` follows this one in the
    same file and rebinds the name, so this copy appears to be dead code.
    Unlike the later definition it has no ``vocal_reverb_dryness`` parameter
    and it builds the effects path without ``os.path.abspath`` - confirm and
    remove one of the two.

    Returns a list with the path of the produced audio file. Raises
    ``ValueError`` when ``media_file`` or ``stem`` is empty and ``Exception``
    when no output was produced.
    """
    if not media_file:
        raise ValueError("The audio path is missing.")

    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    hash_audio = str(get_hash(media_file))
    media_dir = os.path.dirname(media_file)

    outputs = []

    start_time = time.time()

    if stem == "vocal":
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )

            if vocal_effects:
                # Output name: "<stem path without extension>_effects<extension>".
                suffix = '_effects'
                file_name, file_extension = os.path.splitext(vocal_audio)
                out_effects = file_name + suffix + file_extension
                out_effects_path = os.path.join(media_dir, out_effects)
                add_vocal_effects(vocal_audio, out_effects_path,
                                  reverb_room_size=vocal_reverb_room_size, reverb_damping=vocal_reverb_damping, reverb_wet_level=vocal_reverb_wet_level,
                                  delay_seconds=vocal_delay_seconds, delay_mix=vocal_delay_mix,
                                  compressor_threshold_db=vocal_compressor_threshold_db, compressor_ratio=vocal_compressor_ratio, compressor_attack_ms=vocal_compressor_attack_ms, compressor_release_ms=vocal_compressor_release_ms,
                                  gain_db=vocal_gain_db
                                  )
                vocal_audio = out_effects_path

            outputs.append(vocal_audio)
        except Exception as error:
            # Failures are logged and swallowed here; the empty ``outputs``
            # check below then raises a generic error.
            logger.error(str(error))
            traceback.print_exc()

    if stem == "background":
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )

        if background_effects:
            # Output name: "<stem path without extension>_effects<extension>".
            suffix = '_effects'
            file_name, file_extension = os.path.splitext(background_audio)
            out_effects = file_name + suffix + file_extension
            out_effects_path = os.path.join(media_dir, out_effects)
            add_instrumental_effects(background_audio, out_effects_path,
                                     highpass_freq=background_highpass_freq, lowpass_freq=background_lowpass_freq,
                                     reverb_room_size=background_reverb_room_size, reverb_damping=background_reverb_damping, reverb_wet_level=background_reverb_wet_level,
                                     compressor_threshold_db=background_compressor_threshold_db, compressor_ratio=background_compressor_ratio, compressor_attack_ms=background_compressor_attack_ms, compressor_release_ms=background_compressor_release_ms,
                                     gain_db=background_gain_db
                                     )
            background_audio = out_effects_path

        outputs.append(background_audio)

    end_time = time.time()
    execution_time = end_time - start_time
    logger.info(f"Execution time: {execution_time} seconds")

    if not outputs:
        raise Exception("Error in sound separation.")

    return outputs
849
+
850
+
851
def _effects_output_path(audio_path, suffix="_effects"):
    """Return the absolute form of ``audio_path`` with ``suffix`` inserted before its extension."""
    root, extension = os.path.splitext(os.path.abspath(audio_path))
    return f"{root}{suffix}{extension}"


def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, background_effects=True,
                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5, vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
                   vocal_gain_db=4,
                   background_highpass_freq=120, background_lowpass_freq=11000,
                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5, background_compressor_attack_ms=15, background_compressor_release_ms=80,
                   background_gain_db=3):
    """Separate the requested stem from ``media_file`` and optionally add effects.

    Args:
        media_file: Path of the audio file to process.
        stem: ``"vocal"`` or ``"background"``.
        main: Forwarded to ``process_uvr_task`` as ``main_vocals``.
        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb``.
        vocal_effects: When true, the vocal stem is passed through
            ``add_vocal_effects``.
        background_effects: When true, the background stem is passed through
            ``add_instrumental_effects``.
        vocal_*: Effect settings forwarded to ``add_vocal_effects``.
        background_*: Effect settings forwarded to
            ``add_instrumental_effects``.

    Returns:
        A list holding the path of the produced audio file. With effects
        enabled this is a new file written next to the separated stem, named
        ``<stem>_effects<ext>``.

    Raises:
        ValueError: If ``media_file`` or ``stem`` is empty.
        Exception: If no output was produced (for example because the vocal
            separation failed or ``stem`` is not a known value).
    """
    if not media_file:
        raise ValueError("The audio path is missing.")

    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    hash_audio = str(get_hash(media_file))

    outputs = []

    start_time = time.time()

    if stem == "vocal":
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )

            if vocal_effects:
                out_effects_path = _effects_output_path(vocal_audio)
                add_vocal_effects(
                    vocal_audio,
                    out_effects_path,
                    reverb_room_size=vocal_reverb_room_size,
                    reverb_damping=vocal_reverb_damping,
                    vocal_reverb_dryness=vocal_reverb_dryness,
                    reverb_wet_level=vocal_reverb_wet_level,
                    delay_seconds=vocal_delay_seconds,
                    delay_mix=vocal_delay_mix,
                    compressor_threshold_db=vocal_compressor_threshold_db,
                    compressor_ratio=vocal_compressor_ratio,
                    compressor_attack_ms=vocal_compressor_attack_ms,
                    compressor_release_ms=vocal_compressor_release_ms,
                    gain_db=vocal_gain_db,
                )
                vocal_audio = out_effects_path

            outputs.append(vocal_audio)
        except Exception as error:
            # Best effort: the failure is reported through the empty-output
            # check below, but keep the traceback in the log so the root
            # cause is not lost.
            logger.exception(str(error))

    if stem == "background":
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )

        if background_effects:
            out_effects_path = _effects_output_path(background_audio)
            add_instrumental_effects(
                background_audio,
                out_effects_path,
                highpass_freq=background_highpass_freq,
                lowpass_freq=background_lowpass_freq,
                reverb_room_size=background_reverb_room_size,
                reverb_damping=background_reverb_damping,
                reverb_wet_level=background_reverb_wet_level,
                compressor_threshold_db=background_compressor_threshold_db,
                compressor_ratio=background_compressor_ratio,
                compressor_attack_ms=background_compressor_attack_ms,
                compressor_release_ms=background_compressor_release_ms,
                gain_db=background_gain_db,
            )
            background_audio = out_effects_path

        outputs.append(background_audio)

    end_time = time.time()
    execution_time = end_time - start_time
    logger.info(f"Execution time: {execution_time} seconds")

    if not outputs:
        raise Exception("Error in sound separation.")

    return outputs
932
 
 
944
  return gr.Radio(
945
  choices=["vocal", "background"],
946
  value="vocal",
947
+ label="Stem",
948
  # info="",
949
  )
950
 
 
966
  )
967
 
968
 
969
def vocal_effects_conf():
    """Build the "Vocal Effects" checkbox (unchecked, visible)."""
    checkbox = gr.Checkbox(value=False, label="Vocal Effects", visible=True)
    return checkbox
976
+
977
+
978
def background_effects_conf():
    """Build the "Background Effects" checkbox (unchecked, initially hidden)."""
    checkbox = gr.Checkbox(value=False, label="Background Effects", visible=False)
    return checkbox
985
+
986
+
987
def vocal_reverb_room_size_conf():
    """Build the number input for the vocal reverb room size (0.0-1.0, default 0.15)."""
    options = dict(
        label="Vocal Reverb Room Size",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        visible=True,
    )
    return gr.Number(0.15, **options)
996
+
997
+
998
def vocal_reverb_damping_conf():
    """Build the number input for the vocal reverb damping (0.0-1.0, default 0.7)."""
    options = dict(
        label="Vocal Reverb Damping",
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        visible=True,
    )
    return gr.Number(0.7, **options)
1007
+
1008
+
1009
def vocal_reverb_wet_level_conf():
    """Build the number input for the vocal reverb wet level (0.0-1.0, default 0.2)."""
    options = dict(
        label="Vocal Reverb Wet Level",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        visible=True,
    )
    return gr.Number(0.2, **options)
1018
+
1019
+
1020
def vocal_reverb_dryness_level_conf():
    """Build the number input for the vocal reverb dryness level (0.0-1.0, default 0.8)."""
    options = dict(
        label="Vocal Reverb Dryness Level",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        visible=True,
    )
    return gr.Number(0.8, **options)
1029
+
1030
+
1031
def vocal_delay_seconds_conf():
    """Build the number input for the vocal delay time in seconds (0.0-1.0, default 0)."""
    options = dict(
        label="Vocal Delay Seconds",
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        visible=True,
    )
    return gr.Number(0.0, **options)
1040
+
1041
+
1042
def vocal_delay_mix_conf():
    """Build the number input for the vocal delay mix (0.0-1.0, default 0)."""
    options = dict(
        label="Vocal Delay Mix",
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        visible=True,
    )
    return gr.Number(0.0, **options)
1051
+
1052
+
1053
def vocal_compressor_threshold_db_conf():
    """Build the number input for the vocal compressor threshold in dB (-60 to 0, default -15)."""
    options = dict(
        label="Vocal Compressor Threshold (dB)",
        minimum=-60,
        maximum=0,
        step=1,
        visible=True,
    )
    return gr.Number(-15, **options)
1062
+
1063
+
1064
def vocal_compressor_ratio_conf():
    """Build the number input for the vocal compressor ratio (0-20, default 4)."""
    options = dict(
        label="Vocal Compressor Ratio",
        minimum=0,
        maximum=20,
        step=0.1,
        visible=True,
    )
    return gr.Number(4.0, **options)
1073
+
1074
+
1075
def vocal_compressor_attack_ms_conf():
    """Build the number input for the vocal compressor attack in ms (0-1000, default 1)."""
    options = dict(
        label="Vocal Compressor Attack (ms)",
        minimum=0,
        maximum=1000,
        step=1,
        visible=True,
    )
    return gr.Number(1.0, **options)
1084
+
1085
+
1086
def vocal_compressor_release_ms_conf():
    """Build the number input for the vocal compressor release in ms (0-3000, default 100)."""
    options = dict(
        label="Vocal Compressor Release (ms)",
        minimum=0,
        maximum=3000,
        step=1,
        visible=True,
    )
    return gr.Number(100, **options)
1095
+
1096
+
1097
def vocal_gain_db_conf():
    """Build the number input for the vocal gain in dB (-40 to 40, default 0)."""
    options = dict(
        label="Vocal Gain (dB)",
        minimum=-40,
        maximum=40,
        step=1,
        visible=True,
    )
    return gr.Number(0, **options)
1106
+
1107
+
1108
def background_highpass_freq_conf():
    """Build the number input for the background high-pass cutoff in Hz (0-1000, default 120)."""
    options = dict(
        label="Background Highpass Frequency (Hz)",
        minimum=0,
        maximum=1000,
        step=1,
        visible=True,
    )
    return gr.Number(120, **options)
1117
+
1118
+
1119
def background_lowpass_freq_conf():
    """Build the number input for the background low-pass cutoff in Hz (0-20000, default 11000)."""
    options = dict(
        label="Background Lowpass Frequency (Hz)",
        minimum=0,
        maximum=20000,
        step=1,
        visible=True,
    )
    return gr.Number(11000, **options)
1128
+
1129
+
1130
def background_reverb_room_size_conf():
    """Build the number input for the background reverb room size (0.0-1.0, default 0.1)."""
    options = dict(
        label="Background Reverb Room Size",
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        visible=True,
    )
    return gr.Number(0.1, **options)
1139
+
1140
+
1141
def background_reverb_damping_conf():
    """Build the number input for the background reverb damping (0.0-1.0, default 0.5)."""
    options = dict(
        label="Background Reverb Damping",
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        visible=True,
    )
    return gr.Number(0.5, **options)
1150
+
1151
+
1152
def background_reverb_wet_level_conf():
    """Build the number input for the background reverb wet level (0.0-1.0, default 0.25)."""
    options = dict(
        label="Background Reverb Wet Level",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        visible=True,
    )
    return gr.Number(0.25, **options)
1161
+
1162
+
1163
def background_compressor_threshold_db_conf():
    """Build the number input for the background compressor threshold in dB (-60 to 0, default -15)."""
    options = dict(
        label="Background Compressor Threshold (dB)",
        minimum=-60,
        maximum=0,
        step=1,
        visible=True,
    )
    return gr.Number(-15, **options)
1172
+
1173
+
1174
def background_compressor_ratio_conf():
    """Build the number input for the background compressor ratio (0-20, default 4)."""
    options = dict(
        label="Background Compressor Ratio",
        minimum=0,
        maximum=20,
        step=0.1,
        visible=True,
    )
    return gr.Number(4.0, **options)
1183
+
1184
+
1185
def background_compressor_attack_ms_conf():
    """Build the number input for the background compressor attack in ms (0-1000, default 15)."""
    options = dict(
        label="Background Compressor Attack (ms)",
        minimum=0,
        maximum=1000,
        step=1,
        visible=True,
    )
    return gr.Number(15, **options)
1194
+
1195
+
1196
def background_compressor_release_ms_conf():
    """Build the number input for the background compressor release in ms (0-3000, default 60)."""
    options = dict(
        label="Background Compressor Release (ms)",
        minimum=0,
        maximum=3000,
        step=1,
        visible=True,
    )
    return gr.Number(60, **options)
1205
+
1206
+
1207
def background_gain_db_conf():
    """Build the number input for the background gain in dB (-40 to 40, default 0)."""
    options = dict(
        label="Background Gain (dB)",
        minimum=-40,
        maximum=40,
        step=1,
        visible=True,
    )
    return gr.Number(0, **options)
1216
+
1217
+
1218
  def button_conf():
1219
  return gr.Button(
1220
  "Inference",
 
1230
  )
1231
 
1232
 
1233
def show_vocal_components(value_name):
    """Return four visibility updates for the selected stem.

    The first three updates are visible only when ``value_name`` is
    ``"vocal"``; the fourth is visible only otherwise.
    """
    vocal_selected = value_name == "vocal"
    vocal_only_updates = tuple(gr.update(visible=vocal_selected) for _ in range(3))
    return vocal_only_updates + (gr.update(visible=not vocal_selected),)
1247
 
1248
 
1249
  def get_gui(theme):
 
1252
  gr.Markdown(description)
1253
 
1254
  aud = audio_conf()
1255
+
1256
  with gr.Column():
1257
  with gr.Row():
1258
  stem_gui = stem_conf()
1259
 
 
1260
  with gr.Column():
1261
  with gr.Row():
1262
  main_gui = main_conf()
1263
  dereverb_gui = dereverb_conf()
1264
+ vocal_effects_gui = vocal_effects_conf()
1265
+ background_effects_gui = background_effects_conf()
1266
+
1267
+ # with gr.Column():
1268
+ with gr.Accordion("Vocal Effects Parameters", open=False): # with gr.Row():
1269
+ # gr.Label("Vocal Effects Parameters")
1270
+ with gr.Row():
1271
+ vocal_reverb_room_size_gui = vocal_reverb_room_size_conf()
1272
+ vocal_reverb_damping_gui = vocal_reverb_damping_conf()
1273
+ vocal_reverb_dryness_gui = vocal_reverb_dryness_level_conf()
1274
+ vocal_reverb_wet_level_gui = vocal_reverb_wet_level_conf()
1275
+ vocal_delay_seconds_gui = vocal_delay_seconds_conf()
1276
+ vocal_delay_mix_gui = vocal_delay_mix_conf()
1277
+ vocal_compressor_threshold_db_gui = vocal_compressor_threshold_db_conf()
1278
+ vocal_compressor_ratio_gui = vocal_compressor_ratio_conf()
1279
+ vocal_compressor_attack_ms_gui = vocal_compressor_attack_ms_conf()
1280
+ vocal_compressor_release_ms_gui = vocal_compressor_release_ms_conf()
1281
+ vocal_gain_db_gui = vocal_gain_db_conf()
1282
+
1283
+ with gr.Accordion("Background Effects Parameters", open=False): # with gr.Row():
1284
+ # gr.Label("Background Effects Parameters")
1285
+ with gr.Row():
1286
+ background_highpass_freq_gui = background_highpass_freq_conf()
1287
+ background_lowpass_freq_gui = background_lowpass_freq_conf()
1288
+ background_reverb_room_size_gui = background_reverb_room_size_conf()
1289
+ background_reverb_damping_gui = background_reverb_damping_conf()
1290
+ background_reverb_wet_level_gui = background_reverb_wet_level_conf()
1291
+ background_compressor_threshold_db_gui = background_compressor_threshold_db_conf()
1292
+ background_compressor_ratio_gui = background_compressor_ratio_conf()
1293
+ background_compressor_attack_ms_gui = background_compressor_attack_ms_conf()
1294
+ background_compressor_release_ms_gui = background_compressor_release_ms_conf()
1295
+ background_gain_db_gui = background_gain_db_conf()
1296
+
1297
+ stem_gui.change(
1298
+ show_vocal_components,
1299
+ [stem_gui],
1300
+ [main_gui, dereverb_gui, vocal_effects_gui, background_effects_gui],
1301
+ )
1302
 
 
 
 
 
 
 
1303
  button_base = button_conf()
1304
  output_base = output_conf()
1305
 
 
1310
  stem_gui,
1311
  main_gui,
1312
  dereverb_gui,
1313
+ vocal_effects_gui,
1314
+ background_effects_gui,
1315
+ vocal_reverb_room_size_gui, vocal_reverb_damping_gui, vocal_reverb_dryness_gui, vocal_reverb_wet_level_gui,
1316
+ vocal_delay_seconds_gui, vocal_delay_mix_gui, vocal_compressor_threshold_db_gui, vocal_compressor_ratio_gui,
1317
+ vocal_compressor_attack_ms_gui, vocal_compressor_release_ms_gui, vocal_gain_db_gui,
1318
+ background_highpass_freq_gui, background_lowpass_freq_gui, background_reverb_room_size_gui,
1319
+ background_reverb_damping_gui, background_reverb_wet_level_gui, background_compressor_threshold_db_gui,
1320
+ background_compressor_ratio_gui, background_compressor_attack_ms_gui, background_compressor_release_ms_gui,
1321
+ background_gain_db_gui,
1322
  ],
1323
  outputs=[output_base],
1324
  )
 
1330
  "vocal",
1331
  False,
1332
  False,
1333
+ False,
1334
+ False,
1335
+ 0.15, 0.7, 0.8, 0.2,
1336
+ 0., 0., -15, 4., 1, 100, 0,
1337
+ 120, 11000, 0.5, 0.1, 0.25, -15, 4., 15, 60, 0,
1338
  ],
1339
  ],
1340
  fn=sound_separate,
 
1343
  stem_gui,
1344
  main_gui,
1345
  dereverb_gui,
1346
+ vocal_effects_gui,
1347
+ background_effects_gui,
1348
+ vocal_reverb_room_size_gui, vocal_reverb_damping_gui, vocal_reverb_dryness_gui, vocal_reverb_wet_level_gui,
1349
+ vocal_delay_seconds_gui, vocal_delay_mix_gui, vocal_compressor_threshold_db_gui, vocal_compressor_ratio_gui,
1350
+ vocal_compressor_attack_ms_gui, vocal_compressor_release_ms_gui, vocal_gain_db_gui,
1351
+ background_highpass_freq_gui, background_lowpass_freq_gui, background_reverb_room_size_gui,
1352
+ background_reverb_damping_gui, background_reverb_wet_level_gui, background_compressor_threshold_db_gui,
1353
+ background_compressor_ratio_gui, background_compressor_attack_ms_gui, background_compressor_release_ms_gui,
1354
+ background_gain_db_gui,
1355
  ],
1356
  outputs=[output_base],
1357
  cache_examples=False,