soni_cloned

Running

App Files Files Community

test-rtechs committed on Jul 3

Commit

4265432

•

1 Parent(s): cd7a065

Update app_rvc.py

Browse files

Files changed (1) hide show

app_rvc.py +32 -18

app_rvc.py CHANGED Viewed

@@ -4,10 +4,6 @@ os.system("pip install -q piper-tts==1.2.0")
 os.system("pip install -q -r requirements_xtts.txt")
 os.system("pip install -q TTS==0.21.1  --no-deps")
 import spaces
-import torch
-if os.environ.get("ZERO_GPU") != "TRUE" and torch.cuda.is_available():
-    # onnxruntime GPU
-    os.system("pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/")
 import librosa
 from soni_translate.logging_setup import (
     logger,
@@ -15,6 +11,7 @@ from soni_translate.logging_setup import (
     configure_logging_libs,
 ); configure_logging_libs() # noqa
 import whisperx
 import os
 from soni_translate.audio_segments import create_translated_audio
 from soni_translate.text_to_speech import (
@@ -350,6 +347,7 @@ class SoniTranslate(SoniTrCache):
         edit_text_arg = kwargs[31]
         get_text_arg = kwargs[32]
         is_gui_arg = kwargs[-1]
         kwargs = kwargs[3:]
@@ -365,6 +363,14 @@ class SoniTranslate(SoniTrCache):
             return self.multilingual_media_conversion(
                 media_batch[0], "", "", *kwargs
             )
         if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
             media_batch = [media_batch[0]]
@@ -672,7 +678,7 @@ class SoniTranslate(SoniTrCache):
                     raise RuntimeError(
                         "The audio is too long to process in this demo. Alternatively, you"
                         " can install the app locally or use the Colab notebook available "
-                        "in the SoniTranslate repository."
                     )
                 elif duration_verify > 300:
                     tts_voices_list = [
@@ -687,7 +693,7 @@ class SoniTranslate(SoniTrCache):
                                 "XTTS is too slow to be used for audio longer than 5 "
                                 "minutes in this demo. Alternatively, you can install "
                                 "the app locally or use the Colab notebook available in"
-                                " the SoniTranslate repository."
                             )
             if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
@@ -1342,7 +1348,7 @@ class SoniTranslate(SoniTrCache):
                 "This option is disabled in this demo. "
                 "Alternatively, you can install "
                 "the app locally or use the Colab notebook available in"
-                " the SoniTranslate repository."
             )
         if "videobook" in output_type:
@@ -1477,7 +1483,7 @@ class SoniTranslate(SoniTrCache):
         return output
-title = "<center><strong><font size='7'>📽️ SoniTranslate 🈷️</font></strong></center>"
 def create_gui(theme, logs_in_gui=False):
@@ -1582,7 +1588,7 @@ def create_gui(theme, logs_in_gui=False):
                     max_speakers = gr.Slider(
                         1,
                         MAX_TTS,
-                        value=1,
                         step=1,
                         label=lg_conf["max_sk"],
                     )
@@ -1606,7 +1612,7 @@ def create_gui(theme, logs_in_gui=False):
                         SoniTr.tts_info.tts_list(),
                         value="en-US-AndrewMultilingualNeural-Male",
                         label=lg_conf["sk2"],
-                        visible=False,
                         interactive=True,
                     )
                     tts_voice02 = gr.Dropdown(
@@ -1796,6 +1802,12 @@ def create_gui(theme, logs_in_gui=False):
                         with gr.Accordion(
                             lg_conf["extra_setting"], open=False
                         ):
                             audio_accelerate = gr.Slider(
                                 label=lg_conf["acc_max_label"],
                                 value=1.9,
@@ -2004,7 +2016,7 @@ def create_gui(theme, logs_in_gui=False):
                     edit_sub_check = gr.Checkbox(
                         label=lg_conf["edit_sub_label"],
                         info=lg_conf["edit_sub_info"],
-                        interactive=(False if os.environ.get("IS_DEMO") == "TRUE" else True),
                     )
                     dummy_false_check = gr.Checkbox(
                         False,
@@ -2584,16 +2596,16 @@ def create_gui(theme, logs_in_gui=False):
             class Logger:
                 def __init__(self, filename):
-                    self.terminal = sys.stdout
-                    self.log = open(filename, "w")
                 def write(self, message):
-                    self.terminal.write(message)
-                    self.log.write(message)
                 def flush(self):
-                    self.terminal.flush()
-                    self.log.flush()
                 def isatty(self):
                     return False
@@ -2689,6 +2701,7 @@ def create_gui(theme, logs_in_gui=False):
                 AUDIO_MIX,
                 audio_accelerate,
                 acceleration_rate_regulation_gui,
                 volume_original_mix,
                 volume_translated_mix,
                 sub_type_output,
@@ -2756,6 +2769,7 @@ def create_gui(theme, logs_in_gui=False):
                 AUDIO_MIX,
                 audio_accelerate,
                 acceleration_rate_regulation_gui,
                 volume_original_mix,
                 volume_translated_mix,
                 sub_type_output,
@@ -2917,7 +2931,7 @@ if __name__ == "__main__":
     app.launch(
         max_threads=1,
-        share=args.public_url,
         show_error=True,
         quiet=False,
         debug=(True if logger.isEnabledFor(logging.DEBUG) else False),

 os.system("pip install -q -r requirements_xtts.txt")
 os.system("pip install -q TTS==0.21.1  --no-deps")
 import spaces
 import librosa
 from soni_translate.logging_setup import (
     logger,
     configure_logging_libs,
 ); configure_logging_libs() # noqa
 import whisperx
+import torch
 import os
 from soni_translate.audio_segments import create_translated_audio
 from soni_translate.text_to_speech import (
         edit_text_arg = kwargs[31]
         get_text_arg = kwargs[32]
+        video_acceleration_rate_regulation = kwargs[34]  # Adjust the index as needed
         is_gui_arg = kwargs[-1]
         kwargs = kwargs[3:]
             return self.multilingual_media_conversion(
                 media_batch[0], "", "", *kwargs
             )
+        if video_acceleration_rate_regulation:
+            logger.info("Video acceleration rate regulation is enabled.")
+            try:
+                self.accelerate_video_segments()
+                logger.info("Video segments accelerated successfully.")
+            except Exception as e:
+                logger.error(f"Failed to accelerate video segments: {e}")
+                raise
         if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
             media_batch = [media_batch[0]]
                     raise RuntimeError(
                         "The audio is too long to process in this demo. Alternatively, you"
                         " can install the app locally or use the Colab notebook available "
+                        "in the ALEPH-WEBETA repository."
                     )
                 elif duration_verify > 300:
                     tts_voices_list = [
                                 "XTTS is too slow to be used for audio longer than 5 "
                                 "minutes in this demo. Alternatively, you can install "
                                 "the app locally or use the Colab notebook available in"
+                                " the aleph-webeta repository."
                             )
             if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
                 "This option is disabled in this demo. "
                 "Alternatively, you can install "
                 "the app locally or use the Colab notebook available in"
+                " the ALEPH-WEBETA repository."
             )
         if "videobook" in output_type:
         return output
+title = "<center><strong><font size='7'>📽️ ALEPH-WEO-WEBETA V2 🈷️</font></strong></center>"
 def create_gui(theme, logs_in_gui=False):
                     max_speakers = gr.Slider(
                         1,
                         MAX_TTS,
+                        value=2,
                         step=1,
                         label=lg_conf["max_sk"],
                     )
                         SoniTr.tts_info.tts_list(),
                         value="en-US-AndrewMultilingualNeural-Male",
                         label=lg_conf["sk2"],
+                        visible=True,
                         interactive=True,
                     )
                     tts_voice02 = gr.Dropdown(
                         with gr.Accordion(
                             lg_conf["extra_setting"], open=False
                         ):
+                            # Add the new video acceleration rate regulation option
+                            video_acceleration_rate_regulation_gui = gr.Checkbox(
+                                False,
+                                label="Video Acceleration Rate Regulation",
+                                info="Enable this option to regulate the video segments rate to match the translated audio segments length and regulate overall video length.",
+                            )
                             audio_accelerate = gr.Slider(
                                 label=lg_conf["acc_max_label"],
                                 value=1.9,
                     edit_sub_check = gr.Checkbox(
                         label=lg_conf["edit_sub_label"],
                         info=lg_conf["edit_sub_info"],
+                        interactive=True,  # Always enable the checkbox
                     )
                     dummy_false_check = gr.Checkbox(
                         False,
             class Logger:
                 """Stream-like object that duplicates output to stdout and a file.

                 Every message passed to ``write`` is forwarded both to the
                 ``sys.stdout`` object that was current when the instance was
                 created and to a log file opened in ``"w"`` mode (so an
                 existing file with the same name is truncated).

                 NOTE: the log file handle is never closed by this class; it
                 stays open for as long as the instance is alive.
                 """

                 def __init__(self, filename):
                     """Capture the current stdout and open ``filename`` for writing."""
                     # Python has no implicit ``this``; instance state must be
                     # stored on the explicit ``self`` parameter.
                     self.terminal = sys.stdout
                     self.log = open(filename, "w")

                 def write(self, message):
                     """Write ``message`` to the terminal and to the log file."""
                     self.terminal.write(message)
                     self.log.write(message)

                 def flush(self):
                     """Flush both underlying streams."""
                     self.terminal.flush()
                     self.log.flush()

                 def isatty(self):
                     """Report that this stream is not an interactive terminal."""
                     return False
                 AUDIO_MIX,
                 audio_accelerate,
                 acceleration_rate_regulation_gui,
+                video_acceleration_rate_regulation_gui,  # New option
                 volume_original_mix,
                 volume_translated_mix,
                 sub_type_output,
                 AUDIO_MIX,
                 audio_accelerate,
                 acceleration_rate_regulation_gui,
+                video_acceleration_rate_regulation_gui,  # New option
                 volume_original_mix,
                 volume_translated_mix,
                 sub_type_output,
     app.launch(
         max_threads=1,
+        share=True,
         show_error=True,
         quiet=False,
         debug=(True if logger.isEnabledFor(logging.DEBUG) else False),