SoybeanMilk committed • Commit 0052b96
Parent(s): a87bad5

Upload 2 files

- app.py +71 -37
- config.json5 +50 -1
app.py CHANGED
@@ -40,7 +40,7 @@ from src.whisper.whisperFactory import create_whisper_container
 from src.translation.translationModel import TranslationModel
 from src.translation.translationLangs import (TranslationLang,
     _TO_LANG_CODE_WHISPER, get_lang_whisper_names, get_lang_from_whisper_name, get_lang_from_whisper_code,
-    get_lang_nllb_names, get_lang_from_nllb_name, get_lang_m2m100_names, get_lang_from_m2m100_name)
+    get_lang_nllb_names, get_lang_from_nllb_name, get_lang_m2m100_names, get_lang_from_m2m100_name, sort_lang_by_whisper_codes)
 import shutil
 import zhconv
 import tqdm
@@ -233,6 +233,8 @@ class WhisperTranscriber:
         mt5LangName: str = decodeOptions.pop("mt5LangName")
         ALMAModelName: str = decodeOptions.pop("ALMAModelName")
         ALMALangName: str = decodeOptions.pop("ALMALangName")
+        madlad400ModelName: str = decodeOptions.pop("madlad400ModelName")
+        madlad400LangName: str = decodeOptions.pop("madlad400LangName")

         translationBatchSize: int = decodeOptions.pop("translationBatchSize")
         translationNoRepeatNgramSize: int = decodeOptions.pop("translationNoRepeatNgramSize")
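A self-contained sketch of the pop-then-forward pattern this hunk extends (the dict contents here are hypothetical): UI-only fields are removed from decodeOptions so that only genuine Whisper decode arguments remain for the later **decodeOptions call.

```python
# Hypothetical option dict; only "temperature" is a real decode argument.
decodeOptions = {
    "madlad400ModelName": "madlad400-10b-mt-ct2-int8_float16/SoybeanMilk",
    "madlad400LangName": "Japanese",
    "temperature": 0.0,
}

# pop() removes the UI-only fields and returns their values...
madlad400ModelName: str = decodeOptions.pop("madlad400ModelName")
madlad400LangName: str = decodeOptions.pop("madlad400LangName")

# ...so the remainder can be forwarded safely as **decodeOptions.
assert decodeOptions == {"temperature": 0.0}
```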
@@ -250,6 +252,7 @@ class WhisperTranscriber:
         vadPadding: float = decodeOptions.pop("vadPadding", self.app_config.vad_padding)
         vadPromptWindow: float = decodeOptions.pop("vadPromptWindow", self.app_config.vad_prompt_window)
         vadInitialPromptMode: str = decodeOptions.pop("vadInitialPromptMode", self.app_config.vad_initial_prompt_mode)
+        self.vad_process_timeout: float = decodeOptions.pop("vadPocessTimeout", self.vad_process_timeout)

         diarization: bool = decodeOptions.pop("diarization", False)
         diarization_speakers: int = decodeOptions.pop("diarization_speakers", 2)
@@ -267,19 +270,22 @@ class WhisperTranscriber:
         if whisperNoRepeatNgramSize is not None and whisperNoRepeatNgramSize <= 1:
             decodeOptions.pop("no_repeat_ngram_size")

-
-
-
-
-        #
-        #
-        #
-        #
-        #
-        #
-        #
-        #
-        #
+        for key, value in list(decodeOptions.items()):
+            if value == "":
+                del decodeOptions[key]
+
+        # word_timestamps = decodeOptions.get("word_timestamps", False)
+        # condition_on_previous_text = decodeOptions.get("condition_on_previous_text", False)
+        # prepend_punctuations = decodeOptions.get("prepend_punctuations", None)
+        # append_punctuations = decodeOptions.get("append_punctuations", None)
+        # initial_prompt = decodeOptions.get("initial_prompt", None)
+        # best_of = decodeOptions.get("best_of", None)
+        # beam_size = decodeOptions.get("beam_size", None)
+        # patience = decodeOptions.get("patience", None)
+        # length_penalty = decodeOptions.get("length_penalty", None)
+        # suppress_tokens = decodeOptions.get("suppress_tokens", None)
+        # compression_ratio_threshold = decodeOptions.get("compression_ratio_threshold", None)
+        # logprob_threshold = decodeOptions.get("logprob_threshold", None)

         vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow, vadInitialPromptMode)

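The new loop deletes empty-string entries before the options reach the model; iterating over a snapshot (list(decodeOptions.items())) is what makes deleting keys mid-loop safe. A minimal runnable sketch with made-up values:

```python
decodeOptions = {"initial_prompt": "", "beam_size": 5, "patience": ""}

# Snapshot the items first: deleting from a dict while iterating the dict
# itself would raise RuntimeError.
for key, value in list(decodeOptions.items()):
    if value == "":
        del decodeOptions[key]

print(decodeOptions)  # {'beam_size': 5} - blank UI fields no longer shadow defaults
```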
@@ -340,6 +346,10 @@ class WhisperTranscriber:
             selectedModelName = ALMAModelName if ALMAModelName is not None and len(ALMAModelName) > 0 else "ALMA-13B-GPTQ/TheBloke"
             selectedModel = next((modelConfig for modelConfig in self.app_config.models["ALMA"] if modelConfig.name == selectedModelName), None)
             translationLang = get_lang_from_m2m100_name(ALMALangName)
+        elif translateInput == "madlad400" and madlad400LangName is not None and len(madlad400LangName) > 0:
+            selectedModelName = madlad400ModelName if madlad400ModelName is not None and len(madlad400ModelName) > 0 else "madlad400-10b-mt-ct2-int8_float16"
+            selectedModel = next((modelConfig for modelConfig in self.app_config.models["madlad400"] if modelConfig.name == selectedModelName), None)
+            translationLang = get_lang_from_m2m100_name(madlad400LangName)

         if translationLang is not None:
             translationModel = TranslationModel(modelConfig=selectedModel, whisperLang=whisperLang, translationLang=translationLang, batchSize=translationBatchSize, noRepeatNgramSize=translationNoRepeatNgramSize, numBeams=translationNumBeams)
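The madlad400 branch reuses the app's selection idiom: fall back to a default model name, then look the name up with next(), which yields None instead of raising when nothing matches. A sketch with stand-in data (ModelConfig here is a hypothetical substitute for the app's real config class):

```python
from dataclasses import dataclass

@dataclass
class ModelConfig:  # stand-in for the app's model config entries
    name: str
    url: str

models = {"madlad400": [ModelConfig("madlad400-10b-mt-ct2-int8_float16",
                                    "SoybeanMilk/madlad400-10b-mt-ct2-int8_float16")]}

requested = ""  # e.g. nothing chosen in the dropdown
selectedModelName = requested if requested else "madlad400-10b-mt-ct2-int8_float16"

# next(generator, None) returns the first match, or None for unknown names.
selectedModel = next((m for m in models["madlad400"] if m.name == selectedModelName), None)
assert selectedModel is not None
```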
@@ -384,7 +394,7 @@ class WhisperTranscriber:

                 # Transcribe
                 result = self.transcribe_file(model, source.source_path, whisperLangCode, task, vadOptions, scaled_progress_listener, **decodeOptions)
-                if whisperLang is None and result["language"] is not None and len(result["language"]) > 0:
+                if translationModel is not None and whisperLang is None and result["language"] is not None and len(result["language"]) > 0:
                     whisperLang = get_lang_from_whisper_code(result["language"])
                     translationModel.whisperLang = whisperLang

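The added "translationModel is not None" guard matters because the body dereferences translationModel; when no translation backend is selected, the old condition could still hold and crash. Reduced illustration:

```python
translationModel = None      # no translation backend selected
whisperLang = None
result = {"language": "ja"}  # language code detected by Whisper

# Without the leading None check, translationModel.whisperLang below
# would raise AttributeError.
if translationModel is not None and whisperLang is None and result["language"]:
    translationModel.whisperLang = result["language"]
```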
@@ -413,7 +423,7 @@ class WhisperTranscriber:
                         out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
                         outRsult = out.run(overwrite_output=True)
                     except Exception as e:
-
+                        print(traceback.format_exc())
                         print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
                 elif self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
                     print("Saving downloaded file [" + source.source_name + "]")
@@ -421,7 +431,7 @@ class WhisperTranscriber:
                         save_path = os.path.join(self.app_config.output_dir, filePrefix)
                         shutil.copy(source.source_path, save_path + suffix)
                     except Exception as e:
-
+                        print(traceback.format_exc())
                         print("Error saving downloaded file: \n" + source.source_path + ", \n" + str(e))

         if len(sources) > 1:
@@ -473,7 +483,7 @@ class WhisperTranscriber:
                 try:
                     os.remove(source.source_path)
                 except Exception as e:
-
+                    print(traceback.format_exc())
                     print("Error deleting temporary source file: \n" + source.source_path + ", \n" + str(e))

         except ExceededMaximumDuration as e:
@@ -619,7 +629,7 @@ class WhisperTranscriber:
     def _create_silero_config(self, non_speech_strategy: NonSpeechStrategy, vadOptions: VadOptions):
         # Use Silero VAD
         if (self.vad_model is None):
-            self.vad_model = VadSileroTranscription()
+            self.vad_model = VadSileroTranscription() #vad_model is snakers4/silero-vad

         config = TranscriptionConfig(non_speech_strategy = non_speech_strategy,
                                      max_silent_period=vadOptions.vadMergeWindow, max_merge_size=vadOptions.vadMaxMergeSize,
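The "if (self.vad_model is None)" check is plain lazy initialization: the Silero VAD model is constructed the first time it is needed, then cached on the instance. The same pattern in a runnable sketch (object() stands in for the real VadSileroTranscription):

```python
class Transcriber:
    def __init__(self):
        self.vad_model = None

    def _get_vad(self):
        # Construct the expensive model only on first use, then reuse it.
        if self.vad_model is None:
            self.vad_model = object()  # stand-in for VadSileroTranscription()
        return self.vad_model

t = Transcriber()
assert t._get_vad() is t._get_vad()  # built once, reused afterwards
```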
@@ -661,7 +671,6 @@ class WhisperTranscriber:

             print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
         except Exception as e:
-            # Ignore error - it's just a cleanup
             print(traceback.format_exc())
             print("Error process segments: " + str(e))

@@ -771,8 +780,15 @@ class WhisperTranscriber:
         self.diarization = None

 def create_ui(app_config: ApplicationConfig):
+    translateModelMd: str = None
     optionsMd: str = None
     readmeMd: str = None
+    try:
+        translateModelPath = pathlib.Path("docs/translateModel.md")
+        with open(translateModelPath, "r", encoding="utf-8") as translateModelFile:
+            translateModelMd = translateModelFile.read()
+    except Exception as e:
+        print("Error occurred during read translateModel.md file: ", str(e))
     try:
         optionsPath = pathlib.Path("docs/options.md")
         with open(optionsPath, "r", encoding="utf-8") as optionsFile:
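Each markdown file is read in its own try/except, so a missing docs/translateModel.md only leaves translateModelMd as None (which later hides its accordion) instead of breaking create_ui. An equivalent compact sketch:

```python
import pathlib

translateModelMd = None
try:
    translateModelMd = pathlib.Path("docs/translateModel.md").read_text(encoding="utf-8")
except Exception as e:
    print("Error occurred during read translateModel.md file: ", str(e))

# Later: the accordion is only built when the text actually loaded.
if translateModelMd is not None:
    pass  # e.g. gr.Accordion("docs/translateModel.md") + gr.Markdown(translateModelMd)
```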
@@ -817,23 +833,16 @@ def create_ui(app_config: ApplicationConfig):
     uiDescription += "\n\n" + "Max audio file length: " + str(app_config.input_audio_max_duration) + " s"

     uiArticle = "Read the [documentation here](https://gitlab.com/aadnk/whisper-webui/-/blob/main/docs/options.md)."
-    uiArticle += "\n\nWhisper's Task 'translate' only implements the functionality of translating other languages into English. "
-    uiArticle += "OpenAI does not guarantee translations between arbitrary languages. In such cases, you can choose to use the NLLB Model to implement the translation task. "
-    uiArticle += "However, it's important to note that the NLLB Model runs slowly, and the completion time may be twice as long as usual. "
-    uiArticle += "\n\nThe larger the parameters of the NLLB model, the better its performance is expected to be. "
-    uiArticle += "However, it also requires higher computational resources, making it slower to operate. "
-    uiArticle += "On the other hand, the version converted from ct2 (CTranslate2) requires lower resources and operates at a faster speed."
-    uiArticle += "\n\nCurrently, enabling word-level timestamps cannot be used in conjunction with NLLB Model translation "
-    uiArticle += "because Word Timestamps will split the source text, and after translation, it becomes a non-word-level string. "
-    uiArticle += "\n\nThe 'mt5-zh-ja-en-trimmed' model is finetuned from Google's 'mt5-base' model. "
-    uiArticle += "This model has a relatively good translation speed, but it only supports three languages: Chinese, Japanese, and English. "

     whisper_models = app_config.get_model_names("whisper")
     nllb_models = app_config.get_model_names("nllb")
     m2m100_models = app_config.get_model_names("m2m100")
     mt5_models = app_config.get_model_names("mt5")
     ALMA_models = app_config.get_model_names("ALMA")
-
+    madlad400_models = app_config.get_model_names("madlad400")
+    if not torch.cuda.is_available(): #Due to the poor support of GPTQ for CPUs, the execution time per iteration exceeds a thousand seconds when operating on a CPU. Therefore, when the system does not support a GPU, the GPTQ model is removed from the list.
+        ALMA_models = list(filter(lambda alma: "GPTQ" not in alma, ALMA_models))
+
     common_whisper_inputs = lambda : {
         gr.Dropdown(label="Whisper - Model (for audio)", choices=whisper_models, value=app_config.default_model_name, elem_id="whisperModelName"),
         gr.Dropdown(label="Whisper - Language", choices=sorted(get_lang_whisper_names()), value=app_config.language, elem_id="whisperLangName"),
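The CPU fallback drops GPTQ checkpoints from the dropdown because, per the in-line comment, GPTQ inference without CUDA can take over a thousand seconds per iteration. Standalone sketch of the filter:

```python
ALMA_models = ["ALMA-7B-GPTQ/TheBloke", "ALMA-13B-GPTQ/TheBloke",
               "ALMA-7B-ct2:int8_float16/avan"]

cuda_available = False  # stand-in for torch.cuda.is_available()
if not cuda_available:
    ALMA_models = list(filter(lambda alma: "GPTQ" not in alma, ALMA_models))

print(ALMA_models)  # ['ALMA-7B-ct2:int8_float16/avan']
```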
@@ -852,7 +861,11 @@ def create_ui(app_config: ApplicationConfig):
     }
     common_ALMA_inputs = lambda : {
         gr.Dropdown(label="ALMA - Model (for translate)", choices=ALMA_models, elem_id="ALMAModelName"),
-        gr.Dropdown(label="ALMA - Language", choices=
+        gr.Dropdown(label="ALMA - Language", choices=sort_lang_by_whisper_codes(["en", "de", "cs", "is", "ru", "zh", "ja"]), elem_id="ALMALangName"),
+    }
+    common_madlad400_inputs = lambda : {
+        gr.Dropdown(label="madlad400 - Model (for translate)", choices=madlad400_models, elem_id="madlad400ModelName"),
+        gr.Dropdown(label="madlad400 - Language", choices=sorted(get_lang_m2m100_names()), elem_id="madlad400LangName"),
     }

     common_translation_inputs = lambda : {
@@ -865,6 +878,7 @@ def create_ui(app_config: ApplicationConfig):
         gr.Dropdown(choices=["none", "silero-vad", "silero-vad-skip-gaps", "silero-vad-expand-into-gaps", "periodic-vad"], value=app_config.default_vad, label="VAD", elem_id="vad"),
         gr.Number(label="VAD - Merge Window (s)", precision=0, value=app_config.vad_merge_window, elem_id="vadMergeWindow"),
         gr.Number(label="VAD - Max Merge Size (s)", precision=0, value=app_config.vad_max_merge_size, elem_id="vadMaxMergeSize"),
+        gr.Number(label="VAD - Process Timeout (s)", precision=0, value=app_config.vad_process_timeout, elem_id="vadPocessTimeout"),
     }

     common_word_timestamps_inputs = lambda : {
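Note that the new field's elem_id "vadPocessTimeout" matches the key popped in the @@ -250,6 +252,7 hunk above, so the option round-trips despite the "Pocess" spelling; if the name is ever corrected, both sites must change together.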
@@ -917,10 +931,14 @@ def create_ui(app_config: ApplicationConfig):
                     with gr.Tab(label="ALMA") as simpleALMATab:
                         with gr.Row():
                             simpleInputDict.update(common_ALMA_inputs())
+                    with gr.Tab(label="madlad400") as simplemadlad400Tab:
+                        with gr.Row():
+                            simpleInputDict.update(common_madlad400_inputs())
                     simpleM2M100Tab.select(fn=lambda: "m2m100", inputs = [], outputs= [simpleTranslateInput] )
                     simpleNllbTab.select(fn=lambda: "nllb", inputs = [], outputs= [simpleTranslateInput] )
                     simpleMT5Tab.select(fn=lambda: "mt5", inputs = [], outputs= [simpleTranslateInput] )
                     simpleALMATab.select(fn=lambda: "ALMA", inputs = [], outputs= [simpleTranslateInput] )
+                    simplemadlad400Tab.select(fn=lambda: "madlad400", inputs = [], outputs= [simpleTranslateInput] )
                 with gr.Column():
                     with gr.Tab(label="URL") as simpleUrlTab:
                         simpleInputDict.update({gr.Text(label="URL (YouTube, etc.)", elem_id = "urlData")})
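Each translation backend gets its own tab, and Tab.select writes the backend's name into a gr.State that the submit handler reads later. A reduced, runnable sketch of that wiring (assuming a Gradio version with the Tab select event, as the app itself uses):

```python
import gradio as gr

with gr.Blocks() as demo:
    translateInput = gr.State(value="m2m100")  # default backend

    with gr.Tab(label="M2M100") as m2m100Tab:
        gr.Markdown("M2M100 inputs go here")
    with gr.Tab(label="madlad400") as madlad400Tab:
        gr.Markdown("madlad400 inputs go here")

    # Selecting a tab records which backend the user is configuring.
    m2m100Tab.select(fn=lambda: "m2m100", inputs=[], outputs=[translateInput])
    madlad400Tab.select(fn=lambda: "madlad400", inputs=[], outputs=[translateInput])
```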
@@ -942,8 +960,10 @@ def create_ui(app_config: ApplicationConfig):
                     simpleInputDict.update(common_translation_inputs())
                 with gr.Column():
                     simpleOutput = common_output()
-
-
+                    gr.Markdown(uiArticle)
+                    if translateModelMd is not None:
+                        with gr.Accordion("docs/translateModel.md", open=False):
+                            gr.Markdown(translateModelMd)
                     if optionsMd is not None:
                         with gr.Accordion("docs/options.md", open=False):
                             gr.Markdown(optionsMd)
@@ -957,7 +977,7 @@ def create_ui(app_config: ApplicationConfig):

     fullInputDict = {}
     fullDescription = uiDescription + "\n\n\n\n" + "Be careful when changing some of the options in the full interface - this can cause the model to crash."
-
+
     with gr.Blocks() as fullTranscribe:
         fullTranslateInput = gr.State(value="m2m100", elem_id = "translateInput")
         fullSourceInput = gr.State(value="urlData", elem_id = "sourceInput")
@@ -980,10 +1000,14 @@ def create_ui(app_config: ApplicationConfig):
                     with gr.Tab(label="ALMA") as fullALMATab:
                         with gr.Row():
                             fullInputDict.update(common_ALMA_inputs())
+                    with gr.Tab(label="madlad400") as fullmadlad400Tab:
+                        with gr.Row():
+                            fullInputDict.update(common_madlad400_inputs())
                     fullM2M100Tab.select(fn=lambda: "m2m100", inputs = [], outputs= [fullTranslateInput] )
                     fullNllbTab.select(fn=lambda: "nllb", inputs = [], outputs= [fullTranslateInput] )
                     fullMT5Tab.select(fn=lambda: "mt5", inputs = [], outputs= [fullTranslateInput] )
                     fullALMATab.select(fn=lambda: "ALMA", inputs = [], outputs= [fullTranslateInput] )
+                    fullmadlad400Tab.select(fn=lambda: "madlad400", inputs = [], outputs= [fullTranslateInput] )
                 with gr.Column():
                     with gr.Tab(label="URL") as fullUrlTab:
                         fullInputDict.update({gr.Text(label="URL (YouTube, etc.)", elem_id = "urlData")})
@@ -1013,7 +1037,7 @@ def create_ui(app_config: ApplicationConfig):
                         gr.Number(label="Best Of - Non-zero temperature", value=app_config.best_of, precision=0, elem_id = "best_of"),
                         gr.Number(label="Beam Size - Zero temperature", value=app_config.beam_size, precision=0, elem_id = "beam_size"),
                         gr.Number(label="Patience - Zero temperature", value=app_config.patience, elem_id = "patience"),
-                        gr.Number(label="Length Penalty - Any temperature", value=app_config.length_penalty, elem_id = "length_penalty"),
+                        gr.Number(label="Length Penalty - Any temperature", value=lambda : None if app_config.length_penalty is None else app_config.length_penalty, elem_id = "length_penalty"),
                         gr.Text(label="Suppress Tokens - Comma-separated list of token IDs", value=app_config.suppress_tokens, elem_id = "suppress_tokens"),
                         gr.Checkbox(label="Condition on previous text", value=app_config.condition_on_previous_text, elem_id = "condition_on_previous_text"),
                         gr.Checkbox(label="FP16", value=app_config.fp16, elem_id = "fp16"),
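Gradio re-evaluates a callable value on every page load instead of freezing the value at component build time, so wrapping app_config.length_penalty in a lambda keeps a None default from being baked in. A sketch of the idiom:

```python
import gradio as gr

length_penalty = None  # stand-in for app_config.length_penalty

number = gr.Number(
    label="Length Penalty - Any temperature",
    # A callable default is re-evaluated on each page load.
    value=lambda: None if length_penalty is None else length_penalty,
)
```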
@@ -1054,7 +1078,7 @@ def create_ui(app_config: ApplicationConfig):
         print("Queue mode enabled (concurrency count: " + str(app_config.queue_concurrency_count) + ")")
     else:
         print("Queue mode disabled - progress bars will not be shown.")
-
+
     demo.launch(inbrowser=app_config.autolaunch, share=app_config.share, server_name=app_config.server_name, server_port=app_config.server_port)

     # Clean up
@@ -1136,6 +1160,16 @@ if __name__ == '__main__':
     # updated_config.autolaunch = True
     # updated_config.auto_parallel = False
     # updated_config.save_downloaded_files = True
+
+    try:
+        if torch.cuda.is_available():
+            deviceId = torch.cuda.current_device()
+            totalVram = torch.cuda.get_device_properties(deviceId).total_memory
+            if totalVram/(1024*1024*1024) <= 4: #VRAM <= 4 GB
+                updated_config.vad_process_timeout = 0
+    except Exception as e:
+        print(traceback.format_exc())
+        print("Error detect vram: " + str(e))

     if (threads := args.pop("threads")) > 0:
         torch.set_num_threads(threads)
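torch.cuda.get_device_properties(...).total_memory reports bytes, hence the 1024*1024*1024 divisor for the 4 GB threshold; on such cards the commit disables the VAD parallel-process timeout (0). A standalone probe (assumes torch is installed):

```python
import torch

if torch.cuda.is_available():
    deviceId = torch.cuda.current_device()
    totalVram = torch.cuda.get_device_properties(deviceId).total_memory  # bytes
    print("GPU VRAM: %.1f GB" % (totalVram / (1024 ** 3)))
else:
    print("No CUDA device; VRAM check skipped")
```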
config.json5 CHANGED
@@ -23,6 +23,10 @@
             "name": "large",
             "url": "large"
         },
+        {
+            "name": "large-v1",
+            "url": "large-v1"
+        },
         {
             "name": "large-v2",
             "url": "large-v2"
@@ -189,10 +193,55 @@
         }
     ],
     "ALMA": [
+        {
+            "name": "ALMA-7B-GPTQ/TheBloke",
+            "url": "TheBloke/ALMA-7B-GPTQ",
+            "type": "huggingface"
+        },
         {
             "name": "ALMA-13B-GPTQ/TheBloke",
             "url": "TheBloke/ALMA-13B-GPTQ",
-            "type": "huggingface"
+            "type": "huggingface"
+        },
+        {
+            "name": "ALMA-7B-GGUF-Q4_K_M/TheBloke",
+            "url": "TheBloke/ALMA-7B-GGUF",
+            "type": "huggingface",
+            "model_file": "alma-7b.Q4_K_M.gguf",
+            "tokenizer_url": "haoranxu/ALMA-7B"
+        },
+        {
+            "name": "ALMA-13B-GGUF-Q4_K_M/TheBloke",
+            "url": "TheBloke/ALMA-13B-GGUF",
+            "type": "huggingface",
+            "model_file": "alma-13b.Q4_K_M.gguf",
+            "tokenizer_url": "haoranxu/ALMA-13B"
+        },
+        {
+            "name": "ALMA-7B-ct2:int8_float16/avan",
+            "url": "avans06/ALMA-7B-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "haoranxu/ALMA-7B"
+        },
+        {
+            "name": "ALMA-13B-ct2:int8_float16/avan",
+            "url": "avans06/ALMA-13B-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "haoranxu/ALMA-13B"
+        },
+    ],
+    "madlad400": [
+        {
+            "name": "madlad400-3b-mt-ct2-int8_float16/SoybeanMilk",
+            "url": "SoybeanMilk/madlad400-3b-mt-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "jbochi/madlad400-3b-mt"
+        },
+        {
+            "name": "madlad400-10b-mt-ct2-int8_float16/SoybeanMilk",
+            "url": "SoybeanMilk/madlad400-10b-mt-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "jbochi/madlad400-10b-mt"
         },
     ]
 },
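A hedged sketch of how entries like these can be read back: config.json5 is JSON5, so a JSON5 parser such as the json5 package is assumed, and the "models" top-level key follows the app_config.models usage seen in app.py; the loader below is illustrative, not the app's actual code.

```python
import json5  # pip install json5

with open("config.json5", "r", encoding="utf-8") as f:
    config = json5.load(f)

for entry in config["models"]["madlad400"]:
    # "url" is the Hugging Face repo; "tokenizer_url" points at the original
    # repo whose tokenizer the ct2 conversion reuses.
    print(entry["name"], "->", entry["url"], "| tokenizer:", entry.get("tokenizer_url"))
```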