SoybeanMilk committed
Commit 4e2f72e
1 Parent(s): 2def7a1

Add madlad400 support.

Files changed (4)
  1. app.py +19 -0
  2. config.json5 +14 -0
  3. src/config.py +2 -2
  4. src/translation/translationModel.py +11 -1
app.py CHANGED
@@ -233,6 +233,8 @@ class WhisperTranscriber:
         mt5LangName: str = decodeOptions.pop("mt5LangName")
         ALMAModelName: str = decodeOptions.pop("ALMAModelName")
         ALMALangName: str = decodeOptions.pop("ALMALangName")
+        madlad400ModelName: str = decodeOptions.pop("madlad400ModelName")
+        madlad400LangName: str = decodeOptions.pop("madlad400LangName")

         translationBatchSize: int = decodeOptions.pop("translationBatchSize")
         translationNoRepeatNgramSize: int = decodeOptions.pop("translationNoRepeatNgramSize")
@@ -368,6 +370,10 @@ class WhisperTranscriber:
            selectedModelName = ALMAModelName if ALMAModelName is not None and len(ALMAModelName) > 0 else "ALMA-13B-GPTQ/TheBloke"
            selectedModel = next((modelConfig for modelConfig in self.app_config.models["ALMA"] if modelConfig.name == selectedModelName), None)
            translationLang = get_lang_from_m2m100_name(ALMALangName)
+        elif translateInput == "madlad400" and madlad400LangName is not None and len(madlad400LangName) > 0:
+            selectedModelName = madlad400ModelName if madlad400ModelName is not None and len(madlad400ModelName) > 0 else "madlad400-10b-mt-ct2-int8_float16"
+            selectedModel = next((modelConfig for modelConfig in self.app_config.models["madlad400"] if modelConfig.name == selectedModelName), None)
+            translationLang = get_lang_from_m2m100_name(madlad400LangName)

         if translationLang is not None:
            translationModel = TranslationModel(modelConfig=selectedModel, whisperLang=whisperLang, translationLang=translationLang, batchSize=translationBatchSize, noRepeatNgramSize=translationNoRepeatNgramSize, numBeams=translationNumBeams)
@@ -929,6 +935,7 @@ def create_ui(app_config: ApplicationConfig):
     m2m100_models = app_config.get_model_names("m2m100")
     mt5_models = app_config.get_model_names("mt5")
     ALMA_models = app_config.get_model_names("ALMA")
+    madlad400_models = app_config.get_model_names("madlad400")
     if not torch.cuda.is_available(): # GPTQ support on CPU is poor (over a thousand seconds per iteration), so GPTQ models are removed from the list when no GPU is available.
         ALMA_models = list(filter(lambda alma: "GPTQ" not in alma, ALMA_models))

@@ -952,6 +959,10 @@ def create_ui(app_config: ApplicationConfig):
         gr.Dropdown(label="ALMA - Model (for translate)", choices=ALMA_models, elem_id="ALMAModelName"),
         gr.Dropdown(label="ALMA - Language", choices=sort_lang_by_whisper_codes(["en", "de", "cs", "is", "ru", "zh", "ja"]), elem_id="ALMALangName"),
     }
+    common_madlad400_inputs = lambda : {
+        gr.Dropdown(label="madlad400 - Model (for translate)", choices=madlad400_models, elem_id="madlad400ModelName"),
+        gr.Dropdown(label="madlad400 - Language", choices=sorted(get_lang_m2m100_names()), elem_id="madlad400LangName"),
+    }

     common_translation_inputs = lambda : {
         gr.Number(label="Translation - Batch Size", precision=0, value=app_config.translation_batch_size, elem_id="translationBatchSize"),
@@ -1036,10 +1047,14 @@ def create_ui(app_config: ApplicationConfig):
                with gr.Tab(label="ALMA") as simpleALMATab:
                    with gr.Row():
                        simpleInputDict.update(common_ALMA_inputs())
+                with gr.Tab(label="madlad400") as simplemadlad400Tab:
+                    with gr.Row():
+                        simpleInputDict.update(common_madlad400_inputs())
                simpleM2M100Tab.select(fn=lambda: "m2m100", inputs = [], outputs= [simpleTranslateInput] )
                simpleNllbTab.select(fn=lambda: "nllb", inputs = [], outputs= [simpleTranslateInput] )
                simpleMT5Tab.select(fn=lambda: "mt5", inputs = [], outputs= [simpleTranslateInput] )
                simpleALMATab.select(fn=lambda: "ALMA", inputs = [], outputs= [simpleTranslateInput] )
+                simplemadlad400Tab.select(fn=lambda: "madlad400", inputs = [], outputs= [simpleTranslateInput] )
            with gr.Column():
                with gr.Tab(label="URL") as simpleUrlTab:
                    simpleInputDict.update({gr.Text(label="URL (YouTube, etc.)", elem_id = "urlData")})
@@ -1103,10 +1118,14 @@ def create_ui(app_config: ApplicationConfig):
                with gr.Tab(label="ALMA") as fullALMATab:
                    with gr.Row():
                        fullInputDict.update(common_ALMA_inputs())
+                with gr.Tab(label="madlad400") as fullmadlad400Tab:
+                    with gr.Row():
+                        fullInputDict.update(common_madlad400_inputs())
                fullM2M100Tab.select(fn=lambda: "m2m100", inputs = [], outputs= [fullTranslateInput] )
                fullNllbTab.select(fn=lambda: "nllb", inputs = [], outputs= [fullTranslateInput] )
                fullMT5Tab.select(fn=lambda: "mt5", inputs = [], outputs= [fullTranslateInput] )
                fullALMATab.select(fn=lambda: "ALMA", inputs = [], outputs= [fullTranslateInput] )
+                fullmadlad400Tab.select(fn=lambda: "madlad400", inputs = [], outputs= [fullTranslateInput] )
            with gr.Column():
                with gr.Tab(label="URL") as fullUrlTab:
                    fullInputDict.update({gr.Text(label="URL (YouTube, etc.)", elem_id = "urlData")})
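The tab wiring above follows the pattern already used for the other backends: selecting a tab writes that backend's name into a shared translate-input component, and the dispatch code earlier in the diff reads that value (translateInput) to decide which translation model to load. A minimal standalone sketch of the pattern, assuming a recent Gradio release; the component names here are illustrative, not the app's actual ones:

import gradio as gr

with gr.Blocks() as demo:
    # Hidden field holding the currently selected translation backend.
    translate_input = gr.Text(value="m2m100", visible=False)
    with gr.Tab(label="m2m100") as m2m100_tab:
        gr.Markdown("m2m100 options go here")
    with gr.Tab(label="madlad400") as madlad400_tab:
        gr.Markdown("madlad400 options go here")
    # Selecting a tab stores its backend name, mirroring the .select() calls above.
    m2m100_tab.select(fn=lambda: "m2m100", inputs=[], outputs=[translate_input])
    madlad400_tab.select(fn=lambda: "madlad400", inputs=[], outputs=[translate_input])

demo.launch()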
config.json5 CHANGED
@@ -229,6 +229,20 @@
            "type": "huggingface",
            "tokenizer_url": "haoranxu/ALMA-13B"
        },
+    ],
+    "madlad400": [
+        {
+            "name": "madlad400-3b-mt-ct2-int8_float16/SoybeanMilk",
+            "url": "SoybeanMilk/madlad400-3b-mt-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "jbochi/madlad400-3b-mt"
+        },
+        {
+            "name": "madlad400-10b-mt-ct2-int8_float16/SoybeanMilk",
+            "url": "SoybeanMilk/madlad400-10b-mt-ct2-int8_float16",
+            "type": "huggingface",
+            "tokenizer_url": "jbochi/madlad400-10b-mt"
+        },
    ]
 },
 // Configuration options that will be used if they are not specified in the command line arguments.
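These entries follow the same schema as the existing model lists: "name" is the label offered in the model dropdown, "url" points at the Hugging Face repository holding the CTranslate2 conversion, and "tokenizer_url" names the original checkpoint whose tokenizer is reused. A sketch of how such a block is consumed (cf. the src/config.py diff below), with a stand-in dataclass in place of the project's ModelConfig, whose field names are assumed from this config file:

import json5  # pip install json5
from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelConfig:  # stand-in for the project's ModelConfig; fields assumed from config.json5
    name: str
    url: str
    type: str = "huggingface"
    tokenizer_url: Optional[str] = None

raw = json5.loads("""
{
  "madlad400": [
    {
      "name": "madlad400-3b-mt-ct2-int8_float16/SoybeanMilk",
      "url": "SoybeanMilk/madlad400-3b-mt-ct2-int8_float16",
      "type": "huggingface",
      "tokenizer_url": "jbochi/madlad400-3b-mt",
    },
  ],
}
""")
models = {key: [ModelConfig(**item) for item in value] for key, value in raw.items()}
print(models["madlad400"][0].tokenizer_url)  # jbochi/madlad400-3b-mt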
src/config.py CHANGED
@@ -50,7 +50,7 @@ class VadInitialPromptMode(Enum):
         return None

 class ApplicationConfig:
-    def __init__(self, models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA"], List[ModelConfig]],
+    def __init__(self, models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA", "madlad400"], List[ModelConfig]],
                 input_audio_max_duration: int = 600, share: bool = False, server_name: str = None, server_port: int = 7860,
                 queue_concurrency_count: int = 1, delete_uploaded_files: bool = True,
                 whisper_implementation: str = "whisper", default_model_name: str = "medium",
@@ -181,7 +181,7 @@ class ApplicationConfig:
         # Load using json5
         data = json5.load(f)
         data_models = data.pop("models", [])
-        models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA"], List[ModelConfig]] = {
+        models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA", "madlad400"], List[ModelConfig]] = {
             key: [ModelConfig(**item) for item in value]
             for key, value in data_models.items()
         }
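One caveat worth noting: widening the Literal key type is a static-typing change only. At runtime the dict accepts any string key, so the annotation documents the supported backends rather than enforcing them. A small illustration:

from typing import Dict, List, Literal

Backend = Literal["whisper", "m2m100", "nllb", "mt5", "ALMA", "madlad400"]

models: Dict[Backend, List[str]] = {"madlad400": []}  # accepted by type checkers
models["not-a-backend"] = []  # still runs; only a static checker such as mypy flags the unknown key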
src/translation/translationModel.py CHANGED
@@ -159,6 +159,10 @@ class TranslationModel:
            self.transTokenizer = transformers.AutoTokenizer.from_pretrained(self.modelConfig.tokenizer_url if self.modelConfig.tokenizer_url is not None and len(self.modelConfig.tokenizer_url) > 0 else self.modelPath)
            self.ALMAPrefix = "Translate this from " + self.whisperLang.whisper.names[0] + " to " + self.translationLang.whisper.names[0] + ":\n" + self.whisperLang.whisper.names[0] + ": "
            self.transModel = ctranslate2.Generator(self.modelPath, compute_type="auto", device=self.device)
+        elif "madlad400" in self.modelPath:
+            self.madlad400Prefix = "<2" + self.translationLang.whisper.code + "> "
+            self.transTokenizer = transformers.AutoTokenizer.from_pretrained(self.modelConfig.tokenizer_url if self.modelConfig.tokenizer_url is not None and len(self.modelConfig.tokenizer_url) > 0 else self.modelPath, src_lang=self.whisperLang.m2m100.code)
+            self.transModel = ctranslate2.Translator(self.modelPath, compute_type="auto", device=self.device)
        elif "mt5" in self.modelPath:
            self.mt5Prefix = self.whisperLang.whisper.code + "2" + self.translationLang.whisper.code + ": "
            self.transTokenizer = transformers.T5Tokenizer.from_pretrained(self.modelPath, legacy=False) # requires spiece.model
@@ -277,6 +281,11 @@ class TranslationModel:
            output = self.transModel.generate_batch([source], max_length=max_length, max_batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, beam_size=self.numBeams, sampling_temperature=0.7, sampling_topp=0.9, repetition_penalty=1.1, include_prompt_in_result=False) #, sampling_topk=40
            target = output[0]
            result = self.transTokenizer.decode(target.sequences_ids[0])
+        elif "madlad400" in self.modelPath:
+            source = self.transTokenizer.convert_ids_to_tokens(self.transTokenizer.encode(self.madlad400Prefix + text))
+            output = self.transModel.translate_batch([source], max_batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, beam_size=self.numBeams)
+            target = output[0].hypotheses[0]
+            result = self.transTokenizer.decode(self.transTokenizer.convert_tokens_to_ids(target))
        elif "mt5" in self.modelPath:
            output = self.transTranslator(self.mt5Prefix + text, max_length=max_length, batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, num_beams=self.numBeams) #, num_return_sequences=2
            result = output[0]['generated_text']
@@ -299,7 +308,8 @@
 _MODELS = ["nllb-200",
           "m2m100",
           "mt5",
-           "ALMA"]
+           "ALMA",
+           "madlad400"]

 def check_model_name(name):
     return any(allowed_name in name for allowed_name in _MODELS)
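For context: MADLAD-400 is an encoder-decoder (T5-family) translation model, which is why this branch uses ctranslate2.Translator with translate_batch rather than the Generator/generate_batch pair used for the decoder-only ALMA, and why the target language is selected by prepending a "<2xx>" token (e.g. "<2ja>" for Japanese) instead of a natural-language prompt. A minimal standalone sketch of the same round trip; the local model directory path is hypothetical and stands for a CTranslate2 conversion such as the ones referenced in config.json5:

import ctranslate2
import transformers

# Tokenizer comes from the original checkpoint (cf. tokenizer_url in config.json5);
# "./madlad400-3b-mt-ct2-int8_float16" is a hypothetical local CTranslate2 conversion.
tokenizer = transformers.AutoTokenizer.from_pretrained("jbochi/madlad400-3b-mt")
translator = ctranslate2.Translator("./madlad400-3b-mt-ct2-int8_float16", compute_type="auto", device="cpu")

# "<2ja>" selects Japanese as the target language.
text = "<2ja> The quick brown fox jumps over the lazy dog."
source = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))

output = translator.translate_batch([source], beam_size=2, no_repeat_ngram_size=3)
target = output[0].hypotheses[0]  # list of token strings
print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))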