Spaces:

TiberiuCristianLeon
/

StreamlitTranslate

Running

App Files Files

TiberiuCristianLeon committed 11 days ago

Commit

7b7cc25

verified ·

1 Parent(s): b1fb154

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -19

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import streamlit as st
 import polars as pl
 from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, logging, AutoModelForCausalLM
 import torch
-import os, gc
 import httpx
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -19,11 +19,25 @@ langs.extend(list(all_langs.keys())) # Language options as list, add favourite l
 # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
 iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
-models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
           "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
-          "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
-          "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2", "tencent/Hunyuan-MT-7B",
-         "openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"]
 allmodels = ["Helsinki-NLP",
           "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
           "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
@@ -49,11 +63,25 @@ class Translators:
         response = httpx.get(url)
         return response.json()[0][0][0]
-    def hplt(self):
-        hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt'
-        pipe = pipeline("translation", model=hplt_model, device=self.device)
-        translation = pipe(self.input_text)
-        return translation[0]['translation_text']
     @staticmethod
     def quickmttranslate(model_path, input_text):
@@ -96,7 +124,7 @@ class Translators:
         # Direct translation model
         if f"{self.sl}-{self.tl}" in quickmt_models:
             model_path = Translators.quickmtdownload(model_name)
-            translation = Translators.quickmttranslate(model_path, self.input_text)
             message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {model_name}.'
         # Pivot language English
         elif self.sl in available_languages and self.tl in available_languages:
@@ -105,12 +133,12 @@ class Translators:
             entranslation = Translators.quickmttranslate(model_path, self.input_text)
             model_name = f"quickmt-en-{self.tl}"
             model_path = Translators.quickmtdownload(model_name)
-            translation = Translators.quickmttranslate(model_path, entranslation)
             message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with pivot language English.'
         else:
-            translation = f'Model {model_name} from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
             message = f"Available models: {', '.join(quickmt_models)}"
-        return translation, message
     @staticmethod
     def download_argos_model(from_code, to_code):
@@ -172,6 +200,15 @@ class Translators:
         # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
         return output_text
     def HelsinkiNLP_mulroa(self):
         try:
@@ -501,16 +538,22 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
         return translated_text, message_text
     message_text = f'Translated from {s_language} to {t_language} with {model_name}'
     translated_text = None
-    try:
-        if "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
             translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulroa()
         elif model_name == "Helsinki-NLP":
             translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
-        elif model_name == "HPLT":
-            translated_text = Translators(model_name, sl, tl, input_text).hplt()
         elif model_name == 'Argos':
             translated_text = Translators(model_name, sl, tl, input_text).argos()

 import polars as pl
 from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, logging, AutoModelForCausalLM
 import torch
+import os
 import httpx
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
 iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
+# Model names: backend labels (e.g. "Google", "Argos") and org/name repo ids, grouped by family.
+models = [
+          "Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-OPUS", "Google",
+          "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
+          "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
+          "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa",
+          "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
+          "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
+          "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
+          "facebook/m2m100_418M", "facebook/m2m100_1.2B",
+          "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
+          "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
+          "t5-small", "t5-base", "t5-large",
+          "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
+          "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
           "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
+          "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
+          "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
+          "tencent/Hunyuan-MT-7B",
+          "openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
+]
 allmodels = ["Helsinki-NLP",
           "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
           "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
         response = httpx.get(url)
         return response.json()[0][0][0]
+    def hplt(self, opus = False):
+        """Translate ``self.input_text`` with an HPLT model for the ``self.sl``-``self.tl`` pair.
+
+        Args:
+            opus: When truthy, the checkpoint name ends in ``-hplt_opus``
+                instead of ``-hplt``.
+
+        Returns:
+            A ``(translated_text, message)`` tuple. When the pair is not in the
+            supported list, no pipeline is created: the first item is a
+            "not available" notice and the second lists the supported pairs.
+        """
+        # Codes covered: ar, bs, ca, en, et, eu, fi, ga, gl, hi, hr, is, mt, nn, sq, sw, zh_hant.
+        # Every supported pair has English on one side.
+        supported_pairs = ['ar-en', 'bs-en', 'ca-en', 'en-ar', 'en-bs', 'en-ca', 'en-et', 'en-eu', 'en-fi',
+                           'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
+                           'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
+                           'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
+        # Guard clause: unsupported direction, report it without loading a model.
+        if f'{self.sl}-{self.tl}' not in supported_pairs:
+            unavailable = f'HPLT model from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
+            return unavailable, f"Available models: {', '.join(supported_pairs)}"
+        # e.g. HPLT/translate-en-hr-v1.0-hplt or HPLT/translate-en-hr-v1.0-hplt_opus
+        variant = 'hplt_opus' if opus else 'hplt'
+        checkpoint = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-{variant}'
+        translator = pipeline("translation", model=checkpoint, device=self.device)
+        outputs = translator(self.input_text)
+        translated_text = outputs[0]['translation_text']
+        message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {checkpoint}.'
+        return translated_text, message
     @staticmethod
     def quickmttranslate(model_path, input_text):
         # Direct translation model
         if f"{self.sl}-{self.tl}" in quickmt_models:
             model_path = Translators.quickmtdownload(model_name)
+            translated_text = Translators.quickmttranslate(model_path, self.input_text)
             message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {model_name}.'
         # Pivot language English
         elif self.sl in available_languages and self.tl in available_languages:
             entranslation = Translators.quickmttranslate(model_path, self.input_text)
             model_name = f"quickmt-en-{self.tl}"
             model_path = Translators.quickmtdownload(model_name)
+            translated_text = Translators.quickmttranslate(model_path, entranslation)
             message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with pivot language English.'
         else:
+            translated_text = f'Model {model_name} from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
             message = f"Available models: {', '.join(quickmt_models)}"
+        return translated_text, message
     @staticmethod
     def download_argos_model(from_code, to_code):
         # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
         return output_text
+    def simplepipe(self):
+        """Translate ``self.input_text`` with a "translation" pipeline built for ``self.model_name``.
+
+        Returns:
+            ``(translated_text, message)`` on success. If anything inside the
+            ``try`` raises, returns a fixed error notice together with the
+            caught exception object (not a string) in the message slot.
+        """
+        try:
+            translator = pipeline("translation", model=self.model_name, device=self.device)
+            outputs = translator(self.input_text)
+            message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {self.model_name}.'
+            translated_text = outputs[0]['translation_text']
+        except Exception as error:
+            # Best-effort: hand the failure back to the caller instead of raising.
+            failure_notice = f"Error translating with model: {self.model_name}! Try other available language combination or model."
+            return failure_notice, error
+        return translated_text, message
     def HelsinkiNLP_mulroa(self):
         try:
         return translated_text, message_text
     message_text = f'Translated from {s_language} to {t_language} with {model_name}'
     translated_text = None
+    try:
+        if in model_name == "Helsinki-NLP/opus-mt-tc-bible-big-roa-en":
+            translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
+        elif "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
             translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulroa()
         elif model_name == "Helsinki-NLP":
             translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
+        elif "HPLT" in model_name:
+            if model_name == "HPLT-OPUS":
+                translated_text, message = Translators(model_name, sl, tl, input_text).hplt(opus = True)
+            else:
+                translated_text, message = Translators(model_name, sl, tl, input_text).hplt()
         elif model_name == 'Argos':
             translated_text = Translators(model_name, sl, tl, input_text).argos()