TiberiuCristianLeon committed
Commit 7b7cc25 · verified · 1 Parent(s): b1fb154

Update app.py

Files changed (1):
  1. app.py +62 -19
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
  import polars as pl
  from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, logging, AutoModelForCausalLM
  import torch
- import os, gc
+ import os
  import httpx

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -19,11 +19,25 @@ langs.extend(list(all_langs.keys())) # Language options as list, add favourite l
  # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
  iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}

- models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
+ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-OPUS", "Google",
+ "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
+ "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
+ "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa",
+ "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
+ "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
+ "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
+ "facebook/m2m100_418M", "facebook/m2m100_1.2B",
+ "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
+ "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
+ "t5-small", "t5-base", "t5-large",
+ "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
+ "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
- "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
- "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2", "tencent/Hunyuan-MT-7B",
- "openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"]
+ "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
+ "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
+ "tencent/Hunyuan-MT-7B",
+ "openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
+ ]
  allmodels = ["Helsinki-NLP",
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
@@ -49,11 +63,25 @@ class Translators:
  response = httpx.get(url)
  return response.json()[0][0][0]

- def hplt(self):
- hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt'
- pipe = pipeline("translation", model=hplt_model, device=self.device)
- translation = pipe(self.input_text)
- return translation[0]['translation_text']
+ def hplt(self, opus = False):
+ # langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
+ hplt_models = ['ar-en', 'bs-en', 'ca-en', 'en-ar', 'en-bs', 'en-ca', 'en-et', 'en-eu', 'en-fi',
+ 'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
+ 'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
+ 'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
+ if opus:
+ hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt_opus' # HPLT/translate-en-hr-v1.0-hplt_opus
+ else:
+ hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt' # HPLT/translate-en-hr-v1.0-hplt
+ if f'{self.sl}-{self.tl}' in hplt_models:
+ pipe = pipeline("translation", model=hplt_model, device=self.device)
+ translation = pipe(self.input_text)
+ translated_text = translation[0]['translation_text']
+ message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {hplt_model}.'
+ else:
+ translated_text = f'HPLT model from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
+ message = f"Available models: {', '.join(hplt_models)}"
+ return translated_text, message

  @staticmethod
  def quickmttranslate(model_path, input_text):
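
For reference, the new hplt() path builds a Hugging Face model id of the form HPLT/translate-{sl}-{tl}-v1.0-hplt (or ...-hplt_opus) and runs it through a plain transformers translation pipeline. A minimal standalone sketch of that pattern, with the en-hr pair and CPU device chosen purely for illustration:

    from transformers import pipeline

    # Illustrative pair; hplt() first checks the pair against its hplt_models list.
    sl, tl = "en", "hr"
    model_id = f"HPLT/translate-{sl}-{tl}-v1.0-hplt"  # append _opus for the OPUS-tuned variant
    translator = pipeline("translation", model=model_id, device=-1)  # -1 = CPU
    print(translator("Good morning!")[0]["translation_text"])
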
@@ -96,7 +124,7 @@ class Translators:
  # Direct translation model
  if f"{self.sl}-{self.tl}" in quickmt_models:
  model_path = Translators.quickmtdownload(model_name)
- translation = Translators.quickmttranslate(model_path, self.input_text)
+ translated_text = Translators.quickmttranslate(model_path, self.input_text)
  message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {model_name}.'
  # Pivot language English
  elif self.sl in available_languages and self.tl in available_languages:
@@ -105,12 +133,12 @@
  entranslation = Translators.quickmttranslate(model_path, self.input_text)
  model_name = f"quickmt-en-{self.tl}"
  model_path = Translators.quickmtdownload(model_name)
- translation = Translators.quickmttranslate(model_path, entranslation)
+ translated_text = Translators.quickmttranslate(model_path, entranslation)
  message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with pivot language English.'
  else:
- translation = f'Model {model_name} from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
+ translated_text = f'Model {model_name} from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
  message = f"Available models: {', '.join(quickmt_models)}"
- return translation, message
+ return translated_text, message

  @staticmethod
  def download_argos_model(from_code, to_code):
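
The quickmt changes only rename translation to translated_text, but the surrounding logic is the pivot scheme: when no direct model covers the pair, translate source to English and then English to target. The same idea, sketched here with two off-the-shelf Helsinki-NLP models rather than the quickmt backend (model ids and the sample sentence are illustrative only):

    from transformers import pipeline

    # Pivot translation sketch: de -> en -> fr through two generic translation pipelines.
    to_english = pipeline("translation", model="Helsinki-NLP/opus-mt-de-en")
    english_to_target = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")

    english = to_english("Guten Morgen, wie geht es dir?")[0]["translation_text"]
    print(english_to_target(english)[0]["translation_text"])
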
@@ -172,6 +200,15 @@ class Translators:
  # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
  output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
  return output_text
+
+ def simplepipe(self):
+ try:
+ pipe = pipeline("translation", model=self.model_name, device=self.device)
+ translation = pipe(self.input_text)
+ message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {self.model_name}.'
+ return translation[0]['translation_text'], message
+ except Exception as error:
+ return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error

  def HelsinkiNLP_mulroa(self):
  try:
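
simplepipe() is a thin wrapper that feeds self.model_name straight into a translation pipeline and returns the text plus a status message, or an error string and the exception on failure. Judging by the call sites further down, usage would look roughly like this; the constructor arguments are inferred from those call sites and the Romanian sample sentence is made up:

    # Inferred usage of the new helper (the ro -> en pair is purely illustrative).
    translator = Translators("Helsinki-NLP/opus-mt-tc-bible-big-roa-en", "ro", "en", "Bună dimineața!")
    translated_text, message = translator.simplepipe()
    print(translated_text, message)
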
@@ -501,16 +538,22 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
  return translated_text, message_text
  message_text = f'Translated from {s_language} to {t_language} with {model_name}'
  translated_text = None
- try:
- if "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
+ try:
+ if model_name == "Helsinki-NLP/opus-mt-tc-bible-big-roa-en":
+ translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
+
+ elif "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
  translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulroa()

  elif model_name == "Helsinki-NLP":
  translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()

- elif model_name == "HPLT":
- translated_text = Translators(model_name, sl, tl, input_text).hplt()
-
+ elif "HPLT" in model_name:
+ if model_name == "HPLT-OPUS":
+ translated_text, message_text = Translators(model_name, sl, tl, input_text).hplt(opus = True)
+ else:
+ translated_text, message_text = Translators(model_name, sl, tl, input_text).hplt()
+
  elif model_name == 'Argos':
  translated_text = Translators(model_name, sl, tl, input_text).argos()

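
Taken together, the dispatcher in translate_text() now routes the roa-en bible model to simplepipe() and any model name containing "HPLT" to hplt(), with "HPLT-OPUS" selecting the opus=True variant. Hypothetical calls into the updated dispatcher, assuming the app maps the full language names to ISO codes as elsewhere in the file; all argument values are examples only:

    text, msg = translate_text("HPLT", "English", "Croatian", "Good morning!")
    text, msg = translate_text("HPLT-OPUS", "English", "Croatian", "Good morning!")
    text, msg = translate_text("Helsinki-NLP/opus-mt-tc-bible-big-roa-en", "Romanian", "English", "Bună dimineața!")
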