Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,139 +1,131 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import
|
3 |
|
4 |
-
# Define
|
5 |
LANGUAGE_MODELS = {
|
6 |
-
'Afrikaans':
|
7 |
-
'Albanian':
|
8 |
-
'
|
9 |
-
'
|
10 |
-
'
|
11 |
-
'
|
12 |
-
'
|
13 |
-
'
|
14 |
-
'
|
15 |
-
'
|
16 |
-
'
|
17 |
-
'
|
18 |
-
'
|
19 |
-
'
|
20 |
-
'
|
21 |
-
'
|
22 |
-
'
|
23 |
-
'
|
24 |
-
'
|
25 |
-
'
|
26 |
-
'
|
27 |
-
'
|
28 |
-
'
|
29 |
-
'
|
30 |
-
'
|
31 |
-
'
|
32 |
-
'
|
33 |
-
'
|
34 |
-
'
|
35 |
-
'
|
36 |
-
'
|
37 |
-
'
|
38 |
-
'
|
39 |
-
'
|
40 |
-
'
|
41 |
-
'
|
42 |
-
'
|
43 |
-
'
|
44 |
-
'
|
45 |
-
'
|
46 |
-
'
|
47 |
-
'
|
48 |
-
'
|
49 |
-
'
|
50 |
-
'
|
51 |
-
'
|
52 |
-
'
|
53 |
-
'
|
54 |
-
'
|
55 |
-
'
|
56 |
-
'
|
57 |
-
'
|
58 |
-
'
|
59 |
-
'
|
60 |
-
'
|
61 |
-
'
|
62 |
-
'
|
63 |
-
'
|
64 |
-
'
|
65 |
-
'
|
66 |
-
'
|
67 |
-
'
|
68 |
-
'
|
69 |
-
'
|
70 |
-
'
|
71 |
-
'
|
72 |
-
'
|
73 |
-
'
|
74 |
-
'
|
75 |
-
'
|
76 |
-
'
|
77 |
-
'
|
78 |
-
'
|
79 |
-
'
|
80 |
-
'
|
81 |
-
'
|
82 |
-
'
|
83 |
-
'
|
84 |
-
'
|
85 |
-
'
|
86 |
-
'
|
87 |
-
'
|
88 |
-
'
|
|
|
|
|
|
|
|
|
89 |
}
|
90 |
|
91 |
@st.cache_resource
|
92 |
-
def load_model(
|
93 |
-
|
94 |
-
|
95 |
-
st.error(f"Model for language '{target_language}' not found.")
|
96 |
-
return None, None
|
97 |
-
|
98 |
-
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
99 |
-
model = MarianMTModel.from_pretrained(model_name)
|
100 |
return tokenizer, model
|
101 |
|
102 |
-
def
|
103 |
-
tokenizer, model = load_model(
|
104 |
-
if tokenizer is None or model is None:
|
105 |
-
return ""
|
106 |
-
|
107 |
-
# Tokenize the input text
|
108 |
-
inputs = tokenizer(text, return_tensors="pt", padding=True)
|
109 |
|
110 |
-
#
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
115 |
-
return translated_text
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
if st.button("Translate"):
|
131 |
-
if source_text:
|
132 |
-
translated_text = translate_text(source_text, target_language)
|
133 |
-
st.write(f"Translated text ({target_language}):")
|
134 |
-
st.write(translated_text)
|
135 |
-
else:
|
136 |
-
st.warning("Please enter text to translate.")
|
137 |
|
138 |
-
if
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
|
3 |
|
4 |
+
# Maps the display name shown in the UI to the language code handed to the
# M2M100 tokenizer (see ``translate``). Despite the constant's name, the values
# are language codes, not model identifiers.
#
# Javanese uses 'jv' here: 'jw' is the Google Translate code and is not a
# language code known to the M2M100 tokenizer.
#
# NOTE(review): several other entries (e.g. Esperanto, Hawaiian, Latin,
# Maltese, Maori, Nyanja, Oromo, Quechua, Samoan, Sesotho, Shona, Tajik,
# Telugu, Turkmen) may not be covered by facebook/m2m100_418M -- confirm
# against the model card and prune the ones that are not.
LANGUAGE_MODELS = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Catalan': 'ca',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Finnish': 'fi',
    'French': 'fr',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jv',
    'Kannada': 'kn',
    'Khmer': 'km',
    'Korean': 'ko',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Myanmar': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Nyanja': 'ny',
    'Odia': 'or',
    'Oromo': 'om',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',
    'Quechua': 'qu',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkmen': 'tk',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}
|
94 |
|
95 |
@st.cache_resource
def load_model():
    """Load the M2M100 tokenizer and translation model.

    The function is wrapped in ``st.cache_resource``, so the pair is meant to
    be created once and reused across Streamlit reruns.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the
        ``facebook/m2m100_418M`` checkpoint.
    """
    checkpoint = "facebook/m2m100_418M"
    return (
        M2M100Tokenizer.from_pretrained(checkpoint),
        M2M100ForConditionalGeneration.from_pretrained(checkpoint),
    )
|
100 |
|
101 |
+
def translate(text, target_language):
    """Translate English ``text`` into ``target_language`` with M2M100.

    Args:
        text: English source text.
        target_language: Display name of the target language; must be a key of
            ``LANGUAGE_MODELS``.

    Returns:
        The translated string. Returns ``"Target language not supported."``
        when the language name is unknown or its code is not known to the
        tokenizer, and ``""`` when ``text`` is empty or whitespace-only.
    """
    # Resolve the language code first: this is a cheap dictionary lookup, so an
    # unknown name never triggers loading the model.
    target_lang_code = LANGUAGE_MODELS.get(target_language)
    if not target_lang_code:
        return "Target language not supported."

    # Nothing to translate; skip the model entirely.
    if not text or not text.strip():
        return ""

    tokenizer, model = load_model()

    # get_lang_id raises KeyError for a code missing from the tokenizer's
    # language table; report it the same way as an unknown language name
    # instead of leaking a bare KeyError to the caller.
    try:
        target_lang_id = tokenizer.get_lang_id(target_lang_code)
    except KeyError:
        return "Target language not supported."

    # The app always translates from English.
    tokenizer.src_lang = "en"
    encoded_input = tokenizer(text, return_tensors="pt")
    # forced_bos_token_id makes generation start with the target-language token.
    generated_tokens = model.generate(
        **encoded_input,
        forced_bos_token_id=target_lang_id,
    )
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
|
114 |
|
115 |
+
# --- Streamlit page: English text in, translated text out ---
st.title('English to Any Language Translator')

text_input = st.text_area("Enter text in English:", "Hello, how are you?")

# Iterating the dict yields its keys (the display names) in insertion order.
target_language = st.selectbox(
    'Select the target language:',
    list(LANGUAGE_MODELS),
)

if st.button('Translate'):
    if not text_input.strip():
        # Do not run the model on empty input; ask the user for text instead.
        st.warning("Please enter text to translate.")
    else:
        with st.spinner('Translating...'):
            # Top-level UI boundary: surface any model/tokenizer failure to the
            # user instead of crashing the app.
            try:
                translation = translate(text_input, target_language)
            except Exception as e:
                st.error(f"Error: {e}")
            else:
                st.write(f'Translation ({target_language}):')
                st.write(translation)
|