AreesaAshfaq committed on
Commit
e44a4c2
·
verified ·
1 Parent(s): 51b811c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+
4
# Every model name below is this prefix followed by the language code, so the
# lookup table is generated from (display name, language code) pairs.
_MODEL_PREFIX = 'Helsinki-NLP/opus-mt-en-'

# Display names in the order they should appear in the language picker.
_LANGUAGE_CODES = (
    ('Afrikaans', 'af'),
    ('Albanian', 'sq'),
    ('Amharic', 'am'),
    ('Arabic', 'ar'),
    ('Armenian', 'hy'),
    ('Bengali', 'bn'),
    ('Bosnian', 'bs'),
    ('Catalan', 'ca'),
    ('Croatian', 'hr'),
    ('Czech', 'cs'),
    ('Danish', 'da'),
    ('Dutch', 'nl'),
    ('Esperanto', 'eo'),
    ('Estonian', 'et'),
    ('Finnish', 'fi'),
    ('French', 'fr'),
    ('German', 'de'),
    ('Greek', 'el'),
    ('Gujarati', 'gu'),
    ('Haitian Creole', 'ht'),
    ('Hausa', 'ha'),
    ('Hawaiian', 'haw'),
    ('Hebrew', 'he'),
    ('Hindi', 'hi'),
    ('Hungarian', 'hu'),
    ('Icelandic', 'is'),
    ('Igbo', 'ig'),
    ('Indonesian', 'id'),
    ('Irish', 'ga'),
    ('Italian', 'it'),
    ('Japanese', 'ja'),
    ('Javanese', 'jw'),
    ('Kannada', 'kn'),
    ('Khmer', 'km'),
    ('Korean', 'ko'),
    ('Latin', 'la'),
    ('Latvian', 'lv'),
    ('Lithuanian', 'lt'),
    ('Luxembourgish', 'lb'),
    ('Macedonian', 'mk'),
    ('Malagasy', 'mg'),
    ('Malayalam', 'ml'),
    ('Maltese', 'mt'),
    ('Maori', 'mi'),
    ('Marathi', 'mr'),
    ('Myanmar', 'my'),
    ('Nepali', 'ne'),
    ('Norwegian', 'no'),
    ('Nyanja', 'ny'),
    ('Odia', 'or'),
    ('Oromo', 'om'),
    ('Pashto', 'ps'),
    ('Persian', 'fa'),
    ('Polish', 'pl'),
    ('Portuguese', 'pt'),
    ('Punjabi', 'pa'),
    ('Quechua', 'qu'),
    ('Romanian', 'ro'),
    ('Russian', 'ru'),
    ('Samoan', 'sm'),
    ('Scots Gaelic', 'gd'),
    ('Serbian', 'sr'),
    ('Sesotho', 'st'),
    ('Shona', 'sn'),
    ('Sindhi', 'sd'),
    ('Sinhala', 'si'),
    ('Slovak', 'sk'),
    ('Slovenian', 'sl'),
    ('Somali', 'so'),
    ('Spanish', 'es'),
    ('Sundanese', 'su'),
    ('Swahili', 'sw'),
    ('Swedish', 'sv'),
    ('Tajik', 'tg'),
    ('Tamil', 'ta'),
    ('Telugu', 'te'),
    ('Thai', 'th'),
    ('Turkmen', 'tk'),
    ('Turkish', 'tr'),
    ('Ukrainian', 'uk'),
    ('Urdu', 'ur'),
    ('Vietnamese', 'vi'),
    ('Welsh', 'cy'),
    ('Xhosa', 'xh'),
    ('Yiddish', 'yi'),
    ('Yoruba', 'yo'),
    ('Zulu', 'zu'),
)

# Maps a language's display name to a (language code, model name) tuple.
LANGUAGE_MODELS = {
    language: (lang_code, _MODEL_PREFIX + lang_code)
    for language, lang_code in _LANGUAGE_CODES
}
94
+
95
@st.cache_resource
def load_model(target_language):
    """Return the ``(tokenizer, model)`` pair for *target_language*.

    The language is looked up in ``LANGUAGE_MODELS``. If no model name is
    registered for it, an error is shown with ``st.error`` and
    ``(None, None)`` is returned. Otherwise the tokenizer and the model are
    both loaded with ``from_pretrained`` from the same model name.

    The function is wrapped in ``st.cache_resource``.
    """
    _, checkpoint = LANGUAGE_MODELS.get(target_language, (None, None))

    if checkpoint:
        # Tokenizer first, then the model, both from the same checkpoint.
        marian_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
        marian_model = MarianMTModel.from_pretrained(checkpoint)
        return marian_tokenizer, marian_model

    st.error(f"Model for language '{target_language}' not found.")
    return None, None
105
+
106
def translate_text(text, target_language):
    """Translate English *text* into *target_language*.

    Args:
        text: The English source text.
        target_language: Display name of the target language, as used for
            the keys of ``LANGUAGE_MODELS``.

    Returns:
        The decoded translation of the first generated sequence, or ``""``
        when *text* is empty or whitespace-only, or when no tokenizer/model
        could be loaded for *target_language*.
    """
    # Nothing to translate: skip the model load and generation entirely.
    if not text or not text.strip():
        return ""

    try:
        tokenizer, model = load_model(target_language)
    except OSError as err:
        # from_pretrained reports a checkpoint that cannot be found or
        # downloaded as OSError; surface it in the UI and fall back to the
        # same empty result used when no model is registered.
        st.error(f"Could not load the model for '{target_language}': {err}")
        return ""

    if tokenizer is None or model is None:
        return ""

    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long text is not passed to the model unbounded.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Generate the translation and decode the single resulting sequence.
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
120
+
121
def main():
    """Render the translator page and handle the Translate button.

    Shows a title, a text area for the English input, and a select box listing
    the keys of ``LANGUAGE_MODELS``. When the button is pressed, the input is
    passed to ``translate_text`` and the result is written to the page.
    """
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    # Input text from the user
    source_text = st.text_area("Enter text in English:", "")

    # Select target language
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS),
    )

    if not st.button("Translate"):
        return

    # Whitespace-only input is treated the same as no input at all.
    if not source_text or not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)

    # translate_text returns "" when it could not produce a translation;
    # do not print a result header with nothing underneath it.
    if translated_text:
        st.write(f"Translated text ({target_language}):")
        st.write(translated_text)
142
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()