musfiqdehan committed on
Commit
cef75d0
1 Parent(s): d1e55f6

Fix bug in login functionality

Browse files
Files changed (1) hide show
  1. helper/translators.py +238 -83
helper/translators.py CHANGED
@@ -4,18 +4,8 @@ This file contains the functions to translate the text from one language to anot
4
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
  from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator
6
  from .text_preprocess import decontracting_words, space_punc
7
- from dotenv import load_dotenv
8
- import os
9
 
10
 
11
- # Load the environment variables from the .env file
12
- load_dotenv()
13
-
14
- # Translators API Keys
15
- MICROSOFT_API_KEY = os.getenv("MICROSOFT_TRANSLATOR_KEY")
16
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
- YANDEX_API_KEY = os.getenv("YANDEX_API_KEY")
18
-
19
  # Digit Translation
20
  digit_converter = {
21
  '০': '0',
@@ -46,20 +36,7 @@ def get_translated_digit(sentence):
46
 
47
  return "".join(each for each in translated_sentence)
48
 
49
- # Bangla to English Translation (BUET BanglaNMT)
50
- translation_model_bn_en = AutoModelForSeq2SeqLM.from_pretrained("csebuetnlp/banglat5_nmt_bn_en")
51
- translation_tokenizer_bn_en = AutoTokenizer.from_pretrained("csebuetnlp/banglat5_nmt_bn_en")
52
-
53
- def banglanmt_translation(input_text):
54
- """
55
- Translate a sentence from Bengali to English using BUET BanglaNMT
56
- """
57
- inputs = translation_tokenizer_bn_en(input_text, return_tensors="pt")
58
- outputs = translation_model_bn_en.generate(**inputs)
59
- translated_text = translation_tokenizer_bn_en.decode(outputs[0], skip_special_tokens=True)
60
- return translated_text
61
-
62
- def google_translation(sentence: str, source="bn", target="en") -> str:
63
  """
64
  Translate a sentence from one language to another using Google Translator.\n
65
  At first install dependencies \n
@@ -70,46 +47,6 @@ def google_translation(sentence: str, source="bn", target="en") -> str:
70
  sentence, source=source, target=target)
71
  return translated_sentence
72
 
73
- def microsoft_translation(sentence: str, source="bn", target="en") -> str:
74
- """
75
- Translate a sentence from one language to another using Microsoft Translator.\n
76
- At first install dependencies \n
77
- `!pip install -U deep-translator`
78
- """
79
- translator = MicrosoftTranslator(api_key=MICROSOFT_API_KEY, target='en')
80
- translated_sentence = translator.translate(sentence)
81
- return translated_sentence
82
-
83
- def chatgpt_translation(sentence: str, source="bn", target="en") -> str:
84
- """
85
- Translate a sentence from one language to another using ChatGPT Translator.\n
86
- At first install dependencies \n
87
- `!pip install -U deep-translator`
88
- """
89
- translator = ChatGptTranslator(api_key=OPENAI_API_KEY, target=target)
90
- translated_sentence = translator.translate(sentence)
91
- return translated_sentence
92
-
93
- def yandex_translation(sentence: str, source="bn", target="en") -> str:
94
- """
95
- Translate a sentence from one language to another using Yandex Translator.\n
96
- At first install dependencies \n
97
- `!pip install -U deep-translator`
98
- """
99
- translator = YandexTranslator(api_key=YANDEX_API_KEY)
100
- translated_sentence = translator.translate(
101
- sentence, source=source, target=target)
102
- return translated_sentence
103
-
104
- def mymemory_translation(sentence: str, source="bn-IN", target="en-US") -> str:
105
- """
106
- Translate a sentence from one language to another using MyMemory Translator.\n
107
- At first install dependencies \n
108
- `!pip install -U deep-translator`
109
- """
110
- translator = MyMemoryTranslator(source=source, target=target)
111
- translated_sentence = translator.translate(sentence)
112
- return translated_sentence
113
 
114
  def get_better_translation(translator_func, src=""):
115
  src_mod = get_translated_digit(src)
@@ -118,24 +55,242 @@ def get_better_translation(translator_func, src=""):
118
  tgt = tgt.replace('rupees', 'takas').replace('Rs', 'takas')
119
  return tgt
120
 
121
- def select_translator(src, translator):
122
- """
123
- Select the translator
124
- """
125
- tgt = None
126
- tgt_base = None
127
 
128
- if translator == "Google":
129
- tgt = get_better_translation(google_translation, src)
130
- tgt = space_punc(tgt)
131
- tgt_base = google_translation(src)
132
- elif translator == "BanglaNMT":
133
- tgt = get_better_translation(banglanmt_translation, src)
134
- tgt = space_punc(tgt)
135
- tgt_base = banglanmt_translation(src)
136
- elif translator == "MyMemory":
137
- tgt = get_better_translation(mymemory_translation, src)
138
- tgt = space_punc(tgt)
139
- tgt_base = mymemory_translation(src)
140
 
141
- return tgt_base, tgt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
  from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator
6
  from .text_preprocess import decontracting_words, space_punc
 
 
7
 
8
 
 
 
 
 
 
 
 
 
9
  # Digit Translation
10
  digit_converter = {
11
  '০': '0',
 
36
 
37
  return "".join(each for each in translated_sentence)
38
 
39
+ def google_translation(sentence: str, source="auto", target="en") -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  """
41
  Translate a sentence from one language to another using Google Translator.\n
42
  At first install dependencies \n
 
47
  sentence, source=source, target=target)
48
  return translated_sentence
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def get_better_translation(translator_func, src=""):
52
  src_mod = get_translated_digit(src)
 
55
  tgt = tgt.replace('rupees', 'takas').replace('Rs', 'takas')
56
  return tgt
57
 
58
+ # def select_translator(src, translator):
59
+ # """
60
+ # Select the translator
61
+ # """
62
+ # tgt = None
63
+ # tgt_base = None
64
 
65
+ # if translator == "Google":
66
+ # tgt = get_better_translation(google_translation, src)
67
+ # tgt = space_punc(tgt)
68
+ # tgt_base = google_translation(src)
69
+ # elif translator == "BanglaNMT":
70
+ # tgt = get_better_translation(banglanmt_translation, src)
71
+ # tgt = space_punc(tgt)
72
+ # tgt_base = banglanmt_translation(src)
73
+ # elif translator == "MyMemory":
74
+ # tgt = get_better_translation(mymemory_translation, src)
75
+ # tgt = space_punc(tgt)
76
+ # tgt_base = mymemory_translation(src)
77
 
78
+ # return tgt_base, tgt
79
+
80
+ # Afrikaans
81
+ # Albanian
82
+ # Arabic
83
+ # Aragonese
84
+ # Armenian
85
+ # Asturian
86
+ # Azerbaijani
87
+ # Bashkir
88
+ # Basque
89
+ # Bavarian
90
+ # Belarusian
91
+ # Bengali
92
+ # Bishnupriya Manipuri
93
+ # Bosnian
94
+ # Breton
95
+ # Bulgarian
96
+ # Burmese
97
+ # Catalan
98
+ # Cebuano
99
+ # Chechen
100
+ # Chinese (Simplified)
101
+ # Chinese (Traditional)
102
+ # Chuvash
103
+ # Croatian
104
+ # Czech
105
+ # Danish
106
+ # Dutch
107
+ # English
108
+ # Estonian
109
+ # Finnish
110
+ # French
111
+ # Galician
112
+ # Georgian
113
+ # German
114
+ # Greek
115
+ # Gujarati
116
+ # Haitian
117
+ # Hebrew
118
+ # Hindi
119
+ # Hungarian
120
+ # Icelandic
121
+ # Ido
122
+ # Indonesian
123
+ # Irish
124
+ # Italian
125
+ # Japanese
126
+ # Javanese
127
+ # Kannada
128
+ # Kazakh
129
+ # Kirghiz
130
+ # Korean
131
+ # Latin
132
+ # Latvian
133
+ # Lithuanian
134
+ # Lombard
135
+ # Low Saxon
136
+ # Luxembourgish
137
+ # Macedonian
138
+ # Malagasy
139
+ # Malay
140
+ # Malayalam
141
+ # Marathi
142
+ # Minangkabau
143
+ # Nepali
144
+ # Newar
145
+ # Norwegian (Bokmal)
146
+ # Norwegian (Nynorsk)
147
+ # Occitan
148
+ # Persian (Farsi)
149
+ # Piedmontese
150
+ # Polish
151
+ # Portuguese
152
+ # Punjabi
153
+ # Romanian
154
+ # Russian
155
+ # Scots
156
+ # Serbian
157
+ # Serbo-Croatian
158
+ # Sicilian
159
+ # Slovak
160
+ # Slovenian
161
+ # South Azerbaijani
162
+ # Spanish
163
+ # Sundanese
164
+ # Swahili
165
+ # Swedish
166
+ # Tagalog
167
+ # Tajik
168
+ # Tamil
169
+ # Tatar
170
+ # Telugu
171
+ # Turkish
172
+ # Ukrainian
173
+ # Urdu
174
+ # Uzbek
175
+ # Vietnamese
176
+ # Volapük
177
+ # Waray-Waray
178
+ # Welsh
179
+ # West Frisian
180
+ # Western Punjabi
181
+ # Yoruba
182
+ # Thai
183
+ # Mongolian
184
+
185
+ target_lang_dict = {
186
+ "Afrikaans": "af",
187
+ "Albanian": "sq",
188
+ "Arabic": "ar",
189
+ "Aragonese": "an",
190
+ "Armenian": "hy",
191
+ "Asturian": "ast",
192
+ "Azerbaijani": "az",
193
+ "Bashkir": "ba",
194
+ "Basque": "eu",
195
+ "Bavarian": "bar",
196
+ "Belarusian": "be",
197
+ "Bengali": "bn",
198
+ "Bishnupriya Manipuri": "bpy",
199
+ "Bosnian": "bs",
200
+ "Breton": "br",
201
+ "Bulgarian": "bg",
202
+ "Burmese": "my",
203
+ "Catalan": "ca",
204
+ "Cebuano": "ceb",
205
+ "Chechen": "ce",
206
+ "Chinese (Simplified)": "zh",
207
+ "Chinese (Traditional)": "zh-tw",
208
+ "Chuvash": "cv",
209
+ "Croatian": "hr",
210
+ "Czech": "cs",
211
+ "Danish": "da",
212
+ "Dutch": "nl",
213
+ "English": "en",
214
+ "Estonian": "et",
215
+ "Finnish": "fi",
216
+ "French": "fr",
217
+ "Galician": "gl",
218
+ "Georgian": "ka",
219
+ "German": "de",
220
+ "Greek": "el",
221
+ "Gujarati": "gu",
222
+ "Haitian": "ht",
223
+ "Hebrew": "he",
224
+ "Hindi": "hi",
225
+ "Hungarian": "hu",
226
+ "Icelandic": "is",
227
+ "Ido": "io",
228
+ "Indonesian": "id",
229
+ "Irish": "ga",
230
+ "Italian": "it",
231
+ "Japanese": "ja",
232
+ "Javanese": "jv",
233
+ "Kannada": "kn",
234
+ "Kazakh": "kk",
235
+ "Kirghiz": "ky",
236
+ "Korean": "ko",
237
+ "Latin": "la",
238
+ "Latvian": "lv",
239
+ "Lithuanian": "lt",
240
+ "Lombard": "lmo",
241
+ "Low Saxon": "nds",
242
+ "Luxembourgish": "lb",
243
+ "Macedonian": "mk",
244
+ "Malagasy": "mg",
245
+ "Malay": "ms",
246
+ "Malayalam": "ml",
247
+ "Marathi": "mr",
248
+ "Minangkabau": "min",
249
+ "Nepali": "ne",
250
+ "Newar": "new",
251
+ "Norwegian (Bokmal)": "nb",
252
+ "Norwegian (Nynorsk)": "nn",
253
+ "Occitan": "oc",
254
+ "Persian (Farsi)": "fa",
255
+ "Piedmontese": "pms",
256
+ "Polish": "pl",
257
+ "Portuguese": "pt",
258
+ "Punjabi": "pa",
259
+ "Romanian": "ro",
260
+ "Russian": "ru",
261
+ "Scots": "sco",
262
+ "Serbian": "sr",
263
+ "Serbo-Croatian": "sh",
264
+ "Sicilian": "scn",
265
+ "Slovak": "sk",
266
+ "Slovenian": "sl",
267
+ "South Azerbaijani": "azb",
268
+ "Spanish": "es",
269
+ "Sundanese": "su",
270
+ "Swahili": "sw",
271
+ "Swedish": "sv",
272
+ "Tagalog": "tl",
273
+ "Tajik": "tg",
274
+ "Tamil": "ta",
275
+ "Tatar": "tt",
276
+ "Telugu": "te",
277
+ "Turkish": "tr",
278
+ "Ukrainian": "uk",
279
+ "Urdu": "ur",
280
+ "Uzbek": "uz",
281
+ "Vietnamese": "vi",
282
+ "Volapük": "vo",
283
+ "Waray-Waray": "war",
284
+ "Welsh": "cy",
285
+ "West Frisian": "fy",
286
+ "Western Punjabi": "pnb",
287
+ "Yoruba": "yo",
288
+ "Thai": "th",
289
+ "Mongolian": "mn"
290
+ }
291
+
292
+ def select_target_lang_code(lang):
293
+ """
294
+ Select the target language code
295
+ """
296
+ return target_lang_dict[lang] if lang in target_lang_dict else "en"