"""Functions to translate text from one language to another."""

import re

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from deep_translator import (
    GoogleTranslator,
    MyMemoryTranslator,
    MicrosoftTranslator,
    YandexTranslator,
    ChatGptTranslator,
)

from .text_preprocess import decontracting_words, space_punc

# Digit translation: maps each Bengali digit character to its ASCII digit.
digit_converter = {
    '০': '0',
    '১': '1',
    '২': '2',
    '৩': '3',
    '৪': '4',
    '৫': '5',
    '৬': '6',
    '৭': '7',
    '৮': '8',
    '৯': '9',
}

# Matches the currency abbreviation "Rs" only when it is not part of a longer
# ASCII word. A plain ``str.replace('Rs', ...)`` would also corrupt words such
# as "VCRs"; "Rs 500", "Rs.500", "Rs500" and "500Rs" still match.
_RS_ABBREVIATION = re.compile(r"(?<![A-Za-z])Rs(?![A-Za-z])")


def get_translated_digit(sentence: str) -> str:
    """Replace every Bengali digit in *sentence* with its ASCII digit.

    Characters that are not keys of ``digit_converter`` are copied through
    unchanged.

    Args:
        sentence: Text to convert; it is processed one character at a time.

    Returns:
        The converted text as a single string.
    """
    # Look the table up at call time so later edits to ``digit_converter``
    # are still honoured.
    return "".join(digit_converter.get(char, char) for char in sentence)


def google_translation(sentence, tgt_lang_code):
    """Translate *sentence* into *tgt_lang_code* using Google Translator.

    The source language is auto-detected (``source='auto'``).

    Requires the ``deep-translator`` package:
    ``pip install -U deep-translator``

    Args:
        sentence: Text to translate.
        tgt_lang_code: Target language code passed to ``GoogleTranslator``.

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for the sentence.
    """
    translator = GoogleTranslator(source='auto', target=tgt_lang_code)
    return translator.translate(sentence)


def get_better_translation(src, tgt_lang_code):
    """Translate *src* with digit and currency-word normalisation.

    Steps, in order:
      1. Convert Bengali digits in *src* to ASCII digits.
      2. Translate the result with ``google_translation``.
      3. Pass the translation through ``decontracting_words``
         (presumably expands English contractions -- confirm against
         ``text_preprocess``).
      4. Rewrite the currency words "rupees" and "Rs" to "takas".

    Args:
        src: Source text to translate.
        tgt_lang_code: Target language code passed to the translator.

    Returns:
        The post-processed translation.
    """
    src_mod = get_translated_digit(src)
    tgt = google_translation(src_mod, tgt_lang_code)
    tgt = decontracting_words(tgt)
    tgt = tgt.replace('rupees', 'takas')
    # Only replace a stand-alone "Rs" so unrelated words containing the
    # letters "Rs" are left intact.
    return _RS_ABBREVIATION.sub('takas', tgt)


# Display name -> language code.
# NOTE(review): some codes (e.g. "zh", "zh-tw", "he") may not match the codes
# accepted by GoogleTranslator -- confirm against the translator in use.
target_lang_dict = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Arabic": "ar",
    "Aragonese": "an",
    "Armenian": "hy",
    "Asturian": "ast",
    "Azerbaijani": "az",
    "Bashkir": "ba",
    "Basque": "eu",
    "Bavarian": "bar",
    "Belarusian": "be",
    "Bengali": "bn",
    "Bishnupriya Manipuri": "bpy",
    "Bosnian": "bs",
    "Breton": "br",
    "Bulgarian": "bg",
    "Burmese": "my",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Chechen": "ce",
    "Chinese (Simplified)": "zh",
    "Chinese (Traditional)": "zh-tw",
    "Chuvash": "cv",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Estonian": "et",
    "Finnish": "fi",
    "French": "fr",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian": "ht",
    "Hebrew": "he",
    "Hindi": "hi",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Ido": "io",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Kirghiz": "ky",
    "Korean": "ko",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Lombard": "lmo",
    "Low Saxon": "nds",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Marathi": "mr",
    "Minangkabau": "min",
    "Nepali": "ne",
    "Newar": "new",
    "Norwegian (Bokmal)": "nb",
    "Norwegian (Nynorsk)": "nn",
    "Occitan": "oc",
    "Persian (Farsi)": "fa",
    "Piedmontese": "pms",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Scots": "sco",
    "Serbian": "sr",
    "Serbo-Croatian": "sh",
    "Sicilian": "scn",
    "Slovak": "sk",
    "Slovenian": "sl",
    "South Azerbaijani": "azb",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tagalog": "tl",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Volapük": "vo",
    "Waray-Waray": "war",
    "Welsh": "cy",
    "West Frisian": "fy",
    "Western Punjabi": "pnb",
    "Yoruba": "yo",
    "Thai": "th",
    "Mongolian": "mn",
}


def select_target_lang_code(lang):
    """Return the language code for the display name *lang*.

    Args:
        lang: A language name as used for the keys of ``target_lang_dict``.

    Returns:
        The matching code, or ``"en"`` when *lang* is not a known key.
    """
    return target_lang_dict.get(lang, "en")