musfiqdehan's picture
Add word alignment mapping functions
4a52b88
raw
history blame
4.93 kB
"""
This module provides functions for translating text from one language to another
(Bengali to English by default), using BUET BanglaNMT and several deep-translator backends.
"""
import os
import re

from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator
from dotenv import load_dotenv
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from .text_preprocess import decontracting_words, space_punc
# Populate the process environment from the .env file before any key is read.
load_dotenv()

# API keys for the hosted translators; each is None when its variable is unset.
MICROSOFT_API_KEY = os.environ.get("MICROSOFT_TRANSLATOR_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
YANDEX_API_KEY = os.environ.get("YANDEX_API_KEY")
# Digit Translation: maps each Bengali digit to its ASCII counterpart.
digit_converter = {
    '০': '0',
    '১': '1',
    '২': '2',
    '৩': '3',
    '৪': '4',
    '৫': '5',
    '৬': '6',
    '৭': '7',
    '৮': '8',
    '৯': '9',
}


def get_translated_digit(sentence):
    """
    Translate the digits from Bengali to English.

    Every item of ``sentence`` that is a key of ``digit_converter`` is
    replaced by the mapped ASCII digit; every other item is kept unchanged.

    Args:
        sentence: The text to convert, iterated item by item.

    Returns:
        A new string with the Bengali digits replaced.
    """
    # dict.get with the character itself as the fallback replaces the manual
    # "if in keys / else" append loop with a single pass.
    return "".join(digit_converter.get(char, char) for char in sentence)
# Bangla to English Translation (BUET BanglaNMT)
# The model and its tokenizer are both loaded at import time from the same checkpoint.
_BANGLANMT_BN_EN_CHECKPOINT = "csebuetnlp/banglat5_nmt_bn_en"
translation_model_bn_en = AutoModelForSeq2SeqLM.from_pretrained(_BANGLANMT_BN_EN_CHECKPOINT)
translation_tokenizer_bn_en = AutoTokenizer.from_pretrained(_BANGLANMT_BN_EN_CHECKPOINT)
def banglanmt_translation(input_text):
    """
    Translate a sentence from Bengali to English using BUET BanglaNMT.

    Args:
        input_text: The text handed to the module-level BanglaNMT tokenizer.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Tokenize into PyTorch tensors ("pt") so the result can be unpacked
    # straight into generate().
    encoded = translation_tokenizer_bn_en(input_text, return_tensors="pt")
    generated = translation_model_bn_en.generate(**encoded)
    # Only the first generated sequence is decoded.
    first_sequence = generated[0]
    return translation_tokenizer_bn_en.decode(first_sequence, skip_special_tokens=True)
def google_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Google Translator.

    Requires the ``deep-translator`` package (``pip install -U deep-translator``).

    Args:
        sentence: The text to translate.
        source: Language code of ``sentence`` (default ``"bn"``).
        target: Language code to translate into (default ``"en"``).

    Returns:
        The value returned by ``GoogleTranslator.translate``.
    """
    # deep-translator takes the language pair in the constructor; keyword
    # arguments given to translate() do not select languages, so passing them
    # there left the translator on its own defaults.
    translator = GoogleTranslator(source=source, target=target)
    return translator.translate(sentence)
def microsoft_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Microsoft Translator.

    Requires the ``deep-translator`` package (``pip install -U deep-translator``)
    and the ``MICROSOFT_TRANSLATOR_KEY`` environment variable.

    Args:
        sentence: The text to translate.
        source: Language code of ``sentence`` (default ``"bn"``).
        target: Language code to translate into (default ``"en"``).

    Returns:
        The value returned by ``MicrosoftTranslator.translate``.
    """
    # Forward the caller's language pair instead of a hard-coded target, so
    # the ``source`` and ``target`` parameters actually take effect.
    translator = MicrosoftTranslator(
        api_key=MICROSOFT_API_KEY, source=source, target=target)
    return translator.translate(sentence)
def chatgpt_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence into ``target`` using the ChatGPT translator.

    Requires the ``deep-translator`` package (``pip install -U deep-translator``)
    and the ``OPENAI_API_KEY`` environment variable.

    Args:
        sentence: The text to translate.
        source: Accepted for signature parity with the other translation
            helpers; it is not forwarded to the translator.
        target: Language to translate into (default ``"en"``).

    Returns:
        The value returned by ``ChatGptTranslator.translate``.
    """
    chatgpt = ChatGptTranslator(api_key=OPENAI_API_KEY, target=target)
    return chatgpt.translate(sentence)
def yandex_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Yandex Translator.

    Requires the ``deep-translator`` package (``pip install -U deep-translator``)
    and the ``YANDEX_API_KEY`` environment variable.

    Args:
        sentence: The text to translate.
        source: Language code of ``sentence`` (default ``"bn"``).
        target: Language code to translate into (default ``"en"``).

    Returns:
        The value returned by ``YandexTranslator.translate``.
    """
    # The language pair belongs to the constructor: left there at its
    # defaults, the translator would not honour the requested source/target
    # even though they were passed to translate().
    translator = YandexTranslator(
        source=source, target=target, api_key=YANDEX_API_KEY)
    return translator.translate(sentence)
def mymemory_translation(sentence: str, source="bn-IN", target="en-US") -> str:
    """
    Translate a sentence from one language to another using MyMemory Translator.

    Requires the ``deep-translator`` package (``pip install -U deep-translator``).

    Args:
        sentence: The text to translate.
        source: Locale code of ``sentence`` (default ``"bn-IN"``).
        target: Locale code to translate into (default ``"en-US"``).

    Returns:
        The value returned by ``MyMemoryTranslator.translate``.
    """
    mymemory = MyMemoryTranslator(source=source, target=target)
    return mymemory.translate(sentence)
# Matches the currency abbreviation "Rs" only when it is not embedded in a
# longer ASCII word, so plurals of acronyms such as "VCRs" are left untouched
# while "Rs 500", "Rs.500" and "Rs500" are still rewritten.
_RUPEE_ABBREVIATION = re.compile(r"(?<![A-Za-z])Rs(?![A-Za-z])")


def get_better_translation(translator_func, src=""):
    """
    Translate ``src`` with ``translator_func`` and post-process the result.

    Steps, in order: Bengali digits in ``src`` are converted with
    ``get_translated_digit``; the converted text is translated; the result is
    passed through ``decontracting_words`` (presumably expanding English
    contractions -- confirm in text_preprocess); finally "rupees" and the
    stand-alone abbreviation "Rs" are replaced with "takas".

    Args:
        translator_func: Callable taking the source sentence as its only
            positional argument and returning the translated string.
        src: The source sentence (default: empty string).

    Returns:
        The post-processed translation.
    """
    src_mod = get_translated_digit(src)
    tgt = translator_func(src_mod)
    tgt = decontracting_words(tgt)
    tgt = tgt.replace('rupees', 'takas')
    # A plain str.replace('Rs', ...) would also rewrite "Rs" inside other
    # words; the pattern restricts the replacement to the abbreviation itself.
    return _RUPEE_ABBREVIATION.sub('takas', tgt)
def select_translator(src, translator):
    """
    Translate ``src`` with the backend named by ``translator``.

    Supported names are "Google", "BanglaNMT" and "MyMemory" (compared
    exactly, case-sensitive).

    Args:
        src: The source sentence.
        translator: Name of the translation backend to use.

    Returns:
        A ``(tgt_base, tgt)`` tuple: ``tgt_base`` is the backend's translation
        of ``src`` as given, and ``tgt`` is the result of
        ``get_better_translation`` passed through ``space_punc``. Both are
        ``None`` when ``translator`` is not a supported name.
    """
    # Resolve the backend first; the three branches then share one pipeline.
    if translator == "Google":
        translate = google_translation
    elif translator == "BanglaNMT":
        translate = banglanmt_translation
    elif translator == "MyMemory":
        translate = mymemory_translation
    else:
        return None, None

    # The improved translation is produced before the baseline one.
    improved = space_punc(get_better_translation(translate, src))
    baseline = translate(src)
    return baseline, improved