from transformers import MarianMTModel, MarianTokenizer

from translations.model_name_mapping import MODEL_NAME_MAPPING


def load_model_and_tokenizer(from_lang: str, to_lang: str) -> tuple[MarianMTModel, MarianTokenizer]:
    """Load the MarianMT model and tokenizer registered for a language pair.

    Args:
        from_lang: Source language code (e.g. "en").
        to_lang: Target language code (e.g. "es").

    Returns:
        A ``(model, tokenizer)`` pair ready for :func:`translate`.

    Raises:
        ValueError: If no model name is registered for ``(from_lang, to_lang)``.
    """
    print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")
    model_name = MODEL_NAME_MAPPING.get((from_lang, to_lang))
    print(f"model_name: {model_name}")
    # Fail fast with an actionable message: passing None to from_pretrained
    # raises an opaque error deep inside transformers otherwise.
    if model_name is None:
        raise ValueError(
            f"No translation model registered for language pair ({from_lang!r}, {to_lang!r})"
        )
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def translate(text: str, model: MarianMTModel, tokenizer: MarianTokenizer) -> str:
    """Translate ``text`` with a loaded MarianMT model/tokenizer pair.

    Returns the first (and, for a single input string, only) decoded
    translation, with special tokens stripped.
    """
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))
    translated_text = [
        tokenizer.decode(t, skip_special_tokens=True) for t in translated
    ]
    return translated_text[0]


def test_translations(from_lang: str, to_lang: str, text_to_translate: str) -> None:
    """Smoke-test one language pair: load its model, translate, and print the result."""
    model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
    translation = translate(text_to_translate, model, tokenizer)
    print(f"Translated text from: {from_lang}, to: {to_lang}, translation: {translation}")


if __name__ == "__main__":
    text_to_translate = "hello friends, who's hungry?"
    test_translations(from_lang="en", to_lang="es", text_to_translate=text_to_translate)
    test_translations(from_lang="en", to_lang="pt", text_to_translate=text_to_translate)

    text_to_translate = "un cafè sense sucre i amb llet, si us plau"
    test_translations(from_lang="ca", to_lang="es", text_to_translate=text_to_translate)