File size: 2,686 Bytes
9003587 463444e c3e5a3b 463444e 9003587 93d168d 463444e c3e5a3b 463444e 06d2814 93d168d 06d2814 463444e 9003587 06d2814 9003587 06d2814 463444e 48ff56c 93d168d 9003587 93d168d 48ff56c 9003587 48ff56c 9003587 48ff56c 9003587 48ff56c 9003587 48ff56c 9003587 48ff56c 463444e 93d168d 06d2814 463444e 9003587 463444e 93d168d 463444e 9003587 463444e 9003587 463444e 9003587 463444e 9003587 463444e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores200_codes import flores_codes
def load_models():
model_name_dict = {
"nllb-distilled-600M": "facebook/nllb-200-distilled-600M",
}
model_dict = {}
for call_name, real_name in model_name_dict.items():
print("\tLoading model:", call_name)
model = AutoModelForSeq2SeqLM.from_pretrained(real_name)
tokenizer = AutoTokenizer.from_pretrained(real_name)
model_dict[call_name] = {
"model": model,
"tokenizer": tokenizer,
}
return model_dict
# Load models and tokenizers once during initialization
model_dict = load_models()
# Translate text using preloaded models and tokenizers
def translate_text(source, target, text):
model_name = "nllb-distilled-600M"
if model_name in model_dict:
model = model_dict[model_name]["model"]
tokenizer = model_dict[model_name]["tokenizer"]
start_time = time.time()
source = flores_codes[source]
target = flores_codes[target]
translator = pipeline(
"translation",
model=model,
tokenizer=tokenizer,
src_lang=source,
tgt_lang=target,
)
output = translator(text, max_length=400)
end_time = time.time()
output = output[0]["translation_text"]
result = {
"inference_time": end_time - start_time,
"source": source,
"target": target,
"result": output,
}
return result
else:
raise KeyError(f"Model '{model_name}' not found in model_dict")
if __name__ == "__main__":
print("\tInitializing models")
lang_codes = list(flores_codes.keys())
inputs = [
gr.inputs.Dropdown(lang_codes, default="English", label="Source"),
gr.inputs.Dropdown(lang_codes, default="Nepali", label="Target"),
gr.inputs.Textbox(lines=5, label="Input text"),
]
outputs = gr.outputs.JSON()
title = "The Master Betters Translator"
desc = This is a beta version of The Master Betters Translator that utilizes pre-trained language models for translation. To use this app you need to have chosen the source and target language with your input text to get the output."
description = (
f"{desc}"
)
examples = [["English", "Nepali", "Hi. nice to meet you"]]
gr.Interface(
translate_text,
inputs,
outputs,
title=title,
description=description,
examples=examples,
examples_per_page=50,
).launch()
|