File size: 2,487 Bytes
463444e
 
 
c3e5a3b
463444e
 
06d2814
463444e
 
c3e5a3b
463444e
 
 
 
 
06d2814
820763d
463444e
06d2814
 
 
 
463444e
 
 
06d2814
 
 
463444e
 
06d2814
 
463444e
06d2814
 
463444e
 
 
 
 
06d2814
 
463444e
06d2814
463444e
 
 
06d2814
463444e
06d2814
 
 
463444e
06d2814
463444e
 
06d2814
463444e
06d2814
463444e
 
 
 
06d2814
 
 
463444e
 
 
 
06d2814
463444e
06d2814
 
463444e
06d2814
463444e
 
06d2814
463444e
 
 
06d2814
463444e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores200_codes import flores_codes

# Load models and tokenizers once during initialization
def load_models():
    """Load every configured translation checkpoint together with its tokenizer.

    Returns:
        dict: Maps each short model name to a dict with two entries,
        ``"model"`` (from ``AutoModelForSeq2SeqLM.from_pretrained``) and
        ``"tokenizer"`` (from ``AutoTokenizer.from_pretrained``).
    """
    # Short name used inside this file -> checkpoint identifier to load.
    checkpoints = {
        "nllb-distilled-600M": "facebook/nllb-200-distilled-600M",
    }

    loaded = {}
    for alias in checkpoints:
        checkpoint_id = checkpoints[alias]
        print("\tLoading model:", alias)
        # Keyword arguments are evaluated left to right, so the model is
        # loaded before its tokenizer.
        loaded[alias] = dict(
            model=AutoModelForSeq2SeqLM.from_pretrained(checkpoint_id),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint_id),
        )

    return loaded

# Translate text using preloaded models and tokenizers
def translate_text(source_lang, target_lang, input_text, model_dict):
    """Translate ``input_text`` from one language to another.

    Args:
        source_lang: Key into ``flores_codes`` naming the input language.
        target_lang: Key into ``flores_codes`` naming the output language.
        input_text: The text to translate.
        model_dict: Mapping of model name to a dict holding ``"model"`` and
            ``"tokenizer"`` entries; the ``"nllb-distilled-600M"`` entry is
            the one used here.

    Returns:
        dict: ``"inference_time"`` (elapsed seconds as a float), ``"source"``
        and ``"target"`` (the language names passed in) and ``"result"``
        (the translated text).

    Raises:
        KeyError: If ``model_dict`` has no ``"nllb-distilled-600M"`` entry.
            An unknown language name is also expected to raise ``KeyError``,
            assuming ``flores_codes`` is a plain dict (TODO confirm).
    """
    model_name = "nllb-distilled-600M"

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    source_code = flores_codes[source_lang]
    target_code = flores_codes[target_lang]

    model = model_dict[model_name]["model"]
    tokenizer = model_dict[model_name]["tokenizer"]

    # The pipeline is rebuilt on every call because the language pair is
    # fixed at construction time; the model and tokenizer are reused as-is.
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source_code,
        tgt_lang=target_code,
    )
    translated_output = translator(input_text, max_length=400)

    # The timed span covers the lookups and pipeline construction as well as
    # the translation call itself.
    elapsed = time.perf_counter() - start_time

    return {
        "inference_time": elapsed,
        "source": source_lang,
        "target": target_lang,
        # The first item of the pipeline output carries the translated text.
        "result": translated_output[0]["translation_text"],
    }

if __name__ == "__main__":
    print("\tInitializing models")

    # Load models and tokenizers once at startup so every request reuses them.
    model_dict = load_models()

    def _translate(source_lang, target_lang, input_text):
        """Adapt ``translate_text`` to the three UI inputs.

        The interface passes one argument per input component, while
        ``translate_text`` also needs the preloaded ``model_dict``. Handing
        ``translate_text`` to the interface directly leaves that fourth
        argument unfilled, so it is bound here.
        """
        return translate_text(source_lang, target_lang, input_text, model_dict)

    # NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in
    # newer Gradio releases - confirm the pinned Gradio version before upgrading.
    lang_codes = list(flores_codes.keys())
    inputs = [
        gr.inputs.Dropdown(lang_codes, default="English", label="Source Language"),
        gr.inputs.Dropdown(lang_codes, default="Nepali", label="Target Language"),
        gr.inputs.Textbox(lines=5, label="Input Text"),
    ]

    outputs = gr.outputs.JSON()

    title = "Masterful Translator"

    app_description = (
        "This is a beta version of the Masterful Translator that utilizes pre-trained language models for translation."
    )
    # Each example row supplies one value per input component, in order.
    examples = [["English", "Nepali", "Hello, how are you?"]]

    gr.Interface(
        _translate,
        inputs,
        outputs,
        title=title,
        description=app_description,
        examples=examples,
        examples_per_page=50,
    ).launch()