File size: 3,100 Bytes
6b9283a
 
 
b31c836
 
 
 
 
 
6b9283a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b31c836
6b9283a
b5d5b5e
 
 
6b9283a
b5d5b5e
6b9283a
b5d5b5e
b31c836
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d5b5e
b31c836
 
b5d5b5e
6b9283a
 
 
b5d5b5e
 
 
 
b31c836
b5d5b5e
6b9283a
b5d5b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b9283a
b5d5b5e
 
 
 
 
6b9283a
 
b5d5b5e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
import torch
import nltk

# Download the Punkt sentence-tokenizer data that sent_tokenize needs.
# Older NLTK releases load the pickled "punkt" resource, while newer ones
# (3.8.2 / 3.9 onward) look up "punkt_tab" instead, so fetch both to keep
# sentence splitting working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')

from nltk.tokenize import sent_tokenize

# Loaded (model, tokenizer) pairs, keyed by model name
models_cache = {}

def load_model(model_name):
    """
    Return the cached (model, tokenizer) pair for ``model_name``.

    On the first request for a name, the MarianMT tokenizer and model are
    loaded with ``from_pretrained``, the model is moved to CUDA when
    available, and the pair is stored in ``models_cache`` for later calls.
    """
    # Fast path: this model has already been loaded.
    try:
        return models_cache[model_name]
    except KeyError:
        pass

    marian_tokenizer = MarianTokenizer.from_pretrained(model_name)
    marian_model = MarianMTModel.from_pretrained(model_name)
    if torch.cuda.is_available():
        marian_model = marian_model.to('cuda')

    entry = (marian_model, marian_tokenizer)
    models_cache[model_name] = entry
    return entry

def translate_text(model_name, text):
    """
    Translate input text sentence by sentence using the specified model.

    Args:
        model_name: Model identifier passed to ``load_model``.
        text: Source text; it is split into sentences with ``sent_tokenize``
            and each sentence is translated on its own.

    Returns:
        The translated sentences joined with single spaces. If the model
        name or the text is missing (or the text is only whitespace), a
        prompt message is returned instead. Any exception raised while
        loading or translating is reported as an ``"Error: ..."`` string
        rather than propagated.
    """
    # Reject blank input up front so whitespace-only text does not trigger a
    # (potentially slow) model load just to return an empty string.
    if not model_name or not text or not text.strip():
        return "Please select a model and provide text for translation."

    try:
        # Load the model and tokenizer
        model, tokenizer = load_model(model_name)

        translated_sentences = []
        for sentence in sent_tokenize(text):
            # truncation=True keeps an over-long sentence within the
            # tokenizer's maximum input length instead of failing.
            tokens = tokenizer(
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Place the inputs on whatever device the model actually lives on.
            tokens = {name: tensor.to(model.device) for name, tensor in tokens.items()}

            # Generate and decode the translation for this sentence
            translated = model.generate(**tokens)
            translated_sentences.append(
                tokenizer.decode(translated[0], skip_special_tokens=True)
            )

        # Join translated sentences back into a single string
        return " ".join(translated_sentences)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Error: {str(e)}"

# Model options: (human-readable label, model identifier) pairs.
# The dropdown below uses only the second element (the model identifier)
# of each pair as its list of choices.
model_options = [
    ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
    ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
    # Add other models here...
]

# Create Gradio interface: a model dropdown, an input textbox, Translate and
# Clear buttons, and a read-only output textbox.
with gr.Blocks() as demo:
    gr.Markdown("# 🌍 Real-Time Sentence Translation")
    
    with gr.Row():
        # Choices are the model identifiers (second element of each
        # model_options pair); the labels in model_options are not used here.
        model_dropdown = gr.Dropdown(
            label="Select Translation Model",
            choices=[option[1] for option in model_options],
            type="value",
        )
    
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text (complete sentences)",
            lines=5,
            placeholder="Type here...",
        )
    
    with gr.Row():
        translate_button = gr.Button("Translate")
        clear_button = gr.Button("Clear")
    
    # Output is display-only (interactive=False).
    output_text = gr.Textbox(label="Translated Text", interactive=False)
    
    def clear_inputs():
        """Return two empty strings, one for each textbox wired to Clear."""
        return "", ""
    
    # Translate: pass the selected model and the entered text to
    # translate_text and show its return value in the output textbox.
    translate_button.click(
        fn=translate_text,
        inputs=[model_dropdown, input_text],
        outputs=output_text,
    )
    
    # Clear: takes no inputs and resets both the input and output textboxes.
    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[input_text, output_text],
    )

# Run the Gradio app (executed at module level, i.e. whenever this file runs)
demo.launch()