# File size: 7,936 Bytes

# 34aa1cf

# Import libraries
import tkinter as tk
from tkinter import ttk, messagebox

from keras.layers import TextVectorization
import re
import tensorflow.strings as tf_strings
import json
import string
from keras.models import load_model
import tensorflow as tf
from keras.preprocessing.text import tokenizer_from_json
from keras.utils import pad_sequences
import numpy as np
import difflib

# English to Spanish translation
# Characters deleted during standardization: ASCII punctuation plus "¿",
# minus the square brackets, which are deliberately left out of the set.
strip_chars = "".join(
    char for char in string.punctuation + "¿" if char not in "[]"
)

def custom_standardization(input_string):
    """Lowercase the input and delete every character in ``strip_chars``.

    Args:
        input_string: Value passed straight to ``tf_strings.lower``.

    Returns:
        The result of ``tf_strings.regex_replace`` applied to the lowercased
        input, with each character of ``strip_chars`` replaced by "".
    """
    # Escape the characters so each one is literal inside the regex class.
    removal_pattern = "[" + re.escape(strip_chars) + "]"
    lowered = tf_strings.lower(input_string)
    stripped = tf_strings.regex_replace(lowered, removal_pattern, "")
    return stripped

# Load the English vectorization layer configuration.
# JSON is UTF-8 text, so decode it explicitly instead of relying on the
# platform's locale-dependent default encoding.
with open('eng_vectorization_config.json', encoding='utf-8') as json_file:
    eng_vectorization_config = json.load(json_file)

# Recreate the English vectorization layer with basic configuration.
# The custom standardization function is supplied through the constructor,
# exactly as for the Spanish layer: `standardize` is a constructor argument of
# TextVectorization, and assigning an attribute of that name after the layer
# has been built is not a documented way to change its standardization.
eng_vectorization = TextVectorization(
    max_tokens=eng_vectorization_config['max_tokens'],
    output_mode=eng_vectorization_config['output_mode'],
    output_sequence_length=eng_vectorization_config['output_sequence_length'],
    standardize=custom_standardization
)

# Read the saved settings of the Spanish vectorization layer
with open('spa_vectorization_config.json') as json_file:
    spa_vectorization_config = json.load(json_file)

# Rebuild the Spanish layer from the three saved settings and attach the
# custom standardization function at construction time
spa_vectorization = TextVectorization(
    standardize=custom_standardization,
    **{
        option: spa_vectorization_config[option]
        for option in ('max_tokens', 'output_mode', 'output_sequence_length')
    },
)

# Load and set the English vocabulary.
# The vocabulary files are JSON, i.e. UTF-8 text; decoding them with the
# platform default (for example cp1252 on Windows) could corrupt non-ASCII
# tokens, so the encoding is stated explicitly.
with open('eng_vocab.json', encoding='utf-8') as json_file:
    eng_vocab = json.load(json_file)
# The file is no longer needed once parsed, so the layer is updated after
# the handle has been closed.
eng_vectorization.set_vocabulary(eng_vocab)

# Load and set the Spanish vocabulary (same explicit UTF-8 decoding).
with open('spa_vocab.json', encoding='utf-8') as json_file:
    spa_vocab = json.load(json_file)
spa_vectorization.set_vocabulary(spa_vocab)

# Load the Spanish model
transformer = load_model('transformer_model')

# Map each vocabulary position to the Spanish token stored at that position
spa_index_lookup = dict(enumerate(spa_vocab))
# Upper bound on the number of decoding steps in beam_search_decode
max_decoded_sentence_length = 20

# Shared list of (word, suggestions) pairs for words that failed the
# vocabulary check; it is cleared and refilled by check_and_correct_sentence
incorrect_words = []

def beam_search_decode(input_sentence, beam_width=3):
    """Decode a Spanish translation of ``input_sentence`` with beam search.

    Starting from the single hypothesis ``"[start]"``, every step extends each
    unfinished hypothesis with its ``beam_width`` highest-valued next tokens
    and keeps the ``beam_width`` best-scoring hypotheses overall. A hypothesis
    that already ends with ``"[end]"`` is finished: it is carried forward
    unchanged instead of having further tokens appended after the end marker.
    Decoding stops once every kept hypothesis is finished or after
    ``max_decoded_sentence_length`` steps.

    Args:
        input_sentence: English sentence to translate.
        beam_width: Number of hypotheses kept after each step, and number of
            next-token candidates considered per hypothesis.

    Returns:
        The best-scoring decoded string, still containing the ``"[start]"``
        marker (and ``"[end]"`` when it was generated).
    """
    tokenized_input_sentence = eng_vectorization([input_sentence])
    decoded_sentences = [("[start]", 0.0)]

    for i in range(max_decoded_sentence_length):
        all_candidates = []
        for decoded_sentence, score in decoded_sentences:
            # A finished hypothesis must not grow past its end marker; keep it
            # in the pool so it still competes with the unfinished ones.
            if decoded_sentence.endswith("[end]"):
                all_candidates.append((decoded_sentence, score))
                continue

            tokenized_target_sentence = spa_vectorization([decoded_sentence])[:, :-1]
            predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])
            top_k = tf.math.top_k(predictions[0, i, :], k=beam_width)

            # Convert once per hypothesis rather than once per candidate.
            token_indices = top_k.indices.numpy()
            token_values = top_k.values.numpy()

            # NOTE(review): the score is the running sum of the raw model
            # outputs. If the model emits probabilities, summing their
            # logarithms would be the usual choice - confirm against how the
            # model was trained before changing it.
            for token_index, token_value in zip(token_indices, token_values):
                predicted_token = spa_index_lookup[int(token_index)]
                all_candidates.append(
                    (decoded_sentence + " " + predicted_token, score + float(token_value))
                )

        ordered = sorted(all_candidates, key=lambda x: x[1], reverse=True)
        decoded_sentences = ordered[:beam_width]

        if all(sentence[0].endswith("[end]") for sentence in decoded_sentences):
            break

    return decoded_sentences[0][0]

# English to French translation
# Load the saved English-to-French model
model = load_model('english_to_french_model')

# Rebuild both tokenizers from their JSON exports; each file is parsed first
# and the tokenizer is created once the file has been closed
with open('english_tokenizer.json') as f:
    data = json.load(f)
english_tokenizer = tokenizer_from_json(data)

with open('french_tokenizer.json') as f:
    data = json.load(f)
french_tokenizer = tokenizer_from_json(data)

# Load max length (the padding width used by translate_to_french)
with open('sequence_length.json') as f:
    max_length = json.load(f)
    
def pad(x, length=None):
    """Pad the sequences in ``x`` at their end using ``pad_sequences``.

    Args:
        x: Sequences to pad.
        length: Forwarded as ``maxlen``; ``None`` leaves the choice of length
            to ``pad_sequences``.

    Returns:
        Whatever ``pad_sequences`` returns for ``x``.
    """
    padding_options = {'maxlen': length, 'padding': 'post'}
    return pad_sequences(x, **padding_options)

def translate_to_french(english_sentence):
    """Translate an English sentence to French with the loaded model.

    Args:
        english_sentence: English text to translate.

    Returns:
        The text produced by ``french_tokenizer.sequences_to_texts`` from the
        arg-max index of each row of the model's first prediction.
    """
    # Normalize the text: lowercase it and drop the characters . ? ! ,
    cleaned_text = re.sub(r'[.?!,]', '', english_sentence.lower())

    # Tokenize, pad to the fixed length and make the 2-D shape explicit
    token_ids = english_tokenizer.texts_to_sequences([cleaned_text])
    model_input = pad(token_ids, max_length).reshape((-1, max_length))

    # One sentence was submitted, so only the first prediction is used
    step_outputs = model.predict(model_input)[0]
    best_token_ids = list(map(np.argmax, step_outputs))

    return french_tokenizer.sequences_to_texts([best_token_ids])[0]

def get_word_suggestions(word, vocab):
    """Return up to three entries of ``vocab`` that closely resemble ``word``.

    Args:
        word: The word to look up near matches for.
        vocab: Candidate strings to compare against.

    Returns:
        A list of at most three close matches (similarity cutoff 0.6), most
        similar first; empty when no candidate is similar enough.
    """
    max_suggestions = 3
    similarity_cutoff = 0.6
    close_matches = difflib.get_close_matches(
        word, vocab, n=max_suggestions, cutoff=similarity_cutoff
    )
    return close_matches

def check_and_correct_sentence(sentence, vocab):
    """Check that every word of ``sentence`` occurs in ``vocab``.

    Each whitespace-separated token is lowercased and stripped of punctuation
    before the lookup, so input such as "Hello, world!" is compared as
    "hello" and "world" instead of being rejected for its capital letter and
    punctuation marks. Tokens that consist of punctuation only are ignored.

    Side effects:
        Clears the module-level ``incorrect_words`` list and refills it with
        one ``(word, suggestions)`` tuple per unknown word. Whenever at least
        one unknown word is found, an error dialog lists the words together
        with their suggestions, so a rejected sentence never fails silently.

    Args:
        sentence: The sentence typed by the user.
        vocab: Iterable of known words.

    Returns:
        True when every word is known (including for an empty sentence),
        False otherwise.
    """
    # Deletes ASCII punctuation and the Spanish opening question mark.
    punctuation_table = str.maketrans("", "", string.punctuation + "¿")
    # Build the set once: O(1) membership tests instead of a list scan per word.
    known_words = set(vocab)

    incorrect_words.clear()
    for raw_word in sentence.split():
        word = raw_word.lower().translate(punctuation_table)
        if not word:
            continue  # the token contained punctuation only
        if word not in known_words:
            incorrect_words.append((word, get_word_suggestions(word, vocab)))

    if not incorrect_words:
        return True

    lines = [
        f"Incorrect word(s) detected: {', '.join(w for w, _ in incorrect_words)}"
    ]
    for word, suggestions in incorrect_words:
        hint = ', '.join(suggestions) if suggestions else 'No suggestions available'
        lines.append(f"Suggestions for '{word}': {hint}")
    # Every line, including the last one, ends with a newline.
    message = "\n".join(lines) + "\n"

    messagebox.showerror("Error", message)
    return False

def translate_to_spanish(english_sentence):
    """Translate an English sentence to Spanish after a vocabulary check.

    Args:
        english_sentence: English text to translate.

    Returns:
        The output of ``beam_search_decode`` with the "[start]" and "[end]"
        markers removed and surrounding whitespace trimmed, or "" when
        ``check_and_correct_sentence`` rejects the sentence.
    """
    sentence_is_valid = check_and_correct_sentence(english_sentence, eng_vocab)
    if sentence_is_valid:
        decoded = beam_search_decode(english_sentence)
        for marker in ("[start]", "[end]"):
            decoded = decoded.replace(marker, "")
        return decoded.strip()
    return ""

# Function to handle translation request based on selected language
def handle_translate():
    """Translate the text in the input box into the selected language.

    Reads the sentence from ``text_input`` and the target language from
    ``language_var``, then replaces the content of ``translation_output``
    with the result. A warning dialog is shown, and nothing is translated,
    when the input is empty or when no supported language is selected (the
    language box starts out without a selection).
    """
    selected_language = language_var.get()
    english_sentence = text_input.get("1.0", "end-1c").strip()

    if not english_sentence:
        messagebox.showwarning("Warning", "Please enter a sentence to translate.")
        return

    # Dispatch table: an unknown or empty selection yields None instead of
    # leaving the translation undefined.
    translators = {
        "French": translate_to_french,
        "Spanish": translate_to_spanish,
    }
    translate = translators.get(selected_language)
    if translate is None:
        messagebox.showwarning("Warning", "Please select a language to translate to.")
        return

    translation = translate(english_sentence)

    translation_output.delete("1.0", "end")
    translation_output.insert("end", f"{selected_language} translation: {translation}")

# Setting up the main window
root = tk.Tk()
root.title("Language Translator")
root.geometry("550x600")

# Font configuration: one family and size, in a bold and a regular variant
font_style = "Times New Roman"
font_size = 14
heading_font = (font_style, font_size, 'bold')
body_font = (font_style, font_size)

# Input area: a heading above the multi-line box for the English sentence
input_frame = tk.Frame(root)
input_frame.pack(pady=10)

input_heading = tk.Label(
    input_frame,
    text="Enter the text to be translated",
    font=heading_font,
)
input_heading.pack()
text_input = tk.Text(input_frame, height=5, width=50, font=body_font)
text_input.pack()

# Target language: a label and a read-only drop-down bound to language_var
language_var = tk.StringVar()
language_label = tk.Label(
    root,
    text="Select the language to translate to",
    font=heading_font,
)
language_label.pack()
language_select = ttk.Combobox(
    root,
    textvariable=language_var,
    values=["French", "Spanish"],
    font=body_font,
    state="readonly",
)
language_select.pack()

# Button that starts the translation
submit_button = ttk.Button(root, text="Translate", command=handle_translate)
submit_button.pack(pady=10)

# Output area: a heading above the box that receives the translation
output_frame = tk.Frame(root)
output_frame.pack(pady=10)
output_heading = tk.Label(output_frame, text="Translation: ", font=heading_font)
output_heading.pack()

translation_output = tk.Text(output_frame, height=10, width=50, font=body_font)
translation_output.pack()

# Hand control to Tk's event loop
root.mainloop()