# NullClass_Task-2 / Task 2 /word detection.py
# prajwath's picture
# Upload 28 files
# 34aa1cf verified
# Import libraries
import tkinter as tk
from tkinter import ttk, messagebox
from keras.layers import TextVectorization
import re
import tensorflow.strings as tf_strings
import json
import string
from keras.models import load_model
import tensorflow as tf
from keras.preprocessing.text import tokenizer_from_json
from keras.utils import pad_sequences
import numpy as np
import difflib
# English to Spanish translation
# Characters deleted by custom_standardization: ASCII punctuation plus "¿",
# except the square brackets, which are kept so that bracketed marker tokens
# such as "[start]" pass through the standardization step unchanged.
strip_chars = "".join(
    char for char in string.punctuation + "¿" if char not in "[]"
)
def custom_standardization(input_string):
    """Lower-case ``input_string`` and delete every character in ``strip_chars``.

    Args:
        input_string: Value handed to ``tf_strings.lower`` (presumably a
            string tensor supplied by a TextVectorization layer -- confirm
            against the callers).

    Returns:
        The result of ``tf_strings.regex_replace`` after lower-casing, with
        each character listed in ``strip_chars`` replaced by the empty string.
    """
    # Escape the characters so they are taken literally inside the regex
    # character class.
    removal_pattern = f"[{re.escape(strip_chars)}]"
    return tf_strings.regex_replace(
        tf_strings.lower(input_string), removal_pattern, ""
    )
# Rebuild the English vectorization layer from its saved configuration file.
with open('eng_vectorization_config.json') as json_file:
    eng_vectorization_config = json.load(json_file)

# Only these three saved settings are forwarded to the layer constructor.
eng_vectorization = TextVectorization(
    **{
        option: eng_vectorization_config[option]
        for option in ('max_tokens', 'output_mode', 'output_sequence_length')
    }
)

# Attach the custom standardization function to the layer object.
# NOTE(review): the Spanish layer receives this function through the
# ``standardize=`` constructor argument instead; confirm that assigning the
# attribute after construction really changes how this layer standardizes
# its input before relying on it.
eng_vectorization.standardize = custom_standardization
# Rebuild the Spanish vectorization layer from its saved configuration file.
with open('spa_vectorization_config.json') as json_file:
    spa_vectorization_config = json.load(json_file)

# The three saved settings are forwarded as-is; the standardization function
# is supplied directly to the constructor.
spa_vectorization = TextVectorization(
    standardize=custom_standardization,
    **{
        option: spa_vectorization_config[option]
        for option in ('max_tokens', 'output_mode', 'output_sequence_length')
    }
)
# Read both saved vocabularies from disk ...
with open('eng_vocab.json') as json_file:
    eng_vocab = json.load(json_file)
with open('spa_vocab.json') as json_file:
    spa_vocab = json.load(json_file)

# ... then install each one on its vectorization layer.
eng_vectorization.set_vocabulary(eng_vocab)
spa_vectorization.set_vocabulary(spa_vocab)
# Load the saved English-to-Spanish model.
transformer = load_model('transformer_model')

# Map each position in the Spanish vocabulary list to its token, so a
# predicted token index can be turned back into text.
spa_index_lookup = dict(enumerate(spa_vocab))

# Upper bound on the number of decoding steps in beam_search_decode.
max_decoded_sentence_length = 20

# Shared list of (word, suggestions) pairs filled in by
# check_and_correct_sentence.
incorrect_words = []
def beam_search_decode(input_sentence, beam_width=3):
    """Decode ``input_sentence`` into Spanish with a beam search.

    At every step each hypothesis is extended with the ``beam_width``
    highest-valued tokens from the model output at that position, and the
    ``beam_width`` best-scoring extensions are kept. A hypothesis' score is
    the running sum of the model output values of its tokens.

    Args:
        input_sentence: English sentence; it is vectorized with
            ``eng_vectorization``.
        beam_width: Number of hypotheses kept after each step, and number of
            candidate tokens tried per hypothesis. Must be at least 1.

    Returns:
        The best-scoring hypothesis as a string that starts with "[start]".
        If it contains "[end]", everything after the first "[end]" is removed.

    Raises:
        ValueError: If ``beam_width`` is smaller than 1.
    """
    if beam_width < 1:
        # With no candidates the beam would become empty and the final
        # lookup would fail with an unhelpful IndexError.
        raise ValueError("beam_width must be at least 1")

    end_token = "[end]"
    tokenized_input_sentence = eng_vectorization([input_sentence])
    # Each entry is (text decoded so far, accumulated score).
    decoded_sentences = [("[start]", 0.0)]

    for i in range(max_decoded_sentence_length):
        all_candidates = []
        for decoded_sentence, score in decoded_sentences:
            # The last column of the vectorized target is dropped before it
            # is handed to the model.
            tokenized_target_sentence = spa_vectorization([decoded_sentence])[:, :-1]
            predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])
            top_k = tf.math.top_k(predictions[0, i, :], k=beam_width)
            token_indices = top_k.indices.numpy()
            token_scores = top_k.values.numpy()
            for token_index, token_score in zip(token_indices, token_scores):
                predicted_token = spa_index_lookup[token_index]
                all_candidates.append(
                    (decoded_sentence + " " + predicted_token, score + token_score)
                )

        ordered = sorted(all_candidates, key=lambda x: x[1], reverse=True)
        decoded_sentences = ordered[:beam_width]

        # Scores are plain sums, so hypotheses are only comparable at equal
        # length; that is why a hypothesis keeps being extended even after it
        # has produced "[end]". Consequently "[end]" may sit in the middle of
        # a hypothesis, and the search is finished as soon as every kept
        # hypothesis contains it (not only when all of them end with it).
        if all(end_token in sentence for sentence, _ in decoded_sentences):
            break

    best_sentence = decoded_sentences[0][0]
    # Tokens generated after the first "[end]" are not part of the
    # translation; cut them off.
    end_position = best_sentence.find(end_token)
    if end_position != -1:
        best_sentence = best_sentence[:end_position + len(end_token)]
    return best_sentence
# English to French translation
def _load_json(path):
    """Return the parsed JSON content of the file at ``path``."""
    with open(path) as handle:
        return json.load(handle)


# Saved English-to-French model.
model = load_model('english_to_french_model')

# Tokenizers are rebuilt from their saved JSON descriptions.
english_tokenizer = tokenizer_from_json(_load_json('english_tokenizer.json'))
french_tokenizer = tokenizer_from_json(_load_json('french_tokenizer.json'))

# Sequence length used when padding and reshaping the French model's input.
max_length = _load_json('sequence_length.json')
def pad(x, length=None):
    """Pad the sequences in ``x`` at the end via ``pad_sequences``.

    Args:
        x: Sequences forwarded unchanged to ``pad_sequences``.
        length: Forwarded as ``maxlen``; ``None`` leaves the choice of length
            to ``pad_sequences``.

    Returns:
        Whatever ``pad_sequences`` returns for ``padding='post'``.
    """
    padded = pad_sequences(x, padding='post', maxlen=length)
    return padded
def translate_to_french(english_sentence):
    """Translate ``english_sentence`` to French with the loaded ``model``.

    Args:
        english_sentence: English text as a Python string.

    Returns:
        The text produced by ``french_tokenizer.sequences_to_texts`` for the
        highest-valued token index of each row of the first prediction.
    """
    # Lower-case and drop the four punctuation marks . ? ! ,
    cleaned = re.sub(r'[.?!,]', '', english_sentence.lower())
    token_ids = english_tokenizer.texts_to_sequences([cleaned])
    # Pad to max_length and force a (-1, max_length) shape for the model.
    model_input = pad(token_ids, max_length).reshape((-1, max_length))
    first_prediction = model.predict(model_input)[0]
    predicted_ids = [np.argmax(row) for row in first_prediction]
    return french_tokenizer.sequences_to_texts([predicted_ids])[0]
def get_word_suggestions(word, vocab, n=3, cutoff=0.6):
    """Return the vocabulary entries most similar to ``word``.

    Thin wrapper around ``difflib.get_close_matches``.

    Args:
        word: The word to find replacements for.
        vocab: Sequence of candidate strings.
        n: Maximum number of suggestions to return (default 3).
        cutoff: Minimum similarity ratio, between 0 and 1, a candidate needs
            in order to be returned (default 0.6).

    Returns:
        A list of at most ``n`` entries of ``vocab``, best match first; empty
        when nothing reaches ``cutoff``.
    """
    return difflib.get_close_matches(word, vocab, n=n, cutoff=cutoff)
def check_and_correct_sentence(sentence, vocab):
    """Check the words of ``sentence`` against ``vocab``.

    Each whitespace-separated word is normalised the way
    ``custom_standardization`` does it (lower-cased, characters listed in
    ``strip_chars`` removed) before it is looked up, so capitalisation and
    attached punctuation such as "Hello," or "world." do not count as
    spelling mistakes. Words that are empty after normalisation are ignored.

    The module-level list ``incorrect_words`` is cleared and then filled with
    one ``(normalised_word, suggestions)`` pair per unknown word.

    Args:
        sentence: Sentence typed by the user.
        vocab: Iterable of known words; also the candidate pool for
            suggestions.

    Returns:
        ``False`` when two or more unknown words were found (an error dialog
        listing them and their suggestions is shown first), ``True``
        otherwise.
    """
    strip_table = str.maketrans("", "", strip_chars)
    # Build the lookup set once instead of scanning the vocabulary per word.
    known_words = set(vocab)

    incorrect_words.clear()
    for raw_word in sentence.split():
        word = raw_word.lower().translate(strip_table)
        if not word or word in known_words:
            continue
        incorrect_words.append((word, get_word_suggestions(word, vocab)))

    # A single unknown word is tolerated; only two or more block translation.
    if len(incorrect_words) < 2:
        return True

    flagged = ', '.join(word for word, _ in incorrect_words)
    message_parts = [f"Incorrect word(s) detected: {flagged}\n"]
    for word, suggestions in incorrect_words:
        hint = ', '.join(suggestions) if suggestions else 'No suggestions available'
        message_parts.append(f"Suggestions for '{word}': {hint}\n")
    messagebox.showerror("Error", "".join(message_parts))
    return False
def translate_to_spanish(english_sentence):
    """Translate ``english_sentence`` to Spanish.

    The sentence is first checked against ``eng_vocab``; if the check fails
    no translation is attempted.

    Args:
        english_sentence: English text as a Python string.

    Returns:
        The decoded sentence with every "[start]" and "[end]" marker removed
        and surrounding whitespace stripped, or "" when the vocabulary check
        fails.
    """
    if check_and_correct_sentence(english_sentence, eng_vocab):
        decoded = beam_search_decode(english_sentence)
        for marker in ("[start]", "[end]"):
            decoded = decoded.replace(marker, "")
        return decoded.strip()
    return ""
# Function to handle translation request based on selected language
def handle_translate():
    """Translate the text in the input box and show it in the output box.

    Reads the target language from ``language_var`` and the sentence from
    ``text_input``. A warning dialog is shown, and nothing else happens, when
    the sentence is empty or when no supported language has been selected.
    Otherwise ``translation_output`` is cleared and filled with the result.
    """
    selected_language = language_var.get()
    english_sentence = text_input.get("1.0", "end-1c").strip()

    if not english_sentence:
        messagebox.showwarning("Warning", "Please enter a sentence to translate.")
        return

    if selected_language == "French":
        translation = translate_to_french(english_sentence)
    elif selected_language == "Spanish":
        translation = translate_to_spanish(english_sentence)
    else:
        # The combobox starts out empty; without this branch ``translation``
        # would be unbound below and the callback would raise.
        messagebox.showwarning("Warning", "Please select a language to translate to.")
        return

    translation_output.delete("1.0", "end")
    translation_output.insert("end", f"{selected_language} translation: {translation}")
# Main application window
root = tk.Tk()
root.title("Language Translator")
root.geometry("550x600")

# Fonts shared by all widgets: a regular face for text, a bold one for headings
font_style = "Times New Roman"
font_size = 14
body_font = (font_style, font_size)
heading_font = (font_style, font_size, 'bold')

# --- Input area: heading plus a text box for the English sentence ---
input_frame = tk.Frame(root)
input_frame.pack(pady=10)

input_heading = tk.Label(input_frame, text="Enter the text to be translated", font=heading_font)
input_heading.pack()

text_input = tk.Text(input_frame, height=5, width=50, font=body_font)
text_input.pack()

# --- Target language: label plus a read-only drop-down ---
language_var = tk.StringVar()

language_label = tk.Label(root, text="Select the language to translate to", font=heading_font)
language_label.pack()

language_select = ttk.Combobox(
    root,
    textvariable=language_var,
    values=["French", "Spanish"],
    font=body_font,
    state="readonly",
)
language_select.pack()

# --- Button that triggers handle_translate ---
submit_button = ttk.Button(root, text="Translate", command=handle_translate)
submit_button.pack(pady=10)

# --- Output area: heading plus a text box for the translation ---
output_frame = tk.Frame(root)
output_frame.pack(pady=10)

output_heading = tk.Label(output_frame, text="Translation: ", font=heading_font)
output_heading.pack()

translation_output = tk.Text(output_frame, height=10, width=50, font=body_font)
translation_output.pack()

# Hand control to Tk's event loop
root.mainloop()