"""Flask demo app: sentiment classification and text generation.

Serves three classification models (biLSTM / LSTM / GRU) and three
generation models of the same architectures, loaded once at startup.
"""

from functools import lru_cache
import json
import pickle
import re

from flask import Flask, render_template, request, url_for
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

app = Flask(__name__, static_folder='static')

# Load every model once at import time; loading per-request would be far too slow.
cls_biLSTM = load_model("Classification/biLSTM_model.h5")
cls_LSTM = load_model("Classification/LSTM_model.h5")
cls_GRU = load_model("Classification/GRU_model.h5")
gen_biLSTM = load_model("Generation/bilstm_model.h5")
gen_LSTM = load_model("Generation/lstm_model.h5")
gen_GRU = load_model("Generation/gru_model.h5")


def postprocess_text(text):
    """Normalize user input text.

    Collapses runs of whitespace, inserts a space after sentence-ending
    punctuation that is jammed against the next word, and capitalizes the
    first character.

    Returns the cleaned string; empty or whitespace-only input yields "".
    """
    text = re.sub(r"\s+", " ", text.strip())
    text = re.sub(r"(\w)([.!?])(\w)", r"\1\2 \3", text)
    # Guard: text[0] would raise IndexError on an empty string.
    if text:
        text = text[0].upper() + text[1:]
    return text


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/classifier')
def classifier():
    return render_template('classifier.html')


@lru_cache(maxsize=1)
def _load_classification_assets():
    """Read the padding config and the fitted tokenizer once and cache them.

    Previously these files were re-read and re-unpickled on every POST.
    """
    with open('Classification/data.json', 'r') as fh:
        config = json.load(fh)
    # NOTE(review): unpickling a repo-local artifact — never point this at
    # untrusted input (pickle.load executes arbitrary code).
    with open('Classification/tokenizer.pkl', 'rb') as fh:
        tokenizer = pickle.load(fh)
    return config, tokenizer


@app.route('/classification', methods=['GET', 'POST'])
def classification():
    """Score the submitted sentence with all three classifiers.

    On POST, renders classification.html with each model's probability
    (formatted to 9 decimals) and its Positive/Negative label; on GET,
    renders the empty form.
    """
    if request.method != 'POST':
        return render_template('classification.html')

    sentence = request.form['sentence']
    config, tokenizer = _load_classification_assets()

    sequences = tokenizer.texts_to_sequences([sentence])
    padded = pad_sequences(sequences,
                           maxlen=config['max_length'],
                           padding=config['padding_type'],
                           truncating=config['trunc_type'])
    threshold = config['threshold']

    # predict() returns shape (1, 1); extract the scalar before comparing,
    # rather than relying on ndarray-vs-scalar truthiness.
    scores = {
        'biLSTM': float(cls_biLSTM.predict(padded)[0][0]),
        'LSTM': float(cls_LSTM.predict(padded)[0][0]),
        'GRU': float(cls_GRU.predict(padded)[0][0]),
    }
    labels = {name: "Positive" if s > threshold else "Negative"
              for name, s in scores.items()}

    return render_template('classification.html',
                           sentence=sentence,
                           biLSTM_pred="{:.9f}".format(scores['biLSTM']),
                           biLSTM_label=labels['biLSTM'],
                           LSTM_pred="{:.9f}".format(scores['LSTM']),
                           LSTM_label=labels['LSTM'],
                           GRU_pred="{:.9f}".format(scores['GRU']),
                           GRU_label=labels['GRU'])


@app.route("/generation", methods=['GET', 'POST'])
def generation():
    """Generate continuations of the submitted sentence with all three models."""
    if request.method != 'POST':
        return render_template("generation.html")

    sentence = postprocess_text(request.form['sentence'])
    next_words = int(request.form['valueradio'])

    LSTM_Pred = generate_text(sentence, next_words, "lstm")
    GRU_Pred = generate_text(sentence, next_words, "gru")
    BILSTM_Pred = generate_text(sentence, next_words, "bilstm")

    return render_template("generation.html",
                           sentence=sentence,
                           next_words=next_words,
                           LSTM_Pred=LSTM_Pred + ".",
                           GRU_Pred=GRU_Pred + ".",
                           BILSTM_Pred=BILSTM_Pred + ".",
                           valueradio=next_words)


@lru_cache(maxsize=1)
def _load_generation_tokenizer():
    """Fit the generation tokenizer on the review corpus once and cache it.

    Returns (tokenizer, max_sequence_len) where max_sequence_len is the
    longest tokenized review — the same value the training n-gram pass
    produced. Previously this corpus scan ran on every request, three
    times per page load.
    """
    with open('Classification/Reviews.json', 'r') as fh:
        data = json.load(fh)
    reviews = [item['Reviews'] for item in data]

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(reviews)

    # The longest n-gram built during training is simply the longest
    # tokenized review; no need to materialize every prefix sequence.
    max_sequence_len = max(
        (len(tokenizer.texts_to_sequences([line])[0]) for line in reviews),
        default=0,
    )
    return tokenizer, max_sequence_len


def generate_text(sentence, next_words, model_name):
    """Greedily generate `next_words` tokens following `sentence`.

    model_name selects the generator: "lstm", "gru", or "bilstm"
    (raises KeyError for anything else). Returns the seed sentence with
    the generated words appended, space-separated.
    """
    models = {
        "lstm": gen_LSTM,
        "gru": gen_GRU,
        "bilstm": gen_biLSTM,
    }
    model = models[model_name]
    tokenizer, max_sequence_len = _load_generation_tokenizer()

    generated_text = sentence
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list],
                                   maxlen=max_sequence_len - 1,
                                   padding='pre')
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])
        # O(1) reverse lookup instead of scanning word_index each step;
        # an out-of-vocabulary prediction appends nothing.
        output_word = tokenizer.index_word.get(predicted, "")
        generated_text += " " + output_word
    return generated_text


if __name__ == '__main__':
    app.run(debug=True, port=8000)