text_gen_class / app.py
Scezui's picture
Refactor code for improved performance and readability
dfe58cd
from flask import Flask, render_template, request, url_for
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
import json
import re
# Flask application; serves templates plus files from ./static.
app = Flask(__name__, static_folder='static')
# Sentiment-classification models, loaded once at import time so requests
# don't pay the model-load cost.  NOTE(review): paths are relative to the
# process working directory — confirm the app is launched from the repo root.
cls_biLSTM = load_model("Classification/biLSTM_model.h5")
cls_LSTM = load_model("Classification/LSTM_model.h5")
cls_GRU = load_model("Classification/GRU_model.h5")
# Next-word text-generation models (same three architectures).
gen_biLSTM = load_model("Generation/bilstm_model.h5")
gen_LSTM = load_model("Generation/lstm_model.h5")
gen_GRU = load_model("Generation/gru_model.h5")
# Post-process Texts
def postprocess_text(text):
    """Normalize whitespace and sentence casing of *text*.

    Collapses runs of whitespace to single spaces, inserts a space after a
    sentence-ending punctuation mark jammed against the next word
    (``"hi.there"`` -> ``"hi. there"``), and capitalizes the first character.

    Returns the cleaned string.  An empty or whitespace-only input returns
    ``""`` (the original implementation raised IndexError on ``text[0]``).
    """
    text = re.sub(r"\s+", " ", text.strip())
    text = re.sub(r"(\w)([.!?])(\w)", r"\1\2 \3", text)
    # Guard against empty input before indexing the first character.
    if not text:
        return text
    return text[0].upper() + text[1:]
@app.route('/')
def index():
    """Render the landing page."""
    page = render_template('index.html')
    return page
@app.route('/classifier')
def classifier():
    """Render the sentiment-classifier input form."""
    page = render_template('classifier.html')
    return page
@app.route('/classification', methods=['GET', 'POST'])
def classification():
    """Classify a submitted sentence with the biLSTM, LSTM and GRU models.

    GET renders the empty form.  POST tokenizes and pads the sentence with
    the tokenizer/config saved at training time, then reports each model's
    probability (9 decimal places) and a Positive/Negative label based on
    the configured threshold.
    """
    if request.method != 'POST':
        return render_template('classification.html')

    sentence = request.form['sentence']

    # Preprocessing settings saved alongside the models at training time.
    with open('Classification/data.json', 'r') as file:
        data = json.load(file)
    max_length = data['max_length']
    padding_type = data['padding_type']
    trunc_type = data['trunc_type']
    threshold = data['threshold']

    # BUG FIX: the original used pickle.load(open(...)) and never closed
    # the file handle; 'with' guarantees it is released.
    with open('Classification/tokenizer.pkl', 'rb') as fh:
        tokenizer = pickle.load(fh)

    sequences = tokenizer.texts_to_sequences([sentence])
    padded = pad_sequences(sequences, maxlen=max_length,
                           padding=padding_type, truncating=trunc_type)

    # Run all three models; extract the scalar probability before comparing
    # against the threshold (the original compared a 2-D array directly).
    results = {}
    for name, model in (('biLSTM', cls_biLSTM),
                        ('LSTM', cls_LSTM),
                        ('GRU', cls_GRU)):
        score = float(model.predict(padded)[0][0])
        results[f'{name}_pred'] = "{:.9f}".format(score)
        results[f'{name}_label'] = "Positive" if score > threshold else "Negative"

    # Keyword names match the original template contract exactly
    # (biLSTM_pred, biLSTM_label, LSTM_pred, ...).
    return render_template('classification.html', sentence=sentence, **results)
@app.route("/generation", methods=['GET', 'POST'])
def generation():
    """Text-generation page: on POST, run all three generators on the seed."""
    if request.method != 'POST':
        return render_template("generation.html")

    seed = postprocess_text(request.form['sentence'])
    n_words = int(request.form['valueradio'])

    # Same call order as before (lstm, gru, bilstm); each output gets a
    # trailing period, exactly as the original did.
    outputs = {
        label: generate_text(seed, n_words, key) + "."
        for label, key in (("LSTM_Pred", "lstm"),
                           ("GRU_Pred", "gru"),
                           ("BILSTM_Pred", "bilstm"))
    }
    return render_template("generation.html", sentence=seed,
                           next_words=n_words, valueradio=n_words, **outputs)
def generate_text(sentence, next_words, model_name):
    """Append *next_words* predicted words to *sentence* and return the result.

    model_name selects one of the preloaded generation models:
    "lstm", "gru" or "bilstm".

    The tokenizer and maximum sequence length are derived from
    Classification/Reviews.json.  They are identical for every call, so they
    are built once and cached on the function object — the original rebuilt
    them from disk on every call (three times per request) and also built a
    large padded n-gram array that was never used afterwards.
    """
    models = {
        "lstm": gen_LSTM,
        "gru": gen_GRU,
        "bilstm": gen_biLSTM
    }
    model = models[model_name]

    cache = getattr(generate_text, "_cache", None)
    if cache is None:
        with open('Classification/Reviews.json', 'r') as f:
            data = json.load(f)
        reviews = [item['Reviews'] for item in data]
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(reviews)
        # The original's longest n-gram prefix equals the longest tokenized
        # review, so compute that directly instead of materializing every
        # n-gram (and drop the dead np.array(pad_sequences(...)) call).
        max_sequence_len = max(
            len(seq) for seq in tokenizer.texts_to_sequences(reviews)
        )
        cache = (tokenizer, max_sequence_len)
        generate_text._cache = cache
    tokenizer, max_sequence_len = cache

    generated_text = sentence
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        padded = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                               padding='pre')
        predicted = int(np.argmax(model.predict(padded), axis=1)[0])
        # O(1) reverse lookup; index 0 (padding) is absent from index_word,
        # so fall back to " " exactly like the original linear scan did.
        output_word = tokenizer.index_word.get(predicted, " ")
        generated_text += " " + output_word
    return generated_text
if __name__ == '__main__':
app.run(debug=True, port=8000)