# Flask web app: sentiment classification and next-word text generation
# using pre-trained BiLSTM / LSTM / GRU Keras models.
import json
import pickle
import re

import numpy as np
from flask import Flask, render_template, request, url_for
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Application object; static assets are served from ./static.
app = Flask(__name__, static_folder='static')

# Sentiment-classification models, loaded once at startup.
cls_biLSTM = load_model("Classification/biLSTM_model.h5")
cls_LSTM = load_model("Classification/LSTM_model.h5")
cls_GRU = load_model("Classification/GRU_model.h5")

# Text-generation models, loaded once at startup.
gen_biLSTM = load_model("Generation/bilstm_model.h5")
gen_LSTM = load_model("Generation/lstm_model.h5")
gen_GRU = load_model("Generation/gru_model.h5")
def postprocess_text(text):
    """Normalize whitespace and capitalization of *text*.

    Collapses runs of whitespace into single spaces, strips leading and
    trailing whitespace, inserts a space after sentence punctuation that is
    immediately followed by a word character, and upper-cases the first
    character.

    Args:
        text: Raw input string; may be empty.

    Returns:
        The cleaned string. An empty (or whitespace-only) input yields "".
    """
    text = re.sub(r"\s+", " ", text.strip())
    text = re.sub(r"(\w)([.!?])(\w)", r"\1\2 \3", text)
    # Fix: guard against empty input — the original indexed text[0]
    # unconditionally, raising IndexError on "" or whitespace-only input.
    if text:
        text = text[0].upper() + text[1:]
    return text
def index():
    """Render the landing page.

    NOTE(review): no @app.route decorator is visible in this chunk —
    confirm the view is registered elsewhere (e.g. app.add_url_rule).
    """
    landing_template = 'index.html'
    return render_template(landing_template)
def classifier():
    """Render the classifier input page.

    NOTE(review): no @app.route decorator is visible in this chunk —
    confirm the view is registered elsewhere (e.g. app.add_url_rule).
    """
    form_template = 'classifier.html'
    return render_template(form_template)
def classification():
    """Classify a sentence's sentiment with the BiLSTM, LSTM and GRU models.

    On POST, reads 'sentence' from the form, tokenizes and pads it with the
    artifacts saved at training time, and renders all three predictions.
    On GET, renders the empty form.

    NOTE(review): no @app.route decorator is visible in this chunk —
    confirm the view is registered with methods=['GET', 'POST'].
    """
    if request.method != 'POST':
        return render_template('classification.html')

    sentence = request.form['sentence']

    # Preprocessing parameters saved at training time.
    with open('Classification/data.json', 'r') as file:
        data = json.load(file)
    max_length = data['max_length']
    padding_type = data['padding_type']
    trunc_type = data['trunc_type']
    threshold = data['threshold']

    # Fix: load the tokenizer inside a context manager — the original
    # pickle.load(open(...)) leaked the file handle.
    with open('Classification/tokenizer.pkl', 'rb') as tok_file:
        tokenizer = pickle.load(tok_file)

    sequences = tokenizer.texts_to_sequences([sentence])
    padded = pad_sequences(sequences, maxlen=max_length,
                           padding=padding_type, truncating=trunc_type)

    biLSTM_pred = cls_biLSTM.predict(padded)
    LSTM_pred = cls_LSTM.predict(padded)
    GRU_pred = cls_GRU.predict(padded)

    # Fix: threshold the scalar probability pred[0][0] instead of the raw
    # prediction array, so the comparison is an unambiguous Python bool.
    biLSTM_label = "Positive" if biLSTM_pred[0][0] > threshold else "Negative"
    LSTM_label = "Positive" if LSTM_pred[0][0] > threshold else "Negative"
    GRU_label = "Positive" if GRU_pred[0][0] > threshold else "Negative"

    biLSTM_pred = "{:.9f}".format(biLSTM_pred[0][0])
    LSTM_pred = "{:.9f}".format(LSTM_pred[0][0])
    GRU_pred = "{:.9f}".format(GRU_pred[0][0])

    return render_template('classification.html', sentence=sentence,
                           biLSTM_pred=biLSTM_pred, biLSTM_label=biLSTM_label,
                           LSTM_pred=LSTM_pred, LSTM_label=LSTM_label,
                           GRU_pred=GRU_pred, GRU_label=GRU_label)
def generation():
    """Handle the text-generation page.

    On POST, cleans the seed sentence, reads the requested word count, and
    renders completions from the LSTM, GRU and BiLSTM generators; on GET,
    renders the empty form.

    NOTE(review): no @app.route decorator is visible in this chunk —
    confirm the view is registered with methods=['GET', 'POST'].
    """
    if request.method != 'POST':
        return render_template("generation.html")

    seed = postprocess_text(request.form['sentence'])
    word_count = int(request.form['valueradio'])

    # One completion per architecture, in the same order as before.
    completions = {
        name: generate_text(seed, word_count, name)
        for name in ("lstm", "gru", "bilstm")
    }

    return render_template("generation.html", sentence=seed,
                           next_words=word_count,
                           LSTM_Pred=completions["lstm"] + ".",
                           GRU_Pred=completions["gru"] + ".",
                           BILSTM_Pred=completions["bilstm"] + ".",
                           valueradio=word_count)
# Lazily-built cache shared by all generate_text calls:
# (tokenizer fitted on the review corpus, longest training sequence length).
_generation_assets = None


def _load_generation_assets():
    """Load (once) the tokenizer and max sequence length for generation.

    Returns:
        (tokenizer, max_sequence_len); cached after the first call so the
        corpus is not re-read and re-fit on every request.
    """
    global _generation_assets
    if _generation_assets is None:
        with open('Classification/Reviews.json', 'r') as f:
            data = json.load(f)
        reviews = [item['Reviews'] for item in data]

        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(reviews)

        # Only the longest token sequence length is needed at inference
        # time (for pre-padding); the padded n-gram matrix the original
        # built here was never used.
        max_sequence_len = 0
        for line in reviews:
            token_list = tokenizer.texts_to_sequences([line])[0]
            if len(token_list) > max_sequence_len:
                max_sequence_len = len(token_list)

        _generation_assets = (tokenizer, max_sequence_len)
    return _generation_assets


def generate_text(sentence, next_words, model_name):
    """Append *next_words* model-predicted words to *sentence*.

    Args:
        sentence: Seed text to extend.
        next_words: Number of words to generate.
        model_name: One of "lstm", "gru" or "bilstm".

    Returns:
        The seed followed by the generated words, space-separated.

    Raises:
        KeyError: If *model_name* is not a known model.
    """
    models = {
        "lstm": gen_LSTM,
        "gru": gen_GRU,
        "bilstm": gen_biLSTM,
    }
    model = models[model_name]

    # Fix: the original re-read the corpus, re-fit the tokenizer, and built
    # an unused padded n-gram matrix on EVERY call; load the assets once.
    tokenizer, max_sequence_len = _load_generation_assets()

    # Reverse index (token id -> word) so each generated word is an O(1)
    # lookup instead of a linear scan of the vocabulary.
    index_to_word = {idx: word for word, idx in tokenizer.word_index.items()}

    generated_text = sentence
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                                   padding='pre')
        # Fix: take the argmax as a plain int — the original compared a
        # Python int against a one-element ndarray.
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])
        # Unknown id falls back to a single space (original behavior).
        generated_text += " " + index_to_word.get(predicted, " ")
    return generated_text
if __name__ == '__main__':
    # Development server only: debug mode enables the reloader/tracebacks.
    app.run(port=8000, debug=True)