# Spaces: Sleeping
import gc
import os
import pickle
import subprocess
import sys

import gradio as gr
import numpy as np
import pandas as pd
import spacy
import tensorflow as tf
from tqdm import tqdm
# Name of the spaCy pipeline package used for tokenisation.
SPACY_MODEL = "en_core_web_lg"

# Download the SpaCy model, but only when it is not already installed.
# The download runs under the interpreter executing this script
# (sys.executable) rather than whatever ``python`` is first on PATH, and
# check=True makes a failed download abort start-up here instead of
# surfacing later as a confusing error from spacy.load.
if not spacy.util.is_package(SPACY_MODEL):
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", SPACY_MODEL],
        check=True,
    )

# Load models (the four Keras models blended by classify_question).
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")

# Load dictionaries.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file, so these must only ever be trusted artefacts shipped with the app.
# word_dict maps token text to the integer index used by preprocess_text.
with open('word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)
# lemma_dict is loaded but not read anywhere in the visible code.
with open('lemma_dict.pkl', 'rb') as f:
    lemma_dict = pickle.load(f)

# Load SpaCy NLP model with the parser, NER and tagger components disabled.
nlp = spacy.load(SPACY_MODEL, disable=['parser', 'ner', 'tagger'])
# Register a case-insensitive stop-word check under the IS_STOP flag id.
nlp.vocab.add_flag(
    lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS,
    spacy.attrs.IS_STOP,
)
def preprocess_text(text, unknown_index=0):
    """Tokenise *text* with spaCy and return the word index of each token.

    Tokens missing from ``word_dict`` are mapped to ``unknown_index`` and
    ``word_dict`` itself is never modified.  Adding new entries at request
    time would hand out indices that were never seen in training
    (presumably outside the models' embedding tables -- TODO confirm), let
    the dictionary grow without bound, and make a question's encoding
    depend on which questions were asked before it.

    Args:
        text: The raw question string.
        unknown_index: Index emitted for out-of-vocabulary tokens.  The
            default 0 is the value ``pad_sequences`` pads with by default.

    Returns:
        A list of integer indices, one per kept token, in token order.
        Empty when *text* produces no tokens.
    """
    # NOTE(review): the pipeline is loaded with the tagger disabled, so
    # ``pos_`` is presumably empty and this filter likely never drops
    # anything.  It is kept unchanged so the encoding stays whatever the
    # models were trained on -- confirm before switching to ``is_punct``.
    return [
        word_dict.get(token.text, unknown_index)
        for token in nlp(text)
        if token.pos_ != "PUNCT"
    ]
def classify_question(text):
    """Classify a question as "Sincere" or "Insincere" with a 4-model blend.

    The question is converted to word indices, padded to length 55 and fed
    to each model.  Every model's output is scaled by its blend weight
    (0.15, 0.35, 0.15, 0.35) and the scaled values are summed; a sum above
    0.35 is labelled "Insincere".

    Returns:
        A ``(label, details)`` tuple.  ``details`` holds the blended score,
        the per-model values and the word-index sequence.

    NOTE(review): the values under "Model Probabilities" are the
    weight-scaled contributions, not the raw model outputs, and the
    "visible" keys are ordinary dict entries that appear in the JSON output.
    """
    token_ids = preprocess_text(text)
    # Adjust maxlen if needed
    model_input = tf.keras.preprocessing.sequence.pad_sequences([token_ids], maxlen=55)

    batch_size = 512
    weighted_models = (
        (model_1, 0.15),
        (model_2, 0.35),
        (model_3, 0.15),
        (model_4, 0.35),
    )

    # Run the models in order, keeping each weight-scaled output.
    contributions = []
    for model, weight in weighted_models:
        raw_output = model.predict(model_input, batch_size=batch_size, verbose=2)
        contributions.append(weight * np.squeeze(raw_output))

    first, second, third, fourth = contributions
    blended = first + second + third + fourth

    if blended > 0.35:
        label = "Insincere"
    else:
        label = "Sincere"

    per_model = {
        "Model 1": float(first),
        "Model 2": float(second),
        "Model 3": float(third),
        "Model 4": float(fourth),
        "visible": False,
    }
    details = {
        "Probability": float(blended),
        "Model Probabilities": per_model,
        "Sequence": {"value": token_ids, "visible": False},
    }
    return label, details
# Sample questions offered in the UI; passed to gr.Interface(examples=...).
examples = [
    "How do you train a pigeon to send messages?",
    "Is USA a shithole country owing to a shithole president?",
    "Why is Indian educationa total bullshit?",
    "Which person has given the least f**ks and still turned out successful?",
]
# Gradio UI: one text box in; the label (text) and the details (JSON) out.
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")

interface = gr.Interface(
    fn=classify_question,
    inputs=[question_box],
    # First output is the label, second is the probabilities dict.
    outputs=["text", "json"],
    title="Quora Insincere Questions Classifier",
    description="Enter your question to classify it as sincere or insincere. Select an example question from the dropdown.",
    examples=examples,
)

# Start the web server for the app.
interface.launch()