non2013's picture
update interface
f227fd1
raw
history blame
3.12 kB
import gc
import os
import pickle
import subprocess
import sys

import gradio as gr
import numpy as np
import pandas as pd
import spacy
import tensorflow as tf
from tqdm import tqdm
# Download the SpaCy model.
# The download is run through the interpreter executing this script
# (sys.executable) so the model is installed into the same environment that
# spacy.load() reads from below; a bare "python" resolved through PATH can
# belong to a different environment. Passing an argument list also avoids
# going through a shell.
_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_lg"],
    check=False,
)
if _spacy_download.returncode != 0:
    # Best effort, like the original os.system call: the model may already be
    # installed, in which case spacy.load() below still succeeds. If it is
    # not, spacy.load() raises and this message explains why.
    print(
        "warning: 'spacy download en_core_web_lg' exited with status "
        f"{_spacy_download.returncode}",
        file=sys.stderr,
    )

# Load models: the four Keras models whose weighted predictions are combined
# in classify_question.
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")

# Load dictionaries.
# NOTE: pickle.load can execute arbitrary code contained in the file; this is
# only acceptable because these files are shipped with the app. Never point
# these paths at user-supplied data.
with open('word_dict.pkl', 'rb') as f:
    # Maps token text to the integer index fed to the models
    # (see preprocess_text).
    word_dict = pickle.load(f)
with open('lemma_dict.pkl', 'rb') as f:
    # Loaded but not referenced by the code visible in this file.
    lemma_dict = pickle.load(f)

# Load SpaCy NLP model; only tokenization is needed, so the heavier pipeline
# components are switched off.
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
# Re-derive the IS_STOP lexical flag case-insensitively from the English
# stop-word list.
# NOTE(review): Vocab.add_flag looks like a spaCy v2-era API — confirm the
# pinned spaCy version still provides it.
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
def preprocess_text(text, oov_index=0):
    """Preprocess the input text using SpaCy and return word indices.

    The text is tokenized with the module-level ``nlp`` pipeline and every
    token that is not tagged as punctuation is looked up in the module-level
    ``word_dict``.

    Unknown tokens are mapped to ``oov_index`` instead of being inserted into
    ``word_dict``. The previous behaviour assigned each unseen token a brand
    new index (``len(word_dict) + 1``), which grew the shared dictionary
    without bound from user input and produced indices that the pre-trained
    models were presumably never built to accept.

    Args:
        text: The question to encode.
        oov_index: Index used for tokens missing from ``word_dict``. The
            default, 0, is the value ``pad_sequences`` pads with in
            ``classify_question``.
            NOTE(review): assumes index 0 is not assigned to a real word in
            word_dict — confirm against how the dictionary was built.

    Returns:
        A list of integer indices, one per kept token, in token order. Empty
        when the text yields no tokens.
    """
    word_seq = []
    for token in nlp(text):
        # NOTE(review): the pipeline is loaded with the tagger disabled, so
        # pos_ is presumably empty and this filter likely never drops a
        # token. Kept unchanged so tokenization stays the same as before —
        # confirm against the training-time preprocessing.
        if token.pos_ == "PUNCT":
            continue
        # Read-only lookup: the shared vocabulary is never mutated here.
        word_seq.append(word_dict.get(token.text, oov_index))
    return word_seq
def classify_question(text):
    """Classify a question as "Insincere" or "Sincere" with the model ensemble.

    The question is encoded by ``preprocess_text``, padded to length 55 and
    scored by the four module-level models. Each model's output is multiplied
    by a fixed weight (0.15, 0.35, 0.15, 0.35) and the weighted values are
    summed; a sum above 0.35 yields the "Insincere" label.

    Args:
        text: The question to classify.

    Returns:
        A ``(label, details)`` tuple. ``details`` is a dict holding the
        combined score under "Probability", the per-model weighted
        contributions under "Model Probabilities" (these are the weighted
        values, not the raw model outputs), and the encoded token sequence
        under "Sequence".
    """
    # Encode the question and pad it to the fixed input length.
    token_ids = preprocess_text(text)
    model_input = tf.keras.preprocessing.sequence.pad_sequences([token_ids], maxlen=55)  # Adjust maxlen if needed

    batch_size = 512

    # (weight, model) pairs, in the order the models are evaluated.
    ensemble = (
        (0.15, model_1),
        (0.35, model_2),
        (0.15, model_3),
        (0.35, model_4),
    )
    contributions = [
        weight * np.squeeze(member.predict(model_input, batch_size=batch_size, verbose=2))
        for weight, member in ensemble
    ]

    # Sum left to right in model order.
    part_1, part_2, part_3, part_4 = contributions
    combined = part_1 + part_2 + part_3 + part_4

    if combined > 0.35:
        verdict = "Insincere"
    else:
        verdict = "Sincere"

    # Per-model weighted contributions, plus the extra "visible" entry that
    # is part of the returned structure.
    per_model = {
        "Model 1": float(part_1),
        "Model 2": float(part_2),
        "Model 3": float(part_3),
        "Model 4": float(part_4),
        "visible": False,
    }
    details = {
        "Probability": float(combined),
        "Model Probabilities": per_model,
        "Sequence": {"value": token_ids, "visible": False},
    }
    return verdict, details
# Sample questions handed to the interface as ready-made examples.
examples = [
    "How do you train a pigeon to send messages?",
    "Is USA a shithole country owing to a shithole president?",
    "Why is Indian educationa total bullshit?",
    "Which person has given the least f**ks and still turned out successful?",
]

# Gradio app: a single two-line text box goes in; classify_question's
# (label, details) pair comes out as a text field and a JSON view.
interface = gr.Interface(
    title="Quora Insincere Questions Classifier",
    description="Enter your question to classify it as sincere or insincere. Select an example question from the dropdown.",
    fn=classify_question,
    inputs=[gr.Textbox(lines=2, placeholder="Enter your question here...")],
    outputs=["text", "json"],  # label first, then the probability details
    examples=examples,
)

# Start serving the app.
interface.launch()