Spaces:

non2013
/

SincereQuestions

Sleeping

App Files Files Community

SincereQuestions / app.py

non2013

update interface

f227fd1 4 months ago

raw

history blame

3.12 kB

	import gc
	import os
	import pickle
	import subprocess
	import sys

	import gradio as gr
	import numpy as np
	import pandas as pd
	import spacy
	import tensorflow as tf
	from tqdm import tqdm

	# Make sure the spaCy model package is installed. The download is skipped
	# when the package is already present, and it runs under the interpreter
	# executing this script so the model lands in the same environment.
	SPACY_MODEL = "en_core_web_lg"
	if not spacy.util.is_package(SPACY_MODEL):
	    # check=True makes a failed download stop the app here instead of
	    # surfacing later as a load error.
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", SPACY_MODEL],
	        check=True,
	    )

	# Load the four Keras models that make up the ensemble.
	model_1 = tf.keras.models.load_model("model_1.h5")
	model_2 = tf.keras.models.load_model("model_2.h5")
	model_3 = tf.keras.models.load_model("model_3.h5")
	model_4 = tf.keras.models.load_model("model_4.h5")

	# Load the pickled dictionaries shipped next to this script.
	# NOTE: unpickling can execute arbitrary code; these files must only ever
	# come from this repository, never from user-supplied data.
	with open('word_dict.pkl', 'rb') as f:
	    word_dict = pickle.load(f)

	with open('lemma_dict.pkl', 'rb') as f:
	    lemma_dict = pickle.load(f)

	# Load the spaCy pipeline with the parser, NER and tagger components disabled.
	nlp = spacy.load(SPACY_MODEL, disable=['parser', 'ner', 'tagger'])
	# Register a case-insensitive stop-word check under the IS_STOP flag.
	# NOTE(review): Vocab.add_flag looks like a spaCy v2-era API; confirm it
	# exists in the spaCy version this app is pinned to.
	nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)

	def preprocess_text(text, oov_index=0):
	    """Convert a question into a list of vocabulary indices.

	    The text is tokenized with the module-level spaCy pipeline ``nlp``. Every
	    token whose ``pos_`` is not ``"PUNCT"`` is looked up by its exact text in
	    the module-level ``word_dict``.

	    Tokens that are missing from ``word_dict`` are mapped to ``oov_index``.
	    They are deliberately NOT inserted into ``word_dict``: doing so would let
	    the shared dictionary grow without bound on user input and would hand the
	    models indices that did not exist when the dictionary was saved.

	    Args:
	        text: The question to encode. ``None`` is treated as an empty string.
	        oov_index: Index used for tokens not present in ``word_dict``.
	            Defaults to 0, the value ``pad_sequences`` pads with by default.
	            NOTE(review): presumably the models treat 0 as "no word";
	            confirm against how they were trained.

	    Returns:
	        A list of integer indices, one per kept token, in token order.
	    """
	    doc = nlp(text or "")
	    # NOTE(review): this file loads the pipeline with the tagger disabled, so
	    # ``pos_`` may be empty and this filter may never drop anything. It is
	    # kept as-is so the token stream stays what it was before; confirm.
	    return [
	        word_dict.get(token.text, oov_index)
	        for token in doc
	        if token.pos_ != "PUNCT"
	    ]

	def classify_question(text):
	    """Label a question as "Sincere" or "Insincere" using the four-model ensemble.

	    The question is encoded with ``preprocess_text``, padded to length 55 and
	    passed to ``model_1`` .. ``model_4``. Each model's output is scaled by its
	    ensemble weight (0.15, 0.35, 0.15, 0.35) and the scaled values are summed.
	    A sum above 0.35 yields "Insincere", anything else "Sincere".

	    Returns:
	        A ``(label, details)`` tuple. ``details`` is a dict holding the
	        combined score, each model's weighted contribution, and the encoded
	        token sequence.
	    """
	    token_ids = preprocess_text(text)
	    # Adjust maxlen if needed
	    model_input = tf.keras.preprocessing.sequence.pad_sequences([token_ids], maxlen=55)
	    batch_size = 512

	    # (model, ensemble weight) pairs, evaluated in this order.
	    ensemble = (
	        (model_1, 0.15),
	        (model_2, 0.35),
	        (model_3, 0.15),
	        (model_4, 0.35),
	    )
	    weighted = [
	        weight * np.squeeze(member.predict(model_input, batch_size=batch_size, verbose=2))
	        for member, weight in ensemble
	    ]
	    w1, w2, w3, w4 = weighted

	    # Accumulate left to right so the floating-point result is unchanged.
	    combined = w1
	    for contribution in (w2, w3, w4):
	        combined = combined + contribution

	    if combined > 0.35:
	        label = "Insincere"
	    else:
	        label = "Sincere"

	    # Payload rendered by the JSON output component.
	    per_model = {
	        "Model 1": float(w1),
	        "Model 2": float(w2),
	        "Model 3": float(w3),
	        "Model 4": float(w4),
	        "visible": False,
	    }
	    details = {
	        "Probability": float(combined),
	        "Model Probabilities": per_model,
	        "Sequence": {"value": token_ids, "visible": False},
	    }
	    return label, details

	# Sample questions offered alongside the input box.
	examples = [
	    "How do you train a pigeon to send messages?",
	    "Is USA a shithole country owing to a shithole president?",
	    "Why is Indian educationa total bullshit?",
	    "Which person has given the least f**ks and still turned out successful?",
	]

	# Gradio UI: one two-line text box in; a text label and a JSON payload out,
	# matching the (label, probs) pair returned by classify_question.
	question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")
	interface = gr.Interface(
	    title="Quora Insincere Questions Classifier",
	    description="Enter your question to classify it as sincere or insincere. Select an example question from the dropdown.",
	    fn=classify_question,
	    inputs=[question_box],
	    outputs=["text", "json"],
	    examples=examples,
	)

	# Start serving the app.
	interface.launch()