# ByteBlaze's picture
# Update app.py
# 5426b24 verified
import gradio as gr # Library for building User Interfaces
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import joblib
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import re
import numpy as np
from gensim.models import Word2Vec
# Fetch the NLTK stop-word corpus that the preprocessing helpers read.
nltk.download('stopwords')

# To silence scikit-learn's InconsistentVersionWarning while loading the
# pickled estimators below, enable these lines:
# import warnings
# from sklearn.exceptions import InconsistentVersionWarning
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Shared stemmer used by both preprocessing helpers.
ps = PorterStemmer()

# NOTE: joblib.load unpickles its input, so these artifacts must come from a
# trusted source.

# Model 1: `vectorizer` features fed to `nb_classifier`.
vectorizer = joblib.load('./vectorizer.pkl')
nb_classifier = joblib.load('./nb_classifier.pkl')

# Model 2: `tfidf_vectorizer` features fed to `random_forest`.
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
random_forest = joblib.load('./random_forest.pkl')

# Model 3: averaged Word2Vec vectors, passed through `scaler`, fed to
# `svm_classifier`.
word2vec_model = Word2Vec.load("./word2vec_model.bin")
scaler = joblib.load('./scaler.pkl')
svm_classifier = joblib.load('./svm_classifier.pkl')
def preprocess_for_bow_and_tfidf(text):
    """Normalise a news title into a single cleaned document string.

    Every character other than ASCII letters, digits and the symbols
    ``$£€¥%`` is replaced by a space; the text is then lower-cased, split on
    whitespace, stripped of English stop words and Porter-stemmed.

    Args:
        text: Raw title string.

    Returns:
        A one-element list containing the remaining stems joined by single
        spaces (the list-of-documents shape that ``classify`` passes to the
        vectorizers' ``transform``).
    """
    # Build the stop-word set once: calling stopwords.words() inside the
    # comprehension would repeat the lookup for every token and test
    # membership against a list instead of a set.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = cleaned.lower().split()
    # Stop words are filtered before stemming, so the check sees the
    # unstemmed, lower-cased token.
    stems = [ps.stem(token) for token in tokens if token not in stop_words]
    return [' '.join(stems)]
def preprocess_for_word2vec(text):
    """Normalise a news title into a list of stemmed tokens.

    Every character other than ASCII letters, digits and the symbols
    ``$£€¥%`` is replaced by a space; the text is then lower-cased, split on
    whitespace, stripped of English stop words and Porter-stemmed.

    Args:
        text: Raw title string.

    Returns:
        A one-element list whose only item is the list of remaining stems
        (one tokenised document).
    """
    # Build the stop-word set once: calling stopwords.words() inside the
    # comprehension would repeat the lookup for every token and test
    # membership against a list instead of a set.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = cleaned.lower().split()
    # Stop words are filtered before stemming, so the check sees the
    # unstemmed, lower-cased token.
    stems = [ps.stem(token) for token in tokens if token not in stop_words]
    return [stems]
def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the tokens in *words* that are known.

    Args:
        words: Iterable of tokens.
        model: Object whose ``wv`` attribute supports ``model.wv[token]``
            lookups returning a numeric vector.
        vocabulary: Container supporting ``token in vocabulary`` tests;
            tokens not in it are skipped.
        num_features: Length of the zero vector the sum starts from;
            presumably must equal the model's embedding size.

    Returns:
        A float64 NumPy array: the element-wise mean of the vectors of the
        tokens found in *vocabulary*, or ``num_features`` zeros when no
        token is found.
    """
    total = np.zeros((num_features,), dtype="float64")
    matched = 0
    for word in words:
        if word in vocabulary:
            total = total + model.wv[word]
            matched += 1
    # Skip the division when nothing matched so the result stays a clean
    # zero vector instead of 0/0.
    if matched:
        total = total / matched
    return total
def classify(text, choice):
    """Classify a news title with the selected model.

    Args:
        text: Raw news title.
        choice: Model selector:
            1 - ``vectorizer`` features with ``nb_classifier``;
            2 - ``tfidf_vectorizer`` features with ``random_forest``;
            3 - averaged Word2Vec vectors, scaled by ``scaler``, with
            ``svm_classifier``.

    Returns:
        ``"Fake News"`` when the model predicts label 1, otherwise
        ``"Not Fake News"``.

    Raises:
        ValueError: If ``choice`` is not 1, 2 or 3 (previously this
            surfaced as an UnboundLocalError on ``prediction``).
    """
    if choice == 1:
        documents = preprocess_for_bow_and_tfidf(text)
        features = vectorizer.transform(documents).toarray()
        prediction = nb_classifier.predict(features)
    elif choice == 2:
        documents = preprocess_for_bow_and_tfidf(text)
        features = tfidf_vectorizer.transform(documents).toarray()
        prediction = random_forest.predict(features)
    elif choice == 3:
        documents = preprocess_for_word2vec(text)
        vocabulary = word2vec_model.wv.key_to_index
        # 100 is the feature length handed to average_word_vectors;
        # presumably the Word2Vec vector size used in training - TODO confirm.
        features = np.array(
            [
                average_word_vectors(tokens, word2vec_model, vocabulary, 100)
                for tokens in documents
            ]
        )
        prediction = svm_classifier.predict(scaler.transform(features))
    else:
        raise ValueError(
            f"Unknown model choice: {choice!r}; expected 1, 2 or 3"
        )

    # Exactly one document was submitted, so inspect its single label rather
    # than relying on the truthiness of a one-element array.
    if prediction[0] == 1:
        return "Fake News"
    return "Not Fake News"
# Front end: a free-text box for the title plus a radio selector whose values
# (1, 2, 3) are passed to `classify` as its `choice` argument.
_title_box = gr.Textbox(label="Input", placeholder="Enter News Title here")
_model_radio = gr.Radio(
    choices=[
        ("Naive Bayes", 1),
        ("Random Forest", 2),
        ("Support Vector Machine", 3),
    ],
    value=1,
    label="Model",
)

GUI = gr.Interface(
    fn=classify,
    inputs=[_title_box, _model_radio],
    outputs=["text"],
    title="Fake News Detection System",
)

# Start the web server with debug mode enabled.
GUI.launch(debug=True)