# Hugging Face Spaces status when this file was captured: Runtime error
import re
import warnings

import gradio as gr
import joblib
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
# Fetch the NLTK stop-word corpus at import time; the preprocessing helpers
# read it through ``stopwords.words('english')``.
nltk.download('stopwords')
# import warnings
# from sklearn.exceptions import InconsistentVersionWarning
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Shared Porter stemmer used by both preprocessing helpers.
ps = PorterStemmer()

# NOTE(review): this scaler is created here but never fitted anywhere in the
# visible code. ``StandardScaler.transform`` raises ``NotFittedError`` on an
# unfitted instance, so the scaler fitted at training time should be persisted
# and loaded here instead -- confirm where that artefact lives.
scaler = StandardScaler()
def preprocess_for_bow_and_tfidf(text):
    """Normalise one piece of text for the bag-of-words / TF-IDF vectorizers.

    Every character outside ``a-z A-Z 0-9 $ £ € ¥ %`` is replaced by a space,
    the text is lower-cased and split on whitespace, English stop words are
    dropped and the remaining tokens are Porter-stemmed.

    Args:
        text: The raw input string.

    Returns:
        A one-element list holding the cleaned tokens joined by single spaces.
    """
    # Build the stop-word set once per call: the original evaluated
    # ``stopwords.words('english')`` for every token and tested membership
    # against a list.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub(r'[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = cleaned.lower().split()
    stemmed = [ps.stem(token) for token in tokens if token not in stop_words]
    return [' '.join(stemmed)]
def preprocess_for_word2vec(text):
    """Tokenise one piece of text for the word-vector averaging step.

    Applies the same cleaning as the bag-of-words helper (strip characters
    outside ``a-z A-Z 0-9 $ £ € ¥ %``, lower-case, split on whitespace, drop
    English stop words, Porter-stem) but keeps the tokens as a list instead
    of joining them back into a string.

    Args:
        text: The raw input string.

    Returns:
        A one-element list whose single item is the list of stemmed tokens.
    """
    # Build the stop-word set once per call instead of once per token.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub(r'[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = cleaned.lower().split()
    stemmed = [ps.stem(token) for token in tokens if token not in stop_words]

    # The original built this list but never returned it, so callers received
    # None and failed as soon as they iterated over the result.
    return [stemmed]
def average_word_vectors(words, model, vocabulary, num_features):
    """Average the vectors of the words that are present in ``vocabulary``.

    Args:
        words: Iterable of tokens to look up.
        model: Object exposing ``model.wv[word]`` for vector lookup.
        vocabulary: Container supporting ``word in vocabulary``; words not in
            it are skipped.
        num_features: Length of the returned vector.

    Returns:
        A float64 NumPy array of length ``num_features``: the mean of the
        vectors of the in-vocabulary words, or all zeros when none of the
        words is in the vocabulary (including when ``words`` is empty).
    """
    feature_vector = np.zeros((num_features,), dtype="float64")
    known_count = 0
    for word in words:
        if word in vocabulary:
            known_count += 1
            feature_vector = np.add(feature_vector, model.wv[word])
    # Only divide when at least one word matched, to avoid dividing by zero.
    if known_count:
        feature_vector = np.divide(feature_vector, known_count)
    return feature_vector
# Pre-trained artefacts, loaded once at import time from the working directory.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only ever point these paths at files you trust.
vectorizer = joblib.load('./vectorizer.pkl')              # used with nb_classifier
nb_classifier = joblib.load('./nb_classifier.pkl')
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')  # used with random_forest
random_forest = joblib.load('./random_forest.pkl')
word2vec_model = joblib.load('./word2vec_model.pkl')      # used with svm_classifier
svm_classifier = joblib.load('./svm_classifier.pkl')
def classify(text, choice):
    """Classify a news title with the selected model.

    Args:
        text: The news title to classify.
        choice: Model selector -- ``1`` uses ``vectorizer`` + ``nb_classifier``,
            ``2`` uses ``tfidf_vectorizer`` + ``random_forest``, ``3`` uses
            averaged ``word2vec_model`` vectors + ``svm_classifier``.

    Returns:
        ``"Fake News"`` when the model predicts label ``1``, otherwise
        ``"Not Fake News"``.

    Raises:
        ValueError: If ``choice`` is not 1, 2 or 3.
    """
    if choice == 1:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = vectorizer.transform(corpus).toarray()
        prediction = nb_classifier.predict(features)
    elif choice == 2:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = tfidf_vectorizer.transform(corpus).toarray()
        prediction = random_forest.predict(features)
    elif choice == 3:
        corpus = preprocess_for_word2vec(text)
        keyed_vectors = word2vec_model.wv
        # Take the dimensionality from the model rather than hard-coding 100.
        num_features = keyed_vectors.vector_size
        X = np.array([
            average_word_vectors(
                title, word2vec_model, keyed_vectors.key_to_index, num_features
            )
            for title in corpus
        ])
        try:
            X = scaler.transform(X)
        except NotFittedError:
            # The module-level scaler is never fitted in this file, so
            # transform() would otherwise abort every request. Fall back to
            # the unscaled vectors and say so loudly.
            # NOTE(review): if the SVM was trained on scaled features, the
            # training-time scaler must be persisted and loaded -- confirm.
            warnings.warn(
                "StandardScaler is not fitted; classifying unscaled "
                "word-vector features.",
                RuntimeWarning,
                stacklevel=2,
            )
        prediction = svm_classifier.predict(X)
    else:
        # Previously an unexpected choice left `prediction` unbound and
        # surfaced as an UnboundLocalError further down.
        raise ValueError(f"Unknown model choice: {choice!r}")

    # Exactly one sample is classified, so read its single label.
    if prediction[0] == 1:
        return "Fake News"
    return "Not Fake News"
# Gradio front end: a text box for the title plus a radio button that selects
# which model `classify` should use (the radio passes the integer value of the
# chosen option).
GUI = gr.Interface(
    fn=classify,
    inputs=[
        # Was `gr.Textxox`, which does not exist and raised AttributeError
        # as soon as the module was executed.
        gr.Textbox(label="Input", placeholder="Enter News Title here"),
        gr.Radio(
            choices=[
                ("Naive Bayes", 1),
                ("Random Forest", 2),
                ("Support Vector Machine", 3),
            ],
            value=1,
            label="Model",
        ),
    ],
    outputs=['text'],
    title="Fake News Detection System",
)
GUI.launch(debug=True)