Spaces:
Runtime error
Runtime error
File size: 3,015 Bytes
69e42b8 5344de8 25842ad 00dd567 8a6b573 619fb86 25842ad 2e8a190 25842ad 8a6b573 851c9dd 5c96396 426fb1f 851c9dd 25842ad 8a6b573 619fb86 db0f41a db17b18 25842ad 8a6b573 25842ad db0f41a 00dd567 619fb86 00dd567 54db931 554b7cc 00dd567 8a6b573 00dd567 adeed96 554b7cc 00dd567 8a6b573 619fb86 8a6b573 74b882c 8a6b573 74b882c 0b03db9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import gradio as gr # Library for building User Interfaces
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import joblib
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import re
import numpy as np
nltk.download('stopwords')
# import warnings
# from sklearn.exceptions import InconsistentVersionWarning
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
# Shared Porter stemmer used by both preprocessing functions.
ps = PorterStemmer()
# NOTE(review): this StandardScaler is freshly constructed and never fitted,
# yet classify() calls scaler.transform() — that raises NotFittedError.
# Presumably the scaler fitted at training time should be loaded from a
# pickle like the other artifacts below; confirm against the training code.
scaler = StandardScaler()
# Pre-trained artifacts serialized at training time:
# CountVectorizer (BoW) paired with the Naive Bayes classifier.
vectorizer = joblib.load('./vectorizer.pkl')
nb_classifier = joblib.load('./nb_classifier.pkl')
# TF-IDF vectorizer paired with the Random Forest classifier.
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
random_forest = joblib.load('./random_forest.pkl')
# Word2vec embedding model paired with the SVM classifier.
word2vec_model = joblib.load('./word2vec_model.pkl')
svm_classifier = joblib.load('./svm_classifier.pkl')
def preprocess_for_bow_and_tfidf(text):
    """Clean and stem a news title for BoW / TF-IDF vectorization.

    Keeps alphanumerics plus currency/percent symbols, lowercases,
    drops English stopwords, and Porter-stems each remaining token.

    Args:
        text: Raw news title string.

    Returns:
        A single-element list containing the processed title, the shape
        sklearn vectorizers' transform() expects.
    """
    tokens = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text).lower().split()
    # Build the stopword set once: the original evaluated
    # stopwords.words('english') for every token, paying a list
    # construction + O(n) membership test per token.
    stop_words = set(stopwords.words('english'))
    stemmed = [ps.stem(t) for t in tokens if t not in stop_words]
    return [' '.join(stemmed)]
def preprocess_for_word2vec(text):
    """Clean, stem and tokenize a news title for word2vec averaging.

    Same cleaning pipeline as preprocess_for_bow_and_tfidf, but the
    tokens are kept as a list instead of being re-joined.

    Args:
        text: Raw news title string.

    Returns:
        A single-element list containing the token list. (Bug fix: the
        original omitted the return statement entirely, so callers
        received None and classify() crashed iterating over it.)
    """
    tokens = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text).lower().split()
    # Hoist the stopword set out of the comprehension condition.
    stop_words = set(stopwords.words('english'))
    stemmed = [ps.stem(t) for t in tokens if t not in stop_words]
    return [stemmed]
def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the in-vocabulary words.

    Args:
        words: Iterable of tokens.
        model: Word2vec-style model exposing embeddings via ``model.wv``.
        vocabulary: Container of known words; out-of-vocabulary tokens
            are skipped.
        num_features: Embedding dimensionality, used for the zero vector
            when no word is in the vocabulary.

    Returns:
        float64 ndarray of shape (num_features,) — the average of the
        matched embeddings, or all zeros if none matched.
    """
    known = [model.wv[w] for w in words if w in vocabulary]
    if not known:
        return np.zeros((num_features,), dtype="float64")
    total = np.zeros((num_features,), dtype="float64")
    for vec in known:
        total = np.add(total, vec)
    return np.divide(total, float(len(known)))
def classify(text, choice):
    """Classify a news title as fake or not with the selected model.

    Args:
        text: News title entered by the user.
        choice: Model selector — 1 = Naive Bayes (BoW), 2 = Random Forest
            (TF-IDF), 3 = SVM over averaged word2vec features.

    Returns:
        "Fake News" if the chosen model predicts class 1, otherwise
        "Not Fake News".

    Raises:
        ValueError: If choice is not 1, 2 or 3. (The original fell
            through with `prediction` undefined, raising NameError.)
    """
    if choice == 1:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = vectorizer.transform(corpus).toarray()
        prediction = nb_classifier.predict(features)
    elif choice == 2:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = tfidf_vectorizer.transform(corpus).toarray()
        prediction = random_forest.predict(features)
    elif choice == 3:
        corpus = preprocess_for_word2vec(text)
        X = np.array([
            average_word_vectors(title, word2vec_model,
                                 word2vec_model.wv.key_to_index, 100)
            for title in corpus
        ])
        # NOTE(review): `scaler` is a fresh, unfitted StandardScaler at
        # module level, so transform() raises NotFittedError here — the
        # fitted scaler from training should be loaded instead; confirm.
        X = scaler.transform(X)
        prediction = svm_classifier.predict(X)
    else:
        raise ValueError(f"Unknown model choice: {choice!r}")
    # predict() returns an array; index the single sample explicitly
    # rather than relying on array-to-scalar comparison truthiness.
    return "Fake News" if prediction[0] == 1 else "Not Fake News"
# Gradio UI wiring. Bug fix: `gr.Textxox` was a typo for `gr.Textbox`,
# which raised AttributeError at startup (the Space's "Runtime error").
GUI = gr.Interface(
    fn=classify,
    inputs=[
        gr.Textbox(label="Input", placeholder="Enter News Title here"),
        gr.Radio(
            choices=[
                ("Naive Bayes", 1),
                ("Random Forest", 2),
                ("Support Vector Machine", 3),
            ],
            value=1,
            label="Model",
        ),
    ],
    outputs=['text'],
    title="Fake News Detection System",
)

# Guard the launch so importing this module (e.g. for tests) does not
# start the server; running the script directly behaves as before.
if __name__ == "__main__":
    GUI.launch(debug=True)