Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,10 +2,12 @@ import gradio as gr
|
|
2 |
import nltk
|
3 |
from sklearn.feature_extraction.text import CountVectorizer
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
5 |
import joblib
|
6 |
from nltk.stem.porter import PorterStemmer
|
7 |
from nltk.corpus import stopwords
|
8 |
import re
|
|
|
9 |
|
10 |
nltk.download('stopwords')
|
11 |
|
@@ -14,6 +16,7 @@ nltk.download('stopwords')
|
|
14 |
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
|
15 |
|
16 |
ps = PorterStemmer()
|
|
|
17 |
|
18 |
def preprocess_for_bow_and_tfidf(text):
|
19 |
corpus = []
|
@@ -24,11 +27,35 @@ def preprocess_for_bow_and_tfidf(text):
|
|
24 |
corpus.append(' '.join(text))
|
25 |
|
26 |
return corpus
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
vectorizer = joblib.load('./vectorizer.pkl')
|
29 |
nb_classifier = joblib.load('./nb_classifier.pkl')
|
30 |
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
|
31 |
random_forest = joblib.load('./random_forest.pkl')
|
|
|
|
|
32 |
|
33 |
def classify(text,choice):
|
34 |
corpus=[text]
|
@@ -36,16 +63,29 @@ def classify(text,choice):
|
|
36 |
corpus = preprocess_for_bow_and_tfidf(text)
|
37 |
features = vectorizer.transform(corpus).toarray()
|
38 |
prediction = nb_classifier.predict(features)
|
|
|
39 |
elif(choice == 2):
|
40 |
corpus = preprocess_for_bow_and_tfidf(text)
|
41 |
features = tfidf_vectorizer.transform(corpus).toarray()
|
42 |
prediction = random_forest.predict(features)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
if(prediction == 1):
|
44 |
return "Fake News"
|
45 |
else:
|
46 |
return "Not Fake News"
|
|
|
47 |
GUI = gr.Interface(
|
48 |
-
inputs = [
|
|
|
|
|
49 |
outputs = ['text'],
|
50 |
fn = classify,
|
51 |
title = "Fake News Detection System"
|
|
|
2 |
import nltk
|
3 |
from sklearn.feature_extraction.text import CountVectorizer
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.preprocessing import StandardScaler
|
6 |
import joblib
|
7 |
from nltk.stem.porter import PorterStemmer
|
8 |
from nltk.corpus import stopwords
|
9 |
import re
|
10 |
+
import numpy as np
|
11 |
|
12 |
nltk.download('stopwords')
|
13 |
|
|
|
16 |
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
|
17 |
|
18 |
ps = PorterStemmer()
|
19 |
+
scaler = StandardScaler()
|
20 |
|
21 |
def preprocess_for_bow_and_tfidf(text):
|
22 |
corpus = []
|
|
|
27 |
corpus.append(' '.join(text))
|
28 |
|
29 |
return corpus
|
30 |
+
|
31 |
+
def preprocess_for_word2vec(text):
    """Tokenise a single news title for the Word2Vec feature path.

    Every character that is not an ASCII letter, a digit or one of
    ``$ £ € ¥ %`` is replaced by a space; the result is lower-cased, split
    on whitespace, stripped of English stopwords and Porter-stemmed.

    Args:
        text: Raw title string.

    Returns:
        A one-element list whose only item is the list of stemmed tokens,
        so the caller can iterate over it as a corpus of tokenised documents.
    """
    # Read the stopword list once; the previous version reloaded it for
    # every token, and a set gives O(1) membership tests.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = cleaned.lower().split()
    stemmed = [ps.stem(token) for token in tokens if token not in stop_words]

    # The corpus was previously built but never returned, so callers got None.
    return [stemmed]
|
38 |
+
|
39 |
+
def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the in-vocabulary tokens of *words*.

    Args:
        words: Iterable of tokens.
        model: Object exposing the embeddings through ``model.wv[token]``.
        vocabulary: Container used to test whether a token has an embedding.
        num_features: Length of the returned vector.

    Returns:
        A float64 array of length ``num_features``: the element-wise mean of
        the embeddings of the tokens found in ``vocabulary``, or all zeros
        when none of the tokens is found.
    """
    # Collect the embeddings of the known tokens, in order of appearance.
    known_vectors = [model.wv[token] for token in words if token in vocabulary]

    averaged = np.zeros((num_features,), dtype="float64")
    for vector in known_vectors:
        averaged = averaged + vector

    # Leave the zero vector untouched when nothing matched (avoids 0/0).
    if known_vectors:
        averaged = averaged / float(len(known_vectors))

    return averaged
|
52 |
|
53 |
vectorizer = joblib.load('./vectorizer.pkl')
|
54 |
nb_classifier = joblib.load('./nb_classifier.pkl')
|
55 |
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
|
56 |
random_forest = joblib.load('./random_forest.pkl')
|
57 |
+
word2vec_model = joblib.load('./word2vec_model.pkl')
|
58 |
+
svm_classifier = joblib.load('./svm_classifier.pkl')
|
59 |
|
60 |
def classify(text,choice):
|
61 |
corpus=[text]
|
|
|
63 |
corpus = preprocess_for_bow_and_tfidf(text)
|
64 |
features = vectorizer.transform(corpus).toarray()
|
65 |
prediction = nb_classifier.predict(features)
|
66 |
+
|
67 |
elif(choice == 2):
|
68 |
corpus = preprocess_for_bow_and_tfidf(text)
|
69 |
features = tfidf_vectorizer.transform(corpus).toarray()
|
70 |
prediction = random_forest.predict(features)
|
71 |
+
|
72 |
+
elif(choice == 3):
|
73 |
+
corpus = preprocess_for_word2vec(text)
|
74 |
+
X = [average_word_vectors(title, word2vec_model, word2vec_model.wv.key_to_index, 100) for title in corpus]
|
75 |
+
X = np.array(X)
|
76 |
+
X = scaler.transform(X)
|
77 |
+
prediction = svm_classifier.predict(X)
|
78 |
+
|
79 |
+
|
80 |
if(prediction == 1):
|
81 |
return "Fake News"
|
82 |
else:
|
83 |
return "Not Fake News"
|
84 |
+
|
85 |
GUI = gr.Interface(
|
86 |
+
inputs = [
|
87 |
+
gr.Textbox(label = "Input" , placeholder = "Enter News Title here"),
|
88 |
+
gr.Radio( choices = [("Naive Bayes",1) , ("Random Forest",2) , ("Support Vector Machine",3) ] , value = 1 , label = "Model") ],
|
89 |
outputs = ['text'],
|
90 |
fn = classify,
|
91 |
title = "Fake News Detection System"
|