File size: 3,082 Bytes
69e42b8
5344de8
25842ad
00dd567
8a6b573
619fb86
25842ad
2e8a190
25842ad
8a6b573
ee423d7
851c9dd
5c96396
 
426fb1f
 
 
851c9dd
25842ad
db0f41a
 
 
 
ee423d7
db0f41a
5426b24
 
db0f41a
db17b18
25842ad
 
 
 
 
 
 
 
8a6b573
 
 
 
 
 
 
 
 
fa5d785
 
8a6b573
 
 
 
 
 
 
 
 
 
 
 
 
25842ad
db0f41a
00dd567
 
619fb86
00dd567
54db931
554b7cc
00dd567
8a6b573
00dd567
adeed96
554b7cc
00dd567
8a6b573
 
 
 
 
 
 
 
 
619fb86
 
 
 
8a6b573
74b882c
8a6b573
e9e9010
8a6b573
74b882c
 
 
 
0b03db9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr # Library for building User Interfaces
import nltk
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import joblib
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import re
import numpy as np
from gensim.models import Word2Vec

# Fetch the NLTK 'stopwords' corpus at import time; the preprocessing
# functions below call stopwords.words('english').
nltk.download('stopwords')

# Optional: uncomment to silence scikit-learn's InconsistentVersionWarning.
# import warnings
# from sklearn.exceptions import InconsistentVersionWarning
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Shared Porter stemmer used by both preprocessing functions.
ps = PorterStemmer()

# Pre-trained artifacts, loaded once at start-up from the working directory.
# NOTE(review): joblib.load unpickles its input; only load trusted files.
# Each feature extractor is paired with a classifier in classify():
#   vectorizer       -> nb_classifier   (choice 1)
#   tfidf_vectorizer -> random_forest   (choice 2)
#   word2vec_model + scaler -> svm_classifier (choice 3)
vectorizer = joblib.load('./vectorizer.pkl')
nb_classifier = joblib.load('./nb_classifier.pkl')
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
random_forest = joblib.load('./random_forest.pkl')
word2vec_model = Word2Vec.load("./word2vec_model.bin")
svm_classifier = joblib.load('./svm_classifier.pkl')
scaler = joblib.load('./scaler.pkl')


def preprocess_for_bow_and_tfidf(text):
    """Clean a news title for the bag-of-words / TF-IDF vectorizers.

    Every character other than ASCII letters, digits and the symbols
    ``$£€¥%`` is replaced by a space. The text is then lower-cased, split on
    whitespace, stripped of English stopwords and Porter-stemmed.

    Args:
        text: Raw input string.

    Returns:
        A one-element list containing the processed tokens joined by single
        spaces (the shape expected by ``vectorizer.transform``).
    """
    # Build the stopword set once per call instead of calling
    # stopwords.words('english') for every token and scanning a list.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub(r'[^a-zA-Z0-9$£€¥%]', ' ', text).lower()
    stems = [ps.stem(token) for token in cleaned.split() if token not in stop_words]
    return [' '.join(stems)]

def preprocess_for_word2vec(text):
    """Clean a news title and return its stemmed tokens for Word2Vec lookup.

    Every character other than ASCII letters, digits and the symbols
    ``$£€¥%`` is replaced by a space. The text is then lower-cased, split on
    whitespace, stripped of English stopwords and Porter-stemmed.

    Args:
        text: Raw input string.

    Returns:
        A one-element list whose single item is the list of processed tokens.
    """
    # Build the stopword set once per call instead of calling
    # stopwords.words('english') for every token and scanning a list.
    stop_words = set(stopwords.words('english'))

    cleaned = re.sub(r'[^a-zA-Z0-9$£€¥%]', ' ', text).lower()
    stems = [ps.stem(token) for token in cleaned.split() if token not in stop_words]
    return [stems]

def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the in-vocabulary words.

    Args:
        words: Iterable of tokens.
        model: Object exposing ``model.wv[word]`` to look up a word's vector.
        vocabulary: Container supporting ``word in vocabulary``.
        num_features: Length of the zero vector used as the starting sum.

    Returns:
        The element-wise average of the vectors of all tokens found in
        ``vocabulary``; a float64 zero vector of length ``num_features`` when
        none of the tokens are found.
    """
    total = np.zeros((num_features,), dtype="float64")

    # Keep only tokens that have an embedding; duplicates count each time.
    known = [token for token in words if token in vocabulary]
    if not known:
        return total

    # Accumulate in input order so the floating-point sum is reproducible.
    for token in known:
        total = total + model.wv[token]

    return total / float(len(known))
    


def classify(text, choice):
    """Classify a news title as fake or not using the selected model.

    Args:
        text: Raw news title.
        choice: Model selector. 1 uses the bag-of-words ``vectorizer`` with
            ``nb_classifier``; 2 uses ``tfidf_vectorizer`` with
            ``random_forest``; 3 uses averaged Word2Vec vectors, scaled by
            ``scaler``, with ``svm_classifier``.

    Returns:
        ``"Fake News"`` when the predicted label is 1, otherwise
        ``"Not Fake News"``.

    Raises:
        ValueError: If ``choice`` is not 1, 2 or 3 (previously this surfaced
            as an ``UnboundLocalError`` on ``prediction``).
    """
    if choice == 1:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = vectorizer.transform(corpus).toarray()
        prediction = nb_classifier.predict(features)

    elif choice == 2:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = tfidf_vectorizer.transform(corpus).toarray()
        prediction = random_forest.predict(features)

    elif choice == 3:
        corpus = preprocess_for_word2vec(text)
        keyed_vectors = word2vec_model.wv
        # Take the embedding width from the model rather than hard-coding it,
        # so the zero vector always matches the stored word vectors.
        features = np.array([
            average_word_vectors(
                tokens,
                word2vec_model,
                keyed_vectors.key_to_index,
                keyed_vectors.vector_size,
            )
            for tokens in corpus
        ])
        features = scaler.transform(features)
        prediction = svm_classifier.predict(features)

    else:
        raise ValueError(f"Unknown model choice: {choice!r} (expected 1, 2 or 3)")

    # Exactly one sample is classified, so read its label explicitly instead
    # of relying on the truthiness of a one-element array.
    if prediction[0] == 1:
        return "Fake News"
    return "Not Fake News"
      
# Gradio front end: a text box for the news title and a radio selector whose
# value (1, 2 or 3) is passed to classify() as the model choice.
GUI = gr.Interface(
    fn=classify,
    inputs=[
        gr.Textbox(label="Input", placeholder="Enter News Title here"),
        gr.Radio(
            choices=[
                ("Naive Bayes", 1),
                ("Random Forest", 2),
                ("Support Vector Machine", 3),
            ],
            value=1,
            label="Model",
        ),
    ],
    outputs=["text"],
    title="Fake News Detection System",
)

# Start the web server as soon as the module is executed.
GUI.launch(debug=True)