ByteBlaze committed on
Commit
8a6b573
1 Parent(s): adeed96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -1
app.py CHANGED
@@ -2,10 +2,12 @@ import gradio as gr
2
  import nltk
3
  from sklearn.feature_extraction.text import CountVectorizer
4
  from sklearn.feature_extraction.text import TfidfVectorizer
 
5
  import joblib
6
  from nltk.stem.porter import PorterStemmer
7
  from nltk.corpus import stopwords
8
  import re
 
9
 
10
  nltk.download('stopwords')
11
 
@@ -14,6 +16,7 @@ nltk.download('stopwords')
14
  # warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
15
 
16
  ps = PorterStemmer()
 
17
 
18
  def preprocess_for_bow_and_tfidf(text):
19
  corpus = []
@@ -24,11 +27,35 @@ def preprocess_for_bow_and_tfidf(text):
24
  corpus.append(' '.join(text))
25
 
26
  return corpus
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  vectorizer = joblib.load('./vectorizer.pkl')
29
  nb_classifier = joblib.load('./nb_classifier.pkl')
30
  tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
31
  random_forest = joblib.load('./random_forest.pkl')
 
 
32
 
33
  def classify(text,choice):
34
  corpus=[text]
@@ -36,16 +63,29 @@ def classify(text,choice):
36
  corpus = preprocess_for_bow_and_tfidf(text)
37
  features = vectorizer.transform(corpus).toarray()
38
  prediction = nb_classifier.predict(features)
 
39
  elif(choice == 2):
40
  corpus = preprocess_for_bow_and_tfidf(text)
41
  features = tfidf_vectorizer.transform(corpus).toarray()
42
  prediction = random_forest.predict(features)
 
 
 
 
 
 
 
 
 
43
  if(prediction == 1):
44
  return "Fake News"
45
  else:
46
  return "Not Fake News"
 
47
  GUI = gr.Interface(
48
- inputs = ['text', gr.Radio( choices = [("Naive Bayes",1) , ("Random Forest",2) ] , value = 1 , label = "Model") ],
 
 
49
  outputs = ['text'],
50
  fn = classify,
51
  title = "Fake News Detection System"
 
2
  import nltk
3
  from sklearn.feature_extraction.text import CountVectorizer
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.preprocessing import StandardScaler
6
  import joblib
7
  from nltk.stem.porter import PorterStemmer
8
  from nltk.corpus import stopwords
9
  import re
10
+ import numpy as np
11
 
12
  nltk.download('stopwords')
13
 
 
16
  # warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
17
 
18
  ps = PorterStemmer()
19
+ scaler = StandardScaler()
20
 
21
  def preprocess_for_bow_and_tfidf(text):
22
  corpus = []
 
27
  corpus.append(' '.join(text))
28
 
29
  return corpus
30
+
31
def preprocess_for_word2vec(text):
    """Tokenise one news title for the word2vec feature pipeline.

    Every character other than ASCII letters, digits and the symbols
    ``$ £ € ¥ %`` is replaced by a space; the text is lower-cased and split
    on whitespace; English stopwords are dropped and the remaining tokens
    are stemmed with the module-level Porter stemmer ``ps``.

    Args:
        text: Raw title string.

    Returns:
        A one-element list holding the list of stemmed tokens, so callers
        can iterate over it as a corpus of documents.
    """
    # Read the stopword list once instead of once per token.
    stop_words = set(stopwords.words('english'))

    text = re.sub('[^a-zA-Z0-9$£€¥%]', ' ', text)
    tokens = text.lower().split()
    # Stopwords are filtered on the lower-cased token, before stemming.
    stemmed = [ps.stem(t) for t in tokens if t not in stop_words]

    corpus = [stemmed]
    # The original fell off the end and returned None, which made the
    # caller's iteration over the corpus fail.
    return corpus
38
+
39
def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the in-vocabulary words.

    Args:
        words: Iterable of tokens for one document.
        model: Object whose ``wv`` attribute maps a token to its vector.
        vocabulary: Container used for the membership test on each token.
        num_features: Length of the returned vector.

    Returns:
        A float64 array of length ``num_features``: the element-wise mean
        of the vectors of every token found in ``vocabulary``, or all zeros
        when none of the tokens is known.
    """
    total = np.zeros((num_features,), dtype="float64")
    known = [token for token in words if token in vocabulary]

    for token in known:
        total = np.add(total, model.wv[token])

    # No known token: leave the zero vector untouched (avoids 0/0).
    if not known:
        return total

    return np.divide(total, float(len(known)))
52
 
53
  vectorizer = joblib.load('./vectorizer.pkl')
54
  nb_classifier = joblib.load('./nb_classifier.pkl')
55
  tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
56
  random_forest = joblib.load('./random_forest.pkl')
57
+ word2vec_model = joblib.load('./word2vec_model.pkl')
58
+ svm_classifier = joblib.load('./svm_classifier.pkl')
59
 
60
  def classify(text,choice):
61
  corpus=[text]
 
63
  corpus = preprocess_for_bow_and_tfidf(text)
64
  features = vectorizer.transform(corpus).toarray()
65
  prediction = nb_classifier.predict(features)
66
+
67
  elif(choice == 2):
68
  corpus = preprocess_for_bow_and_tfidf(text)
69
  features = tfidf_vectorizer.transform(corpus).toarray()
70
  prediction = random_forest.predict(features)
71
+
72
+ elif(choice == 3):
73
+ corpus = preprocess_for_word2vec(text)
74
+ X = [average_word_vectors(title, word2vec_model, word2vec_model.wv.key_to_index, 100) for title in corpus]
75
+ X = np.array(X)
76
+ X = scaler.transform(X)
77
+ prediction = svm_classifier.predict(X)
78
+
79
+
80
  if(prediction == 1):
81
  return "Fake News"
82
  else:
83
  return "Not Fake News"
84
+
85
  GUI = gr.Interface(
86
+ inputs = [
87
+ gr.Textbox(label = "Input" , placeholder = "Enter News Title here"),
88
+ gr.Radio( choices = [("Naive Bayes",1) , ("Random Forest",2) , ("Support Vector Machine",3) ] , value = 1 , label = "Model") ],
89
  outputs = ['text'],
90
  fn = classify,
91
  title = "Fake News Detection System"