"""Gradio web UI that labels a news title as "Fake News" or "Not Fake News".

Three pre-trained pipelines are loaded from disk at import time and offered
through a single text box plus a model selector:

1. ``vectorizer`` features        -> ``nb_classifier``   ("Naive Bayes")
2. ``tfidf_vectorizer`` features  -> ``random_forest``   ("Random Forest")
3. averaged Word2Vec vectors, passed through ``scaler``
                                  -> ``svm_classifier``  ("Support Vector Machine")
"""

import re

import gradio as gr  # Library for building User Interfaces
import joblib
import nltk
import numpy as np
from gensim.models import Word2Vec
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler

nltk.download('stopwords')

# import warnings
# from sklearn.exceptions import InconsistentVersionWarning
# warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

ps = PorterStemmer()

# NOTE(review): joblib.load unpickles arbitrary objects; only point these
# paths at artefacts you trust.
vectorizer = joblib.load('./vectorizer.pkl')
nb_classifier = joblib.load('./nb_classifier.pkl')
tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
random_forest = joblib.load('./random_forest.pkl')
word2vec_model = Word2Vec.load("./word2vec_model.bin")
svm_classifier = joblib.load('./svm_classifier.pkl')
scaler = joblib.load('./scaler.pkl')

# Built once: stopwords.words() re-reads the corpus on every call, and a set
# gives O(1) membership tests instead of scanning a list for every token.
_STOP_WORDS = frozenset(stopwords.words('english'))

# Everything except ASCII letters, digits and a few currency/percent symbols
# is replaced by a space before tokenising.
_NON_TOKEN_CHARS = re.compile(r'[^a-zA-Z0-9$£€¥%]')


def _tokenize(text):
    """Return the stemmed, lower-cased, stop-word-free tokens of *text*."""
    cleaned = _NON_TOKEN_CHARS.sub(' ', text).lower()
    return [ps.stem(token) for token in cleaned.split()
            if token not in _STOP_WORDS]


def preprocess_for_bow_and_tfidf(text):
    """Clean *text* for the bag-of-words / TF-IDF vectorizers.

    Args:
        text: Raw input string.

    Returns:
        A one-element list holding the cleaned tokens joined by single
        spaces, i.e. the "list of documents" shape ``transform`` is called
        with in ``classify``.
    """
    return [' '.join(_tokenize(text))]


def preprocess_for_word2vec(text):
    """Clean *text* for the Word2Vec pipeline.

    Args:
        text: Raw input string.

    Returns:
        A one-element list holding the list of cleaned tokens.
    """
    return [_tokenize(text)]


def average_word_vectors(words, model, vocabulary, num_features):
    """Average the embeddings of the in-vocabulary tokens of one document.

    Args:
        words: Iterable of tokens.
        model: Object exposing ``model.wv[word]`` embedding lookup.
        vocabulary: Container supporting ``word in vocabulary``.
        num_features: Length of the returned vector; must match the
            embedding size.

    Returns:
        A float64 array of shape ``(num_features,)``. It is all zeros when
        none of *words* is in *vocabulary*.
    """
    feature_vector = np.zeros((num_features,), dtype="float64")
    known_words = 0
    for word in words:
        if word in vocabulary:
            known_words += 1
            feature_vector += model.wv[word]
    if known_words:
        feature_vector /= known_words
    return feature_vector


def classify(text, choice):
    """Classify a news title with the selected model.

    Args:
        text: The news title entered by the user.
        choice: Model selector: 1 = Naive Bayes, 2 = Random Forest,
            3 = Support Vector Machine.

    Returns:
        ``"Fake News"`` when the model predicts label 1, ``"Not Fake News"``
        otherwise, or a prompt message when *text* is empty or blank.

    Raises:
        ValueError: If *choice* is not 1, 2 or 3.
    """
    if text is None or not text.strip():
        # Nothing to classify; avoid feeding an empty document to the model.
        return "Please enter a news title."

    if choice == 1:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = vectorizer.transform(corpus).toarray()
        prediction = nb_classifier.predict(features)
    elif choice == 2:
        corpus = preprocess_for_bow_and_tfidf(text)
        features = tfidf_vectorizer.transform(corpus).toarray()
        prediction = random_forest.predict(features)
    elif choice == 3:
        corpus = preprocess_for_word2vec(text)
        keyed_vectors = word2vec_model.wv
        # Take the embedding size from the model rather than hard-coding it.
        features = np.array([
            average_word_vectors(
                tokens,
                word2vec_model,
                keyed_vectors.key_to_index,
                keyed_vectors.vector_size,
            )
            for tokens in corpus
        ])
        features = scaler.transform(features)
        prediction = svm_classifier.predict(features)
    else:
        # Previously fell through and failed with UnboundLocalError.
        raise ValueError(f"Unknown model choice: {choice!r}")

    # Exactly one document was submitted, so there is exactly one label.
    return "Fake News" if prediction[0] == 1 else "Not Fake News"


GUI = gr.Interface(
    inputs=[
        gr.Textbox(label="Input", placeholder="Enter News Title here"),
        gr.Radio(
            choices=[
                ("Naive Bayes", 1),
                ("Random Forest", 2),
                ("Support Vector Machine", 3),
            ],
            value=1,
            label="Model",
        ),
    ],
    outputs=['text'],
    fn=classify,
    title="Fake News Detection System",
)

GUI.launch(debug=True)