|
!pip3 install numpy |
|
!pip3 install pandas |
|
!pip3 install sklearn |
|
!pip3 install nltk |
|
|
|
|
|
|
|
import numpy as np |
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd |
|
import numpy as np |
|
import re |
|
import nltk |
|
from nltk.corpus import stopwords |
|
from nltk.stem.porter import PorterStemmer |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.metrics import accuracy_score |
|
|
|
|
|
|
|
# Fetch the NLTK stop-word corpus so stopwords.words() can be used below.
nltk.download('stopwords')

# Show the English stop-word list.
_english_stop_word_list = stopwords.words('english')
print(_english_stop_word_list)
|
|
|
|
|
from google.colab import drive

# Mount Google Drive so the CSV stored there can be read by path.
drive.mount('/content/drive')

# Read the training CSV into a DataFrame.
_train_csv_path = '/content/drive/MyDrive/Mini project/train.csv'
news_df = pd.read_csv(_train_csv_path)

# Quick look at the data: first rows, dimensions, column summary,
# and the number of missing values per column.
news_df.head()
news_df.shape
news_df.info()
news_df.isna().sum()
|
|
|
|
|
# Replace every missing value with an empty string so the text columns can be
# concatenated below without producing NaN.
news_df = news_df.fillna('')

# Combine headline and author into one text feature. The explicit space keeps
# the last word of the title and the first word of the author as two separate
# tokens instead of fusing them into a single word.
news_df['article'] = news_df['title'] + ' ' + news_df['author']
news_df

# Remove the 'id' column in place.
news_df.drop(columns=['id'], inplace=True)
news_df

# Number of rows per author.
news_df["author"].value_counts()

# Features (every column except the label) and the target column.
# `columns=` already names the axis, so no separate `axis` argument is needed.
X = news_df.drop(columns='label')
Y = news_df['label']
X
Y
|
|
|
|
|
|
|
# Single Porter stemmer instance shared by every call to stemming().
p_stemming = PorterStemmer()

# English stop words, loaded once. stopwords.words() rebuilds its list on each
# call, so calling it for every word of every article is needlessly slow; a
# frozenset also makes each membership test O(1).
# NOTE: this needs the 'stopwords' corpus to be downloaded before this point.
_ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))


def stemming(content):
    """Turn raw text into a string of lower-case, stemmed, non-stop-word tokens.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    removed, and each remaining word is reduced to its Porter stem.

    Args:
        content: The text to clean (a ``str``).

    Returns:
        The stemmed tokens joined by single spaces; an empty string when no
        token survives.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()
    stemmed_tokens = [
        p_stemming.stem(token)
        for token in tokens
        if token not in _ENGLISH_STOP_WORDS
    ]
    return ' '.join(stemmed_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the text-cleaning function over every combined title/author string and
# store the result back in the same column.
_stemmed_articles = news_df['article'].apply(stemming)
news_df['article'] = _stemmed_articles
news_df['article']

# Pull the cleaned text and the labels out of the DataFrame as arrays.
X = news_df['article'].values
X

Y = news_df['label'].values
Y

X
|
|
|
|
|
|
|
# Split the raw texts first so the test articles stay unseen while the
# TF-IDF vocabulary and IDF weights are learned. Fitting the vectorizer on the
# whole corpus before splitting would leak test-set statistics into training.
_X_train_text, _X_test_text, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=1
)

# Learn the vocabulary / IDF weights from the training texts only.
vectorizer = TfidfVectorizer()
vectorizer.fit(_X_train_text)

# Encode both partitions with the training-fitted vectorizer.
X_train = vectorizer.transform(_X_train_text)
X_test = vectorizer.transform(_X_test_text)

# Keep `X` bound to the TF-IDF matrix of the full corpus, as before, in case
# later cells rely on it.
X = vectorizer.transform(X)
|
|
|
|
|
|
|
|
|
# Fit a logistic-regression classifier on the training partition.
ml_model = LogisticRegression()
ml_model.fit(X_train, Y_train)

# Accuracy on the data the model was trained on.
# accuracy_score is documented as (y_true, y_pred): true labels come first.
X_train_predict = ml_model.predict(X_train)
train_data_accuracy = accuracy_score(Y_train, X_train_predict)
percent_tr_accuracy = train_data_accuracy * 100
print("Accuracy for Train data: ", percent_tr_accuracy)

# Accuracy on the held-out test partition.
X_test_predict = ml_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_predict)
percent_test_accuracy = test_data_accuracy * 100
print("Accuracy for Test data: ", percent_test_accuracy)
|
|
|
|
|
|
|
|
|
|
|
def Detection(index):
    """Classify one article of the test partition as real or fake.

    Args:
        index: Row position of the article in ``X_test``. Any value accepted
            by ``int()`` works (for example the float sent by a number input);
            a fractional part is truncated. Negative positions count back
            from the end.

    Returns:
        "The News is real" when the model predicts label 0, otherwise
        "The News is fake".

    Raises:
        IndexError: If ``index`` does not address a row of ``X_test``.
    """
    index = int(index)

    # Fail with a readable message instead of an error raised deep inside
    # the sparse-matrix indexing code.
    n_samples = X_test.shape[0]
    if not -n_samples <= index < n_samples:
        raise IndexError(
            f"index {index} is out of range for a test set of {n_samples} rows"
        )

    X_new = X_test[index]
    new_predict = ml_model.predict(X_new)
    return "The News is real" if new_predict[0] == 0 else "The News is fake"


# Quick check on the first test article. The previous call passed the name
# `index`, which is not defined at module level and raised NameError.
Detection(0)
|
|
|
|
|
|
|
|
|
|
|
pip install gradio |
|
import gradio as gr |
|
demo = gr.Interface(fn=Detection, inputs='number', outputs="text") |
|
demo.launch(share=True) |