Spaces:
Build error
Build error
#!pip3 install numpy | |
#!pip3 install pandas | |
#!pip3 install scikit-learn
#!pip3 install nltk | |
import numpy as np | |
import pandas as pd | |
import pandas as pd | |
import numpy as np | |
import re | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem.porter import PorterStemmer | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import accuracy_score | |
# Fetch the NLTK stop-word corpus; stopwords.words() needs it on disk.
nltk.download('stopwords')
print(stopwords.words('english'))

#from google.colab import drive
#drive.mount('/content/drive')

# Load the labelled training articles and print a column/dtype summary.
news_df = pd.read_csv('train.csv')
news_df.info()

# Replace missing cells with empty strings so the string concatenation
# below cannot produce NaN for rows lacking a title or an author.
news_df = news_df.fillna('')

# Join title and author with a space: without a separator the last word of
# the title and the first word of the author fuse into one bogus token.
news_df['article'] = news_df['title'] + ' ' + news_df['author']

# The row identifier carries no predictive signal.
news_df.drop(columns=['id'], inplace=True)

# Feature frame and target column (`columns=` already implies axis=1).
X = news_df.drop(columns='label')
Y = news_df['label']
p_stemming = PorterStemmer()

# Built once at import time: stopwords.words() returns a fresh list on every
# call, so looking it up per token meant rebuilding the list and scanning it
# linearly for each word of each article.
_STOP_WORDS = frozenset(stopwords.words('english'))

# Anything that is not an ASCII letter is treated as a word separator.
_NON_LETTERS = re.compile(r'[^a-zA-Z]')


def stemming(content):
    """Normalize a text string into space-separated Porter stems.

    Non-letter characters are replaced by spaces, the text is lower-cased
    and split on whitespace, English stop words are dropped, and each
    remaining word is stemmed with the module-level ``p_stemming``.

    Args:
        content: The raw text to normalize (a ``str``).

    Returns:
        A single string of stemmed words joined by one space; empty when
        no word survives the filtering.
    """
    words = _NON_LETTERS.sub(' ', content).lower().split()
    return ' '.join(
        p_stemming.stem(word) for word in words if word not in _STOP_WORDS
    )
# Normalize every article into space-separated stems.
news_df['article'] = news_df['article'].apply(stemming)

X = news_df['article'].values
Y = news_df['label'].values

# Split the raw text BEFORE fitting TF-IDF so that the vocabulary and IDF
# weights are learned from training articles only; fitting on the whole
# corpus leaks test-set statistics into the features. The split depends only
# on the number of samples, Y and random_state, so the same rows land in the
# test set as when the split is done on already-vectorized data.
X_train_text, X_test_text, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=1
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Keep X as the TF-IDF matrix of the full corpus for any later consumer.
X = vectorizer.transform(X)

ml_model = LogisticRegression()
ml_model.fit(X_train, Y_train)

# accuracy_score takes (y_true, y_pred) in that order.
X_train_predict = ml_model.predict(X_train)
train_data_accuracy = accuracy_score(Y_train, X_train_predict)
percent_tr_accuracy = train_data_accuracy * 100
print("Accuracy for Train data: ", percent_tr_accuracy)

X_test_predict = ml_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_predict)
percent_test_accuracy = test_data_accuracy * 100
print("Accuracy for Test data: ", percent_test_accuracy)
def Detection(index):
    """Classify one held-out article, selected by its row number.

    Args:
        index: Row position in the module-level ``X_test`` matrix. Anything
            ``int()`` accepts is allowed, e.g. the string typed into the
            UI textbox.

    Returns:
        "The News is real" when the model predicts label 0, otherwise
        "The News is fake". If ``index`` is not a whole number or lies
        outside ``0 .. X_test.shape[0] - 1``, a message describing the
        problem is returned instead of raising, so the UI shows readable
        feedback.
    """
    try:
        index = int(index)
    except (TypeError, ValueError):
        return "Invalid News ID: please enter a whole number"

    total = X_test.shape[0]
    if not 0 <= index < total:
        return f"Invalid News ID: enter a number between 0 and {total - 1}"

    new_predict = ml_model.predict(X_test[index])
    return "The News is real" if new_predict[0] == 0 else "The News is fake"
#pip install gradio
import os

import gradio as gr

# Login credentials may be supplied through the environment so they do not
# have to live in source control; the literals are only fallbacks that keep
# the previous behavior when the variables are unset.
auth_user = os.environ.get('HOAXIE_USER', 'user')
auth_password = os.environ.get('HOAXIE_PASSWORD', 'sakec')

# Single-textbox UI: the user types a row number, Detection returns the verdict.
iface = gr.Interface(
    fn=Detection,
    inputs=gr.Textbox(lines=2, placeholder="Enter News ID here:"),
    outputs="label",
    title='Hoaxie',
)
iface.launch(
    auth=(auth_user, auth_password),
    auth_message="Enter Username and password shared with you",
)