AyahElBarq committed
Commit
fd645e6
1 Parent(s): 7e8c780
Files changed (1)
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
import gradio as gr
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

# Data loading, cleaning, training, and prediction logic for the Gradio app
def load_and_prepare_data():
    try:
        file_path = 'WELFake_Dataset.csv'  # Ensure this is the correct path
        dataset = pd.read_csv(file_path)
        print(f"Dataset loaded with {dataset.shape[0]} records")
        dataset = dataset.drop(columns=['Unnamed: 0'])
        dataset = dataset.dropna(subset=['title', 'text'])
        dataset['clean_text'] = dataset['text'].apply(clean_text)
        print(f"Dataset cleaned. Records after cleaning: {dataset.shape[0]}")
        return dataset
    except Exception as e:
        return f"Error loading and preparing data: {e}"

def clean_text(text):
    try:
        text = re.sub(r'\W', ' ', text)   # replace non-word characters with spaces
        text = re.sub(r'\s+', ' ', text)  # collapse runs of whitespace
        text = re.sub(r'\d', '', text)    # remove digits
        text = text.lower().strip()
        return text
    except Exception as e:
        return f"Error cleaning text: {e}"

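# Illustrative example of the cleaning above:
#   clean_text("Hello, World 2024!") -> "hello world"
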
def train_model(dataset):
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            dataset['clean_text'], dataset['label'], test_size=0.2, random_state=42
        )
        print(f"Training data size: {X_train.shape[0]}, Test data size: {X_test.shape[0]}")

        vectorizer = TfidfVectorizer(max_features=10000)
        X_train_tfidf = vectorizer.fit_transform(X_train)
        X_test_tfidf = vectorizer.transform(X_test)

        # The labels are already 0/1, so the binarized copies are not used for
        # training below; the fitted binarizer is kept only because it is part
        # of the returned tuple.
        lb = LabelBinarizer()
        y_train_binary = lb.fit_transform(y_train)
        y_test_binary = lb.transform(y_test)

        log_reg_model = LogisticRegression(max_iter=1000)
        log_reg_model.fit(X_train_tfidf, y_train)

        y_pred_log_reg_train = log_reg_model.predict(X_train_tfidf)
        train_accuracy_log_reg = accuracy_score(y_train, y_pred_log_reg_train)
        train_f1_log_reg = f1_score(y_train, y_pred_log_reg_train)

        y_pred_log_reg = log_reg_model.predict(X_test_tfidf)
        accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
        f1_log_reg = f1_score(y_test, y_pred_log_reg)

        print(f"Train Accuracy: {train_accuracy_log_reg}, Train F1 Score: {train_f1_log_reg}")
        print(f"Test Accuracy: {accuracy_log_reg}, Test F1 Score: {f1_log_reg}")

        return vectorizer, lb, log_reg_model, accuracy_log_reg, f1_log_reg
    except Exception as e:
        return f"Error training model: {e}"

def fake_news_detection(text):
    try:
        # Note: this reloads the dataset and retrains the model on every request.
        dataset = load_and_prepare_data()
        if isinstance(dataset, str):  # load_and_prepare_data returned an error message
            return dataset
        trained = train_model(dataset)
        if isinstance(trained, str):  # train_model returned an error message
            return trained
        vectorizer, lb, log_reg_model, accuracy_log_reg, f1_log_reg = trained

        clean_text_input = clean_text(text)
        text_tfidf = vectorizer.transform([clean_text_input])
        prediction = log_reg_model.predict_proba(text_tfidf)
        # Column 1 of predict_proba is the probability of label 1, i.e. "Real"
        result = "Real" if prediction[0][1] >= 0.5 else "Fake"

        return f"Prediction: {result}"
    except Exception as e:
        return f"Error in fake news detection: {e}"

iface = gr.Interface(
    fn=fake_news_detection,
    inputs=gr.Textbox(lines=2, placeholder="Enter news text here..."),
    outputs="text",
    title="Fake News Detector",
    description="Enter a news headline or article text to check if it is fake or real."
)

if __name__ == "__main__":
    iface.launch()
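fake_news_detection above reloads WELFake_Dataset.csv and retrains the logistic regression on every request. A common refinement is to build the fitted vectorizer and model once at startup and reuse them; the sketch below shows one way to do that, reusing load_and_prepare_data, train_model, and clean_text from above (the names _ARTIFACTS, get_artifacts, and fake_news_detection_cached are illustrative, not part of app.py).

# Sketch: train once, then reuse the cached artifacts for every request.
_ARTIFACTS = None

def get_artifacts():
    """Load the data and train the model a single time, caching the result."""
    global _ARTIFACTS
    if _ARTIFACTS is None:
        dataset = load_and_prepare_data()
        if isinstance(dataset, str):
            raise RuntimeError(dataset)
        trained = train_model(dataset)
        if isinstance(trained, str):
            raise RuntimeError(trained)
        _ARTIFACTS = trained
    return _ARTIFACTS

def fake_news_detection_cached(text):
    vectorizer, lb, log_reg_model, accuracy_log_reg, f1_log_reg = get_artifacts()
    text_tfidf = vectorizer.transform([clean_text(text)])
    probability_real = log_reg_model.predict_proba(text_tfidf)[0][1]
    result = "Real" if probability_real >= 0.5 else "Fake"
    return f"Prediction: {result}"

With this variant, the interface would be created with fn=fake_news_detection_cached, so only the first request (or an explicit get_artifacts() call at startup) pays the training cost.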