jeevitha-app committed
Commit d24d968 · verified · 1 Parent(s): 8ee79a3

Create model.pkl and vectorizer.pkl

Files changed (1)
  1. model.pkl and vectorizer.pkl +52 -0
model.pkl and vectorizer.pkl ADDED
@@ -0,0 +1,52 @@
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.linear_model import LogisticRegression
+ from nltk.corpus import stopwords
+ from nltk.stem import PorterStemmer
+ import string
+ import nltk
+ import pickle
+
+ nltk.download('stopwords')
+
+ # Load data
+ true_df = pd.read_csv("True.csv")
+ fake_df = pd.read_csv("Fake.csv")
+
+ true_df["label"] = 1
+ fake_df["label"] = 0
+
+ df = pd.concat([true_df, fake_df], axis=0).reset_index(drop=True)
+
+ # Text Cleaning
+ stemmer = PorterStemmer()
+ stop_words = stopwords.words("english")
+
+ def clean_text(text):
+     text = text.lower()
+     text = "".join([c for c in text if c not in string.punctuation])
+     tokens = text.split()
+     tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
+     return " ".join(tokens)
+
+ df["cleaned_text"] = df["text"].apply(clean_text)
+
+ # TF-IDF and Model Training
+ X = df["cleaned_text"]
+ y = df["label"]
+
+ tfidf = TfidfVectorizer()
+ X_tfidf = tfidf.fit_transform(X)
+
+ X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)
+
+ model = LogisticRegression()
+ model.fit(X_train, y_train)
+
+ # Save model and vectorizer
+ with open("model.pkl", "wb") as f:
+     pickle.dump(model, f)
+
+ with open("vectorizer.pkl", "wb") as f:
+     pickle.dump(tfidf, f)
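
The script above pickles the trained LogisticRegression model and the fitted TfidfVectorizer. A minimal inference sketch, not part of this commit, is shown below: it assumes model.pkl and vectorizer.pkl are in the working directory, reapplies the same cleaning steps as the training script before vectorizing, and uses a hypothetical sample sentence.

import string
import pickle

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

nltk.download("stopwords")

stemmer = PorterStemmer()
stop_words = stopwords.words("english")

def clean_text(text):
    # Same preprocessing as the training script: lowercase, strip punctuation,
    # drop stopwords, and stem each remaining token.
    text = text.lower()
    text = "".join([c for c in text if c not in string.punctuation])
    tokens = [stemmer.stem(w) for w in text.split() if w not in stop_words]
    return " ".join(tokens)

# Load the saved artifacts produced by the commit's script.
with open("model.pkl", "rb") as f:
    model = pickle.load(f)
with open("vectorizer.pkl", "rb") as f:
    tfidf = pickle.load(f)

sample = "Example headline or article text to classify."  # hypothetical input
features = tfidf.transform([clean_text(sample)])
print(model.predict(features)[0])         # 1 = true, 0 = fake (labels from the script)
print(model.predict_proba(features)[0])   # class probabilities

Note that the vectorizer must only be transform()-ed at inference time; refitting it on new text would produce a feature space the pickled model was not trained on.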