jeevitha-app committed on
Commit
fec2d2b
·
verified ·
1 Parent(s): d24d968

Delete model.pkl and vectorizer.pkl

Browse files
Files changed (1) hide show
  1. model.pkl and vectorizer.pkl +0 -52
model.pkl and vectorizer.pkl DELETED
@@ -1,52 +0,0 @@
1
- import pandas as pd
2
- from sklearn.model_selection import train_test_split
3
- from sklearn.feature_extraction.text import TfidfVectorizer
4
- from sklearn.linear_model import LogisticRegression
5
- from nltk.corpus import stopwords
6
- from nltk.stem import PorterStemmer
7
- import string
8
- import nltk
9
- import pickle
10
-
11
# Make sure the NLTK stop-word corpus is available locally before it is used.
nltk.download('stopwords')

# Load data
# Each CSV holds one class of article; tag the rows with a numeric label as
# they are read (1 for rows from True.csv, 0 for rows from Fake.csv).
true_df = pd.read_csv("True.csv").assign(label=1)
fake_df = pd.read_csv("Fake.csv").assign(label=0)

# Stack both classes into one frame with a fresh 0..n-1 index.
df = pd.concat([true_df, fake_df], axis=0, ignore_index=True)
21
-
22
# Text Cleaning
stemmer = PorterStemmer()
stop_words = stopwords.words("english")

# Built once at import time so clean_text does no per-call setup:
#   * a frozenset gives O(1) stop-word membership tests (the list above is
#     kept as-is for any other code that reads it);
#   * a translation table deletes all ASCII punctuation in one C-level pass.
_STOP_WORD_SET = frozenset(stop_words)
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalize one document for TF-IDF vectorization.

    The text is lower-cased, every character in ``string.punctuation`` is
    removed, the result is split on whitespace, stop words are dropped and
    each remaining token is stemmed with the module-level ``stemmer``.

    Args:
        text: The raw document. Non-string values (for example a missing
            cell that pandas represents as NaN) are treated as empty.

    Returns:
        The surviving stemmed tokens joined by single spaces; ``""`` when
        nothing survives or when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Previously this raised AttributeError on ``.lower()``.
        return ""

    # NOTE(review): punctuation is removed before the stop-word check, so any
    # stop-list entry that itself contains punctuation can never match here.
    # This mirrors the original order of operations; confirm it is intended.
    stripped = text.lower().translate(_PUNCT_TABLE)
    return " ".join(
        stemmer.stem(word)
        for word in stripped.split()
        if word not in _STOP_WORD_SET
    )
32
-
33
# Normalize every article body once; the vectorizer works on this column.
df["cleaned_text"] = df["text"].apply(clean_text)

# TF-IDF and Model Training
X = df["cleaned_text"]
y = df["label"]

# Split the cleaned text BEFORE fitting the vectorizer. Fitting TF-IDF on the
# full corpus would let the held-out rows influence the vocabulary and IDF
# weights, which leaks test information into training.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

tfidf = TfidfVectorizer()
X_train = tfidf.fit_transform(X_train_text)  # learn vocabulary/IDF on train only
X_test = tfidf.transform(X_test_text)        # reuse the fitted vocabulary

model = LogisticRegression()
model.fit(X_train, y_train)

# Report accuracy on the held-out split so the saved artifacts come with a
# sanity check (the split was previously created but never used).
print(f"Held-out accuracy: {model.score(X_test, y_test):.4f}")

# Save model and vectorizer
# NOTE: pickle files execute code on load; only unpickle these artifacts
# from a trusted source.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)