akshatsanghvi committed on
Commit
42855a6
1 Parent(s): 21f200d
Files changed (3) hide show
  1. app.py +54 -9
  2. data/phishing.csv +0 -0
  3. notebook.ipynb +445 -34
app.py CHANGED
@@ -1,14 +1,59 @@
1
  import gradio as gr
2
- from joblib import load
 
 
 
3
 
4
- model = load("Classifier.joblib")
 
5
 
6
- def pred(Email):
7
- l = model.predict([Email])
8
- if l[0]==1:
9
- return "Spam ⚠️"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  else:
11
- return "👍"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- iface = gr.Interface(fn=pred, inputs="text", outputs="text", allow_flagging="never", description="Enter Your Message Below :")
14
- iface.launch()
 
1
  import gradio as gr
2
+ import pickle
3
+ import pandas as pd
4
+ from urlextract import URLExtract
5
+ import URLFeatureExtraction
6
 
7
# NOTE: pickle runs arbitrary code while loading, so these model files must
# come from a trusted source.

# Text classifier used by isSpam via `model.predict([Email])`.
with open("models/spam-clf.pkl", "rb") as f:
    model = pickle.load(f)

# URL classifier used by isPhishing via `model2.predict([features])`.
with open("models/xgb.dat", "rb") as f:
    model2 = pickle.load(f)

# Module-level table of checked URLs and their verdicts; isPhishing adds a
# row per URL and check_Mail returns it for display.
df = pd.DataFrame(columns=['URL', 'Phishy?'])
14
+
15
def isPhishing(link):
    """Classify one URL with the phishing model and record the verdict.

    The URL is converted to a feature vector by
    ``URLFeatureExtraction.featureExtraction`` and passed to
    ``model2.predict``. A row with the URL and "UnSafe"/"Safe" is added to
    the module-level ``df``.

    Args:
        link: The URL to check.

    Returns:
        The first element of the model's prediction; a truthy value is
        recorded as "UnSafe".
    """
    global df

    features = URLFeatureExtraction.featureExtraction(link)
    print(features)

    prediction = model2.predict([features])
    print(prediction)

    verdict = "UnSafe" if prediction[0] else "Safe"
    # Use the public pd.concat instead of the private DataFrame._append.
    # Rebinding `df` (rather than mutating it in place) keeps any frame that
    # was already returned to the UI unchanged.
    new_row = pd.DataFrame([{'URL': link, 'Phishy?': verdict}])
    df = pd.concat([df, new_row], ignore_index=True)
    return prediction[0]
26
+
27
def isSpam(Email):
    """Return "Spam" or "Not Spam" for the given email text.

    The text is wrapped in a one-element list for ``model.predict``; the
    label is chosen from the truthiness of the first prediction.
    """
    first_prediction = model.predict([Email])[0]
    if first_prediction:
        return "Spam"
    return "Not Spam"
30
+
31
def check_URL(Email):
    """Count how many URLs found in the email are flagged by isPhishing.

    URLs are extracted with ``URLExtract().find_urls`` and each one is passed
    to ``isPhishing``; the results are summed. A summary line is printed.

    Returns:
        The sum of the ``isPhishing`` results, or 0 when no URL is found.
    """
    found = URLExtract().find_urls(Email)
    flagged = sum([isPhishing(candidate) for candidate in found]) if found else 0
    print("Out of {} urls {} are phishing".format(len(found), flagged))
    return flagged
42
+
43
def check_Mail(Email):
    """Run the spam and URL checks on one email and collect the results.

    Returns:
        A four-item list: the ``isSpam`` label, the ``check_URL`` count,
        ``URLFeatureExtraction.state`` clamped to a minimum of 0, and the
        module-level ``df``.
    """
    spam_label = isSpam(Email)
    flagged_urls = check_URL(Email)
    # Read the module state only after the URLs have been processed, so it is
    # not a value left over from before this email was analysed.
    # NOTE(review): assumes featureExtraction updates
    # URLFeatureExtraction.state - confirm against that module.
    state = max(URLFeatureExtraction.state, 0)
    return [spam_label, flagged_urls, state, df]
46
+
47
# Single-page UI: one multi-line text input, four outputs.
iface = gr.Interface(
    fn=check_Mail,
    inputs=gr.Textbox(
        lines=6,
        placeholder="Enter or paste email here",
        label="Email",
    ),
    # One component per item in the list returned by check_Mail, same order.
    outputs=[
        gr.Textbox(label="Spam or Not"),
        gr.Textbox(label="Phishing Links Detected"),
        gr.Textbox(label="Consider this Mail as"),
        gr.Dataframe(label="Insights", interactive=False),
    ],
)

# Launch the Gradio app
iface.launch()
data/phishing.csv DELETED
The diff for this file is too large to render. See raw diff
 
notebook.ipynb CHANGED
@@ -2,18 +2,18 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
- "# Importing Essential libraries : \n",
10
  "import pandas as pd\n",
11
- "# import matplotlib.pyplot as plt"
12
  ]
13
  },
14
  {
15
  "cell_type": "code",
16
- "execution_count": 2,
17
  "metadata": {},
18
  "outputs": [
19
  {
@@ -80,20 +80,20 @@
80
  "4 ham Nah I don't think he goes to usf, he lives aro..."
81
  ]
82
  },
83
- "execution_count": 2,
84
  "metadata": {},
85
  "output_type": "execute_result"
86
  }
87
  ],
88
  "source": [
89
  "# importing data to work on :\n",
90
- "dataset = pd.read_csv(\"spam.csv\")\n",
91
  "dataset.head()"
92
  ]
93
  },
94
  {
95
  "cell_type": "code",
96
- "execution_count": 3,
97
  "metadata": {},
98
  "outputs": [
99
  {
@@ -102,7 +102,7 @@
102
  "(5572, 2)"
103
  ]
104
  },
105
- "execution_count": 3,
106
  "metadata": {},
107
  "output_type": "execute_result"
108
  }
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "cell_type": "code",
116
- "execution_count": 4,
117
  "metadata": {},
118
  "outputs": [
119
  {
@@ -122,7 +122,7 @@
122
  "Index(['Category', 'Message'], dtype='object')"
123
  ]
124
  },
125
- "execution_count": 4,
126
  "metadata": {},
127
  "output_type": "execute_result"
128
  }
@@ -133,7 +133,7 @@
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 5,
137
  "metadata": {},
138
  "outputs": [
139
  {
@@ -160,7 +160,7 @@
160
  "dtype: int64"
161
  ]
162
  },
163
- "execution_count": 5,
164
  "metadata": {},
165
  "output_type": "execute_result"
166
  }
@@ -173,7 +173,7 @@
173
  },
174
  {
175
  "cell_type": "code",
176
- "execution_count": 6,
177
  "metadata": {},
178
  "outputs": [
179
  {
@@ -184,7 +184,7 @@
184
  "dtype: int64"
185
  ]
186
  },
187
- "execution_count": 6,
188
  "metadata": {},
189
  "output_type": "execute_result"
190
  }
@@ -196,7 +196,7 @@
196
  },
197
  {
198
  "cell_type": "code",
199
- "execution_count": 7,
200
  "metadata": {},
201
  "outputs": [
202
  {
@@ -269,7 +269,7 @@
269
  "4 ham Nah I don't think he goes to usf, he lives aro... 0"
270
  ]
271
  },
272
- "execution_count": 7,
273
  "metadata": {},
274
  "output_type": "execute_result"
275
  }
@@ -282,7 +282,7 @@
282
  },
283
  {
284
  "cell_type": "code",
285
- "execution_count": 8,
286
  "metadata": {},
287
  "outputs": [],
288
  "source": [
@@ -300,7 +300,7 @@
300
  },
301
  {
302
  "cell_type": "code",
303
- "execution_count": 9,
304
  "metadata": {},
305
  "outputs": [],
306
  "source": [
@@ -310,7 +310,7 @@
310
  },
311
  {
312
  "cell_type": "code",
313
- "execution_count": 10,
314
  "metadata": {},
315
  "outputs": [
316
  {
@@ -319,7 +319,7 @@
319
  "((4457,), (1115,), (4457,), (1115,))"
320
  ]
321
  },
322
- "execution_count": 10,
323
  "metadata": {},
324
  "output_type": "execute_result"
325
  }
@@ -330,7 +330,7 @@
330
  },
331
  {
332
  "cell_type": "code",
333
- "execution_count": 11,
334
  "metadata": {},
335
  "outputs": [],
336
  "source": [
@@ -340,19 +340,19 @@
340
  },
341
  {
342
  "cell_type": "code",
343
- "execution_count": 12,
344
  "metadata": {},
345
  "outputs": [],
346
  "source": [
347
  "# Importing Different classifiers to compare :\n",
348
  "# from sklearn.linear_model import LogisticRegression\n",
349
  "# from sklearn.ensemble import RandomForestClassifier\n",
350
- "from sklearn.naive_bayes import MultinomialNB # ✔️✔️ Works well with this type of problems, when data is discrete."
351
  ]
352
  },
353
  {
354
  "cell_type": "code",
355
- "execution_count": 13,
356
  "metadata": {},
357
  "outputs": [],
358
  "source": [
@@ -367,19 +367,423 @@
367
  },
368
  {
369
  "cell_type": "code",
370
- "execution_count": 14,
371
  "metadata": {},
372
  "outputs": [
373
  {
374
  "data": {
375
  "text/html": [
376
- "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;vectorizer&#x27;, CountVectorizer()), (&#x27;nb&#x27;, MultinomialNB())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;vectorizer&#x27;, CountVectorizer()), (&#x27;nb&#x27;, MultinomialNB())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MultinomialNB</label><div class=\"sk-toggleable__content\"><pre>MultinomialNB()</pre></div></div></div></div></div></div></div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  ],
378
  "text/plain": [
379
  "Pipeline(steps=[('vectorizer', CountVectorizer()), ('nb', MultinomialNB())])"
380
  ]
381
  },
382
- "execution_count": 14,
383
  "metadata": {},
384
  "output_type": "execute_result"
385
  }
@@ -392,7 +796,7 @@
392
  },
393
  {
394
  "cell_type": "code",
395
- "execution_count": 15,
396
  "metadata": {},
397
  "outputs": [
398
  {
@@ -401,7 +805,7 @@
401
  "0.97847533632287"
402
  ]
403
  },
404
- "execution_count": 15,
405
  "metadata": {},
406
  "output_type": "execute_result"
407
  }
@@ -461,18 +865,25 @@
461
  "cell_type": "markdown",
462
  "metadata": {},
463
  "source": [
464
- "#### *Saving this as a model using Joblib :*"
465
  ]
466
  },
467
  {
468
  "cell_type": "code",
469
- "execution_count": 17,
470
  "metadata": {},
471
  "outputs": [],
472
  "source": [
473
- "# from joblib import dump\n",
474
- "# dump(clf, 'Classifier.joblib')"
475
  ]
 
 
 
 
 
 
 
476
  }
477
  ],
478
  "metadata": {
@@ -491,7 +902,7 @@
491
  "name": "python",
492
  "nbconvert_exporter": "python",
493
  "pygments_lexer": "ipython3",
494
- "version": "3.10.6 (tags/v3.10.6:9c7b4bd, Aug 1 2022, 21:53:49) [MSC v.1932 64 bit (AMD64)]"
495
  },
496
  "orig_nbformat": 4,
497
  "vscode": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 4,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
+ "# Importing Libraries : \n",
10
  "import pandas as pd\n",
11
+ "import pickle"
12
  ]
13
  },
14
  {
15
  "cell_type": "code",
16
+ "execution_count": 5,
17
  "metadata": {},
18
  "outputs": [
19
  {
 
80
  "4 ham Nah I don't think he goes to usf, he lives aro..."
81
  ]
82
  },
83
+ "execution_count": 5,
84
  "metadata": {},
85
  "output_type": "execute_result"
86
  }
87
  ],
88
  "source": [
89
  "# importing data to work on :\n",
90
+ "dataset = pd.read_csv(\"data/spam.csv\")\n",
91
  "dataset.head()"
92
  ]
93
  },
94
  {
95
  "cell_type": "code",
96
+ "execution_count": 6,
97
  "metadata": {},
98
  "outputs": [
99
  {
 
102
  "(5572, 2)"
103
  ]
104
  },
105
+ "execution_count": 6,
106
  "metadata": {},
107
  "output_type": "execute_result"
108
  }
 
113
  },
114
  {
115
  "cell_type": "code",
116
+ "execution_count": 7,
117
  "metadata": {},
118
  "outputs": [
119
  {
 
122
  "Index(['Category', 'Message'], dtype='object')"
123
  ]
124
  },
125
+ "execution_count": 7,
126
  "metadata": {},
127
  "output_type": "execute_result"
128
  }
 
133
  },
134
  {
135
  "cell_type": "code",
136
+ "execution_count": 8,
137
  "metadata": {},
138
  "outputs": [
139
  {
 
160
  "dtype: int64"
161
  ]
162
  },
163
+ "execution_count": 8,
164
  "metadata": {},
165
  "output_type": "execute_result"
166
  }
 
173
  },
174
  {
175
  "cell_type": "code",
176
+ "execution_count": 9,
177
  "metadata": {},
178
  "outputs": [
179
  {
 
184
  "dtype: int64"
185
  ]
186
  },
187
+ "execution_count": 9,
188
  "metadata": {},
189
  "output_type": "execute_result"
190
  }
 
196
  },
197
  {
198
  "cell_type": "code",
199
+ "execution_count": 10,
200
  "metadata": {},
201
  "outputs": [
202
  {
 
269
  "4 ham Nah I don't think he goes to usf, he lives aro... 0"
270
  ]
271
  },
272
+ "execution_count": 10,
273
  "metadata": {},
274
  "output_type": "execute_result"
275
  }
 
282
  },
283
  {
284
  "cell_type": "code",
285
+ "execution_count": 11,
286
  "metadata": {},
287
  "outputs": [],
288
  "source": [
 
300
  },
301
  {
302
  "cell_type": "code",
303
+ "execution_count": 12,
304
  "metadata": {},
305
  "outputs": [],
306
  "source": [
 
310
  },
311
  {
312
  "cell_type": "code",
313
+ "execution_count": 13,
314
  "metadata": {},
315
  "outputs": [
316
  {
 
319
  "((4457,), (1115,), (4457,), (1115,))"
320
  ]
321
  },
322
+ "execution_count": 13,
323
  "metadata": {},
324
  "output_type": "execute_result"
325
  }
 
330
  },
331
  {
332
  "cell_type": "code",
333
+ "execution_count": 14,
334
  "metadata": {},
335
  "outputs": [],
336
  "source": [
 
340
  },
341
  {
342
  "cell_type": "code",
343
+ "execution_count": 15,
344
  "metadata": {},
345
  "outputs": [],
346
  "source": [
347
  "# Importing Different classifiers to compare :\n",
348
  "# from sklearn.linear_model import LogisticRegression\n",
349
  "# from sklearn.ensemble import RandomForestClassifier\n",
350
+ "from sklearn.naive_bayes import MultinomialNB # ✔️✔️ Works well with this type of problem, i.e. when the data is discrete."
351
  ]
352
  },
353
  {
354
  "cell_type": "code",
355
+ "execution_count": 16,
356
  "metadata": {},
357
  "outputs": [],
358
  "source": [
 
367
  },
368
  {
369
  "cell_type": "code",
370
+ "execution_count": 17,
371
  "metadata": {},
372
  "outputs": [
373
  {
374
  "data": {
375
  "text/html": [
376
+ "<style>#sk-container-id-1 {\n",
377
+ " /* Definition of color scheme common for light and dark mode */\n",
378
+ " --sklearn-color-text: black;\n",
379
+ " --sklearn-color-line: gray;\n",
380
+ " /* Definition of color scheme for unfitted estimators */\n",
381
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
382
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
383
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
384
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
385
+ " /* Definition of color scheme for fitted estimators */\n",
386
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
387
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
388
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
389
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
390
+ "\n",
391
+ " /* Specific color for light theme */\n",
392
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
393
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
394
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
395
+ " --sklearn-color-icon: #696969;\n",
396
+ "\n",
397
+ " @media (prefers-color-scheme: dark) {\n",
398
+ " /* Redefinition of color scheme for dark theme */\n",
399
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
400
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
401
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
402
+ " --sklearn-color-icon: #878787;\n",
403
+ " }\n",
404
+ "}\n",
405
+ "\n",
406
+ "#sk-container-id-1 {\n",
407
+ " color: var(--sklearn-color-text);\n",
408
+ "}\n",
409
+ "\n",
410
+ "#sk-container-id-1 pre {\n",
411
+ " padding: 0;\n",
412
+ "}\n",
413
+ "\n",
414
+ "#sk-container-id-1 input.sk-hidden--visually {\n",
415
+ " border: 0;\n",
416
+ " clip: rect(1px 1px 1px 1px);\n",
417
+ " clip: rect(1px, 1px, 1px, 1px);\n",
418
+ " height: 1px;\n",
419
+ " margin: -1px;\n",
420
+ " overflow: hidden;\n",
421
+ " padding: 0;\n",
422
+ " position: absolute;\n",
423
+ " width: 1px;\n",
424
+ "}\n",
425
+ "\n",
426
+ "#sk-container-id-1 div.sk-dashed-wrapped {\n",
427
+ " border: 1px dashed var(--sklearn-color-line);\n",
428
+ " margin: 0 0.4em 0.5em 0.4em;\n",
429
+ " box-sizing: border-box;\n",
430
+ " padding-bottom: 0.4em;\n",
431
+ " background-color: var(--sklearn-color-background);\n",
432
+ "}\n",
433
+ "\n",
434
+ "#sk-container-id-1 div.sk-container {\n",
435
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
436
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
437
+ " so we also need the `!important` here to be able to override the\n",
438
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
439
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
440
+ " display: inline-block !important;\n",
441
+ " position: relative;\n",
442
+ "}\n",
443
+ "\n",
444
+ "#sk-container-id-1 div.sk-text-repr-fallback {\n",
445
+ " display: none;\n",
446
+ "}\n",
447
+ "\n",
448
+ "div.sk-parallel-item,\n",
449
+ "div.sk-serial,\n",
450
+ "div.sk-item {\n",
451
+ " /* draw centered vertical line to link estimators */\n",
452
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
453
+ " background-size: 2px 100%;\n",
454
+ " background-repeat: no-repeat;\n",
455
+ " background-position: center center;\n",
456
+ "}\n",
457
+ "\n",
458
+ "/* Parallel-specific style estimator block */\n",
459
+ "\n",
460
+ "#sk-container-id-1 div.sk-parallel-item::after {\n",
461
+ " content: \"\";\n",
462
+ " width: 100%;\n",
463
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
464
+ " flex-grow: 1;\n",
465
+ "}\n",
466
+ "\n",
467
+ "#sk-container-id-1 div.sk-parallel {\n",
468
+ " display: flex;\n",
469
+ " align-items: stretch;\n",
470
+ " justify-content: center;\n",
471
+ " background-color: var(--sklearn-color-background);\n",
472
+ " position: relative;\n",
473
+ "}\n",
474
+ "\n",
475
+ "#sk-container-id-1 div.sk-parallel-item {\n",
476
+ " display: flex;\n",
477
+ " flex-direction: column;\n",
478
+ "}\n",
479
+ "\n",
480
+ "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
481
+ " align-self: flex-end;\n",
482
+ " width: 50%;\n",
483
+ "}\n",
484
+ "\n",
485
+ "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
486
+ " align-self: flex-start;\n",
487
+ " width: 50%;\n",
488
+ "}\n",
489
+ "\n",
490
+ "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
491
+ " width: 0;\n",
492
+ "}\n",
493
+ "\n",
494
+ "/* Serial-specific style estimator block */\n",
495
+ "\n",
496
+ "#sk-container-id-1 div.sk-serial {\n",
497
+ " display: flex;\n",
498
+ " flex-direction: column;\n",
499
+ " align-items: center;\n",
500
+ " background-color: var(--sklearn-color-background);\n",
501
+ " padding-right: 1em;\n",
502
+ " padding-left: 1em;\n",
503
+ "}\n",
504
+ "\n",
505
+ "\n",
506
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
507
+ "clickable and can be expanded/collapsed.\n",
508
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
509
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
510
+ "*/\n",
511
+ "\n",
512
+ "/* Pipeline and ColumnTransformer style (default) */\n",
513
+ "\n",
514
+ "#sk-container-id-1 div.sk-toggleable {\n",
515
+ " /* Default theme specific background. It is overwritten whether we have a\n",
516
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
517
+ " background-color: var(--sklearn-color-background);\n",
518
+ "}\n",
519
+ "\n",
520
+ "/* Toggleable label */\n",
521
+ "#sk-container-id-1 label.sk-toggleable__label {\n",
522
+ " cursor: pointer;\n",
523
+ " display: block;\n",
524
+ " width: 100%;\n",
525
+ " margin-bottom: 0;\n",
526
+ " padding: 0.5em;\n",
527
+ " box-sizing: border-box;\n",
528
+ " text-align: center;\n",
529
+ "}\n",
530
+ "\n",
531
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
532
+ " /* Arrow on the left of the label */\n",
533
+ " content: \"▸\";\n",
534
+ " float: left;\n",
535
+ " margin-right: 0.25em;\n",
536
+ " color: var(--sklearn-color-icon);\n",
537
+ "}\n",
538
+ "\n",
539
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
540
+ " color: var(--sklearn-color-text);\n",
541
+ "}\n",
542
+ "\n",
543
+ "/* Toggleable content - dropdown */\n",
544
+ "\n",
545
+ "#sk-container-id-1 div.sk-toggleable__content {\n",
546
+ " max-height: 0;\n",
547
+ " max-width: 0;\n",
548
+ " overflow: hidden;\n",
549
+ " text-align: left;\n",
550
+ " /* unfitted */\n",
551
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
552
+ "}\n",
553
+ "\n",
554
+ "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
555
+ " /* fitted */\n",
556
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
557
+ "}\n",
558
+ "\n",
559
+ "#sk-container-id-1 div.sk-toggleable__content pre {\n",
560
+ " margin: 0.2em;\n",
561
+ " border-radius: 0.25em;\n",
562
+ " color: var(--sklearn-color-text);\n",
563
+ " /* unfitted */\n",
564
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
565
+ "}\n",
566
+ "\n",
567
+ "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
568
+ " /* unfitted */\n",
569
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
570
+ "}\n",
571
+ "\n",
572
+ "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
573
+ " /* Expand drop-down */\n",
574
+ " max-height: 200px;\n",
575
+ " max-width: 100%;\n",
576
+ " overflow: auto;\n",
577
+ "}\n",
578
+ "\n",
579
+ "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
580
+ " content: \"▾\";\n",
581
+ "}\n",
582
+ "\n",
583
+ "/* Pipeline/ColumnTransformer-specific style */\n",
584
+ "\n",
585
+ "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
586
+ " color: var(--sklearn-color-text);\n",
587
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
588
+ "}\n",
589
+ "\n",
590
+ "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
591
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
592
+ "}\n",
593
+ "\n",
594
+ "/* Estimator-specific style */\n",
595
+ "\n",
596
+ "/* Colorize estimator box */\n",
597
+ "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
598
+ " /* unfitted */\n",
599
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
600
+ "}\n",
601
+ "\n",
602
+ "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
603
+ " /* fitted */\n",
604
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
605
+ "}\n",
606
+ "\n",
607
+ "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
608
+ "#sk-container-id-1 div.sk-label label {\n",
609
+ " /* The background is the default theme color */\n",
610
+ " color: var(--sklearn-color-text-on-default-background);\n",
611
+ "}\n",
612
+ "\n",
613
+ "/* On hover, darken the color of the background */\n",
614
+ "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
615
+ " color: var(--sklearn-color-text);\n",
616
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
617
+ "}\n",
618
+ "\n",
619
+ "/* Label box, darken color on hover, fitted */\n",
620
+ "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
621
+ " color: var(--sklearn-color-text);\n",
622
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
623
+ "}\n",
624
+ "\n",
625
+ "/* Estimator label */\n",
626
+ "\n",
627
+ "#sk-container-id-1 div.sk-label label {\n",
628
+ " font-family: monospace;\n",
629
+ " font-weight: bold;\n",
630
+ " display: inline-block;\n",
631
+ " line-height: 1.2em;\n",
632
+ "}\n",
633
+ "\n",
634
+ "#sk-container-id-1 div.sk-label-container {\n",
635
+ " text-align: center;\n",
636
+ "}\n",
637
+ "\n",
638
+ "/* Estimator-specific */\n",
639
+ "#sk-container-id-1 div.sk-estimator {\n",
640
+ " font-family: monospace;\n",
641
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
642
+ " border-radius: 0.25em;\n",
643
+ " box-sizing: border-box;\n",
644
+ " margin-bottom: 0.5em;\n",
645
+ " /* unfitted */\n",
646
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
647
+ "}\n",
648
+ "\n",
649
+ "#sk-container-id-1 div.sk-estimator.fitted {\n",
650
+ " /* fitted */\n",
651
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
652
+ "}\n",
653
+ "\n",
654
+ "/* on hover */\n",
655
+ "#sk-container-id-1 div.sk-estimator:hover {\n",
656
+ " /* unfitted */\n",
657
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
658
+ "}\n",
659
+ "\n",
660
+ "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
661
+ " /* fitted */\n",
662
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
663
+ "}\n",
664
+ "\n",
665
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
666
+ "\n",
667
+ "/* Common style for \"i\" and \"?\" */\n",
668
+ "\n",
669
+ ".sk-estimator-doc-link,\n",
670
+ "a:link.sk-estimator-doc-link,\n",
671
+ "a:visited.sk-estimator-doc-link {\n",
672
+ " float: right;\n",
673
+ " font-size: smaller;\n",
674
+ " line-height: 1em;\n",
675
+ " font-family: monospace;\n",
676
+ " background-color: var(--sklearn-color-background);\n",
677
+ " border-radius: 1em;\n",
678
+ " height: 1em;\n",
679
+ " width: 1em;\n",
680
+ " text-decoration: none !important;\n",
681
+ " margin-left: 1ex;\n",
682
+ " /* unfitted */\n",
683
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
684
+ " color: var(--sklearn-color-unfitted-level-1);\n",
685
+ "}\n",
686
+ "\n",
687
+ ".sk-estimator-doc-link.fitted,\n",
688
+ "a:link.sk-estimator-doc-link.fitted,\n",
689
+ "a:visited.sk-estimator-doc-link.fitted {\n",
690
+ " /* fitted */\n",
691
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
692
+ " color: var(--sklearn-color-fitted-level-1);\n",
693
+ "}\n",
694
+ "\n",
695
+ "/* On hover */\n",
696
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
697
+ ".sk-estimator-doc-link:hover,\n",
698
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
699
+ ".sk-estimator-doc-link:hover {\n",
700
+ " /* unfitted */\n",
701
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
702
+ " color: var(--sklearn-color-background);\n",
703
+ " text-decoration: none;\n",
704
+ "}\n",
705
+ "\n",
706
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
707
+ ".sk-estimator-doc-link.fitted:hover,\n",
708
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
709
+ ".sk-estimator-doc-link.fitted:hover {\n",
710
+ " /* fitted */\n",
711
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
712
+ " color: var(--sklearn-color-background);\n",
713
+ " text-decoration: none;\n",
714
+ "}\n",
715
+ "\n",
716
+ "/* Span, style for the box shown on hovering the info icon */\n",
717
+ ".sk-estimator-doc-link span {\n",
718
+ " display: none;\n",
719
+ " z-index: 9999;\n",
720
+ " position: relative;\n",
721
+ " font-weight: normal;\n",
722
+ " right: .2ex;\n",
723
+ " padding: .5ex;\n",
724
+ " margin: .5ex;\n",
725
+ " width: min-content;\n",
726
+ " min-width: 20ex;\n",
727
+ " max-width: 50ex;\n",
728
+ " color: var(--sklearn-color-text);\n",
729
+ " box-shadow: 2pt 2pt 4pt #999;\n",
730
+ " /* unfitted */\n",
731
+ " background: var(--sklearn-color-unfitted-level-0);\n",
732
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
733
+ "}\n",
734
+ "\n",
735
+ ".sk-estimator-doc-link.fitted span {\n",
736
+ " /* fitted */\n",
737
+ " background: var(--sklearn-color-fitted-level-0);\n",
738
+ " border: var(--sklearn-color-fitted-level-3);\n",
739
+ "}\n",
740
+ "\n",
741
+ ".sk-estimator-doc-link:hover span {\n",
742
+ " display: block;\n",
743
+ "}\n",
744
+ "\n",
745
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
746
+ "\n",
747
+ "#sk-container-id-1 a.estimator_doc_link {\n",
748
+ " float: right;\n",
749
+ " font-size: 1rem;\n",
750
+ " line-height: 1em;\n",
751
+ " font-family: monospace;\n",
752
+ " background-color: var(--sklearn-color-background);\n",
753
+ " border-radius: 1rem;\n",
754
+ " height: 1rem;\n",
755
+ " width: 1rem;\n",
756
+ " text-decoration: none;\n",
757
+ " /* unfitted */\n",
758
+ " color: var(--sklearn-color-unfitted-level-1);\n",
759
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
760
+ "}\n",
761
+ "\n",
762
+ "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
763
+ " /* fitted */\n",
764
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
765
+ " color: var(--sklearn-color-fitted-level-1);\n",
766
+ "}\n",
767
+ "\n",
768
+ "/* On hover */\n",
769
+ "#sk-container-id-1 a.estimator_doc_link:hover {\n",
770
+ " /* unfitted */\n",
771
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
772
+ " color: var(--sklearn-color-background);\n",
773
+ " text-decoration: none;\n",
774
+ "}\n",
775
+ "\n",
776
+ "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
777
+ " /* fitted */\n",
778
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
779
+ "}\n",
780
+ "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;vectorizer&#x27;, CountVectorizer()), (&#x27;nb&#x27;, MultinomialNB())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[(&#x27;vectorizer&#x27;, CountVectorizer()), (&#x27;nb&#x27;, MultinomialNB())])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;CountVectorizer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html\">?<span>Documentation for CountVectorizer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>CountVectorizer()</pre></div> </div></div><div class=\"sk-item\"><div class=\"sk-estimator 
fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;MultinomialNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html\">?<span>Documentation for MultinomialNB</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>MultinomialNB()</pre></div> </div></div></div></div></div></div>"
781
  ],
782
  "text/plain": [
783
  "Pipeline(steps=[('vectorizer', CountVectorizer()), ('nb', MultinomialNB())])"
784
  ]
785
  },
786
+ "execution_count": 17,
787
  "metadata": {},
788
  "output_type": "execute_result"
789
  }
 
796
  },
797
  {
798
  "cell_type": "code",
799
+ "execution_count": 18,
800
  "metadata": {},
801
  "outputs": [
802
  {
 
805
  "0.97847533632287"
806
  ]
807
  },
808
+ "execution_count": 18,
809
  "metadata": {},
810
  "output_type": "execute_result"
811
  }
 
865
  "cell_type": "markdown",
866
  "metadata": {},
867
  "source": [
868
+ "#### *Saving the model using `Pickle` :*"
869
  ]
870
  },
871
  {
872
  "cell_type": "code",
873
+ "execution_count": 22,
874
  "metadata": {},
875
  "outputs": [],
876
  "source": [
877
+ "# with open(\"models/spam-clf.pkl\", \"wb\") as f:\n",
878
+ "# pickle.dump(clf, f)"
879
  ]
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "execution_count": null,
884
+ "metadata": {},
885
+ "outputs": [],
886
+ "source": []
887
  }
888
  ],
889
  "metadata": {
 
902
  "name": "python",
903
  "nbconvert_exporter": "python",
904
  "pygments_lexer": "ipython3",
905
+ "version": "3.10.6"
906
  },
907
  "orig_nbformat": 4,
908
  "vscode": {