Taoheed-O committed on
Commit
17effdf
1 Parent(s): 8ae168d

Hosting spam detector app with Hugging Face

Files changed (6)
  1. main.py +86 -0
  2. model_log.pkl +0 -0
  3. requirements.txt +4 -0
  4. sms_spam.csv +0 -0
  5. spam_detector.ipynb +528 -0
  6. vectorizer.pkl +0 -0
main.py ADDED
@@ -0,0 +1,86 @@
+ import pickle
+
+ import streamlit as st
+
+
+ # load the fitted vectorizer and classifier used for prediction
+ with open('vectorizer.pkl', 'rb') as vectorizer_in:
+     vectorizer = pickle.load(vectorizer_in)
+ with open('model_log.pkl', 'rb') as model_in:
+     classifier = pickle.load(model_in)
+
+ # header image
+ st.image("https://media.istockphoto.com/photos/phishing-scam-email-identity-alert-3d-rendering-picture-id1046171248")
+
+
+ def welcome():
+     return 'welcome all'
+
+
+ # make a prediction from the text the user enters
+ def prediction(text):
+     vector_text = vectorizer.transform([text]).toarray()
+     return classifier.predict(vector_text)
+
+
+ # main function that builds the webpage
+ def main():
+     # give the webpage a title
+     st.title("Spam E-mail Detector")
+
+     # the font and background colour, the padding and the text to be displayed
+     html_temp = """
+     <div style="background-color:black;padding:13px">
+     <h1 style="color:white;text-align:center;">Spam Detector App</h1>
+     </div>
+     """
+
+     # render the front-end elements defined above
+     st.markdown(html_temp, unsafe_allow_html=True)
+
+     # list of available models
+     options = st.radio("Available Models:", ["Logistic Regression", "Multinomial Naive Bayes", "Decision Tree"])
+
+     # when the 'Predict' button is clicked, the prediction function defined
+     # above is called and its result is shown to the user
+     if options == "Logistic Regression":
+         st.success("You picked {}".format(options))
+         # text box in which the user enters the message to classify
+         text = st.text_input("Message:", "Type your message here")
+
+         if st.button('Predict'):
+             result = prediction(text)
+             if result[0] == 0:
+                 st.error('This is not a spam mail/sms.')
+             else:
+                 st.success('This is a spam mail/sms.')
+     else:
+         st.warning('This model is under development and not available for predicting yet.')
+
+     html_git = """
+     <h3>Check out my GitHub</h3>
+     <div style="background-color:black;padding:13px">
+     <h1 style="color:white;text-align:center;"><a href="https://github.com/Taoheed-O">My GitHub link</a></h1>
+     </div>
+     """
+     html_linkedIn = """
+     <h3>Connect with me on LinkedIn</h3>
+     <div style="background-color:black;padding:13px">
+     <h1 style="color:white;text-align:center;"><a href="https://www.linkedin.com/in/taoheed-oyeniyi">My LinkedIn</a></h1>
+     </div>
+     """
+
+     # render the footer links
+     st.markdown(html_git, unsafe_allow_html=True)
+     st.markdown(html_linkedIn, unsafe_allow_html=True)
+
+
+ if __name__ == '__main__':
+     main()
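Since main.py unpickles both artifacts at import time, they are re-read on every Streamlit rerun. A minimal sketch of how the loading could be cached instead, assuming a Streamlit release that provides st.cache_resource (roughly 1.18 and later); the load_artifacts helper name is hypothetical:

# Sketch only: cache the unpickling so it happens once per process rather than
# on every widget interaction.
import pickle

import streamlit as st


@st.cache_resource
def load_artifacts(vectorizer_path="vectorizer.pkl", model_path="model_log.pkl"):
    """Load the fitted CountVectorizer and classifier saved by the notebook."""
    with open(vectorizer_path, "rb") as f:
        vectorizer = pickle.load(f)
    with open(model_path, "rb") as f:
        classifier = pickle.load(f)
    return vectorizer, classifier


vectorizer, classifier = load_artifacts()

The rest of the app can use these two objects exactly as before.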
model_log.pkl ADDED
Binary file (61 kB).
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ numpy
+ pandas
+ scikit-learn
+ streamlit
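Because the committed vectorizer.pkl and model_log.pkl were produced with a specific scikit-learn version, the environment that requirements.txt resolves to should be able to unpickle them. A small, illustrative check, assuming it is run from the repository root with the pickle files alongside it:

# Illustrative sanity check: confirm the committed pickles load under the
# installed scikit-learn version before deploying.
import pickle

import sklearn

print("scikit-learn version:", sklearn.__version__)

for path in ("vectorizer.pkl", "model_log.pkl"):
    with open(path, "rb") as f:
        obj = pickle.load(f)
    print(path, "->", type(obj).__name__)

Pinning exact versions in requirements.txt would make mismatches less likely, but no pins are included in this commit.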
sms_spam.csv ADDED
The diff for this file is too large to render. See raw diff
 
spam_detector.ipynb ADDED
@@ -0,0 +1,528 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import the necessary libraries\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.model_selection import train_test_split,KFold,cross_val_score, ShuffleSplit \n",
+ "from sklearn.naive_bayes import MultinomialNB\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import f1_score,accuracy_score,classification_report\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.feature_extraction.text import CountVectorizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>type</th>\n",
+ " <th>text</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>ham</td>\n",
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>ham</td>\n",
+ " <td>Ok lar... Joking wif u oni...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>spam</td>\n",
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>ham</td>\n",
+ " <td>U dun say so early hor... U c already then say...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>ham</td>\n",
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " type text\n",
+ "0 ham Go until jurong point, crazy.. Available only ...\n",
+ "1 ham Ok lar... Joking wif u oni...\n",
+ "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
+ "3 ham U dun say so early hor... U c already then say...\n",
+ "4 ham Nah I don't think he goes to usf, he lives aro..."
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# read in the dataset\n",
+ "df = pd.read_csv('sms_spam.csv')\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead tr th {\n",
+ " text-align: left;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead tr:last-of-type th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr>\n",
+ " <th></th>\n",
+ " <th colspan=\"4\" halign=\"left\">text</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th></th>\n",
+ " <th>count</th>\n",
+ " <th>unique</th>\n",
+ " <th>top</th>\n",
+ " <th>freq</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>type</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>ham</th>\n",
+ " <td>4827</td>\n",
+ " <td>4518</td>\n",
+ " <td>Sorry, I'll call later</td>\n",
+ " <td>30</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>spam</th>\n",
+ " <td>747</td>\n",
+ " <td>642</td>\n",
+ " <td>Please call our customer service representativ...</td>\n",
+ " <td>4</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " text \n",
+ " count unique top freq\n",
+ "type \n",
+ "ham 4827 4518 Sorry, I'll call later 30\n",
+ "spam 747 642 Please call our customer service representativ... 4"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# group messages by type (ham/spam)\n",
+ "df.groupby('type').describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>type</th>\n",
+ " <th>text</th>\n",
+ " <th>spam</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>ham</td>\n",
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>ham</td>\n",
+ " <td>Ok lar... Joking wif u oni...</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>spam</td>\n",
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>ham</td>\n",
+ " <td>U dun say so early hor... U c already then say...</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>ham</td>\n",
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " type text spam\n",
+ "0 ham Go until jurong point, crazy.. Available only ... 0\n",
+ "1 ham Ok lar... Joking wif u oni... 0\n",
+ "2 spam Free entry in 2 a wkly comp to win FA Cup fina... 1\n",
+ "3 ham U dun say so early hor... U c already then say... 0\n",
+ "4 ham Nah I don't think he goes to usf, he lives aro... 0"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# create a new column named 'spam' that labels each message as spam (1) or ham (0)\n",
+ "# using a lambda function\n",
+ "df['spam'] = df['type'].apply(lambda x:1 if x == 'spam' else 0)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# split the dataset into training and test sets in a 75:25 (3:1) ratio\n",
+ "x_train,x_test,y_train,y_test = train_test_split(df.text,df.spam,test_size=0.25)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0]])"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# vectorize the text with CountVectorizer so it becomes numerical data\n",
+ "# that the model can work with\n",
+ "count = CountVectorizer()\n",
+ "x_train_count = count.fit_transform(x_train.values)\n",
+ "x_train_count.toarray()[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LogisticRegression()"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Making use of the Logistic Regression model\n",
+ "model = LogisticRegression()\n",
+ "model.fit(x_train_count,y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9849354375896701"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Testing out our model's accuracy\n",
+ "x_test_pred = count.transform(x_test)\n",
+ "accuracy_score(model.predict(x_test_pred),y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "classification report : precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 0.98 0.99 1212\n",
+ " 1 0.90 0.99 0.95 182\n",
+ "\n",
+ " accuracy 0.98 1394\n",
+ " macro avg 0.95 0.99 0.97 1394\n",
+ "weighted avg 0.99 0.98 0.99 1394\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Classification report\n",
+ "print(f\"classification report : {classification_report(model.predict(x_test_pred),y_test)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using a pipeline that combines vectorizing and classification\n",
+ "clf = Pipeline([\n",
+ " ('vectorizer',CountVectorizer()),\n",
+ " ('nb',LogisticRegression())\n",
+ "])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Pipeline(steps=[('vectorizer', CountVectorizer()),\n",
+ " ('nb', LogisticRegression())])"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# fit the pipeline\n",
+ "clf.fit(x_train,y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9849354375896701"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# score the pipeline on the test set\n",
+ "clf.score(x_test,y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0.97607656, 0.9784689 , 0.97727273, 0.98684211, 0.98325359])"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cv = ShuffleSplit(n_splits = 5, test_size = 0.2, random_state=0)\n",
+ "cross_val_score(MultinomialNB(),x_train_count,y_train, cv=cv)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Saving our model as a pickle file\n",
+ "import pickle\n",
+ "with open(\"model_log.pkl\", \"wb\") as f:\n",
+ " pickle.dump(model, f)\n",
+ "\n",
+ "with open(\"model_log.pkl\", \"rb\") as f:\n",
+ " model = pickle.load(f)\n",
+ " \n",
+ "\n",
+ "# Saving our vectorizer\n",
+ "with open(\"vectorizer.pkl\", \"wb\") as vect:\n",
+ " pickle.dump(count, vect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 0, 1, 1])"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "s = [\"FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv\"\n",
+ " , \"Nah I don't think he goes to usf, he lives around here though\",\"Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\",\n",
+ " \"URGENT! You have won a 1 week FREE membership in our £100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18\"]\n",
+ "test = count.transform(s).toarray()\n",
+ "model.predict(test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+ }
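The notebook trains and pickles only the Logistic Regression model, while the app's radio button also lists Multinomial Naive Bayes and Decision Tree. A sketch of how the three could be compared with the same data preparation and ShuffleSplit cross-validation the notebook already uses; the candidates mapping is illustrative, and sms_spam.csv is assumed to be in the working directory:

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ShuffleSplit, cross_val_score, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

# Same preparation as in spam_detector.ipynb.
df = pd.read_csv("sms_spam.csv")
df["spam"] = df["type"].apply(lambda x: 1 if x == "spam" else 0)
x_train, x_test, y_train, y_test = train_test_split(df.text, df.spam, test_size=0.25)

count = CountVectorizer()
x_train_count = count.fit_transform(x_train.values)

# Cross-validate each candidate on the vectorized training set.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
candidates = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Multinomial Naive Bayes": MultinomialNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=0),
}
for name, estimator in candidates.items():
    scores = cross_val_score(estimator, x_train_count, y_train, cv=cv)
    print(f"{name}: mean accuracy {scores.mean():.4f}")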
vectorizer.pkl ADDED
Binary file (91 kB).
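The notebook also builds a Pipeline that chains CountVectorizer and LogisticRegression, yet the app loads the vectorizer and the model from two separate pickles. A sketch of persisting a fitted pipeline as a single artifact instead, so the two stages cannot drift apart; the file name spam_pipeline.pkl is hypothetical and not part of this commit:

import pickle

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Fit a pipeline like the one the notebook defines, here on the full dataset.
df = pd.read_csv("sms_spam.csv")
df["spam"] = df["type"].apply(lambda x: 1 if x == "spam" else 0)

clf = Pipeline([
    ("vectorizer", CountVectorizer()),
    ("model", LogisticRegression()),
])
clf.fit(df.text, df.spam)

# Persist the whole pipeline as one file.
with open("spam_pipeline.pkl", "wb") as f:
    pickle.dump(clf, f)

# In the Streamlit app a single load would then replace the two pickle reads,
# and predict() can be called on raw text directly.
with open("spam_pipeline.pkl", "rb") as f:
    pipeline = pickle.load(f)
print(pipeline.predict(["Free entry in 2 a wkly comp to win FA Cup final tkts"]))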