Prashun08 committed on
Commit
cc400c1
1 Parent(s): fc4e2e1

Application File

Browse files
Files changed (1) hide show
  1. app.py +139 -0
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """First_Text_Classification.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1sdLss09e3OxYVoeK3oBA6qrUSj_iOxp-
8
+
9
+ <h3 align = "center">Importing Libraries</h3>
10
+ """
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ import matplotlib.pyplot as plt
15
+ import seaborn as sns
16
+
17
+ """<h3 align = "center">Importing Dataset</h3>"""
18
+
19
+ data = pd.read_csv("/content/spam.csv", encoding = "ISO-8859-1")
20
+
21
+ """<h3 align = "center">Preliminary Data Checks</h3>"""
22
+
23
+ data.head()
24
+
25
+ data.isnull().sum()
26
+
27
+ data.shape
28
+
29
+ data['v1'].value_counts()
30
+
31
+ data.info()
32
+
33
+ """<h3 align = "center">Putting the Length of Characters of each row in a column.</h3>"""
34
+
35
+ data["Unnamed: 2"] = data["v2"].str.len()
36
+
37
+ """<h3 align = "center">Visualising Length of Characters for each category!</h3>"""
38
+
39
+ plt.figure(figsize = (12,8))
40
+ sns.displot(data = data ,x = "Unnamed: 2", hue = "v1",log_scale = True)
41
+
42
+ """<h5>It is evident from the above plot that spam texts are usually longer in length!</h5>
43
+
44
+ <h3 align = "center">Defining Variables</h3>
45
+ """
46
+
47
+ X = data["v2"]
48
+ y = data["v1"]
49
+
50
+ """<h3 align = "center">Train Test Split</h3>"""
51
+
52
+ from sklearn.model_selection import train_test_split
53
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
54
+
55
+ """<h3 align = "center">Vectorizing Words into Matrix</h3>"""
56
+
57
+ from sklearn.feature_extraction.text import CountVectorizer
58
+ count_vect = CountVectorizer()
59
+
60
+ X_train_counts = count_vect.fit_transform(X_train)
61
+
62
+ X_train_counts
63
+
64
+ X_train.shape
65
+
66
+ X_train_counts.shape
67
+
68
+ from sklearn.feature_extraction.text import TfidfTransformer
69
+ tfidf_transformer = TfidfTransformer()
70
+
71
+ X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
72
+
73
+ X_train_tfidf.shape
74
+
75
+ """<h3 align = "center">Using TF-IDF Vectorizer for optimum vectorization!</h3>"""
76
+
77
+ from sklearn.feature_extraction.text import TfidfVectorizer
78
+ vectorizer = TfidfVectorizer()
79
+
80
+ X_train_tfidf = vectorizer.fit_transform(X_train)
81
+
82
+ X_train_tfidf.shape
83
+
84
+ """<h3 align = "center">Creating Model</h3>"""
85
+
86
+ from sklearn.svm import LinearSVC
87
+ clf = LinearSVC()
88
+
89
+ clf.fit(X_train_tfidf,y_train)
90
+
91
+ """<h3 align = "center">Creating Pipeline</h3>"""
92
+
93
+ from sklearn.pipeline import Pipeline
94
+
95
+ text_clf = Pipeline([("tfidf",TfidfVectorizer()),("clf",LinearSVC())])
96
+
97
+ text_clf.fit(X_train,y_train)
98
+
99
+ predictions = text_clf.predict(X_test)
100
+
101
+ X_test
102
+
103
+ from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
104
+
105
+ print(confusion_matrix(y_test,predictions))
106
+
107
+ print(classification_report(y_test,predictions))
108
+
109
+ """<h3 align = "center">Accuracy Score</h3>"""
110
+
111
+ print(accuracy_score(y_test,predictions))
112
+
113
+ """<h3 align = "center">Predictions </h3>"""
114
+
115
+ text_clf.predict(["Hi how are you doing today?"])
116
+
117
+ text_clf.predict(["Congratulations! You are selected for a free vouchar worth $500"])
118
+
119
+ """<h3 align = "center">Creating User Interface!</h3>"""
120
+
121
# NOTE: the notebook-only shell escape `! pip install gradio` was removed here.
# It is IPython syntax and raises a SyntaxError when this file is run as a
# plain Python script. Install gradio in the environment beforehand instead
# (for example by listing it in requirements.txt).
import gradio as gr
124
+
125
def first_nlp_spam_detector(text):
    """Classify one message with the trained pipeline and return a verdict string.

    Args:
        text: The message to classify. It is wrapped in a one-element list
            before being handed to the module-level ``text_clf.predict``.

    Returns:
        ``"Your Text is a Legitimate One!"`` when the first predicted label
        is ``'ham'``; otherwise the spam warning message.
    """
    # Pass a list literal directly; the original bound it to a local named
    # `list`, which shadowed the builtin.
    predicted_labels = text_clf.predict([text])
    if predicted_labels[0] == 'ham':
        return "Your Text is a Legitimate One!"
    return "Beware of such text messages, It\'s a Spam! "
133
+
134
+ interface = gr.Interface(first_nlp_spam_detector,inputs = gr.Textbox(lines=2, placeholder="Enter your Text Here.....!", show_label = False),
135
+ outputs = gr.Label(value = "Predicting the Text Classification..!"),description = "Predicting Text Legitimacy!")
136
+
137
+ first_nlp_spam_detector("Congratulations! You are selected for a free vouchar worth $500")
138
+
139
+ interface.launch()