Application File
app.py
ADDED
@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""First_Text_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sdLss09e3OxYVoeK3oBA6qrUSj_iOxp-

<h3 align = "center">Importing Libraries</h3>
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

"""<h3 align = "center">Importing Dataset</h3>"""

# Load the SMS spam dataset (Colab path); ISO-8859-1 handles the non-UTF-8 characters in the file.
data = pd.read_csv("/content/spam.csv", encoding="ISO-8859-1")

"""<h3 align = "center">Preliminary Data Checks</h3>"""

data.head()

data.isnull().sum()

data.shape

data["v1"].value_counts()

data.info()

"""<h3 align = "center">Storing the Character Length of Each Message in a New Column</h3>"""

data["length"] = data["v2"].str.len()

"""<h3 align = "center">Visualising Character Length for Each Category</h3>"""

# displot is a figure-level function, so the figure size is set with height/aspect
# instead of plt.figure(figsize=...).
sns.displot(data=data, x="length", hue="v1", log_scale=True, height=8, aspect=1.5)
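
# A quick numeric check of the same pattern (a minimal sketch, assuming the "length"
# column created above): summarise message length per class rather than relying on the
# plot alone.
data.groupby("v1")["length"].describe()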

"""<h5>It is evident from the above plot that spam texts are usually longer.</h5>

<h3 align = "center">Defining Variables</h3>
"""

X = data["v2"]
y = data["v1"]

"""<h3 align = "center">Train Test Split</h3>"""

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
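
# A stratified variant of the split (a minimal sketch; the split above does not stratify):
# because ham heavily outnumbers spam, stratify=y keeps the class ratio identical in both
# splits. The *_s names are illustrative only and are not used below.
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y)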

"""<h3 align = "center">Vectorizing Words into a Matrix</h3>"""

from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()

X_train_counts = count_vect.fit_transform(X_train)

X_train_counts

X_train.shape

X_train_counts.shape

from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()

X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

X_train_tfidf.shape

"""<h3 align = "center">Using TfidfVectorizer to Do Counting and TF-IDF Weighting in One Step</h3>"""

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()

X_train_tfidf = vectorizer.fit_transform(X_train)

X_train_tfidf.shape
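
# With default settings, TfidfVectorizer is equivalent to CountVectorizer followed by
# TfidfTransformer, so the matrix built here should match the two-step result above
# (a minimal sketch of that check):
two_step_tfidf = tfidf_transformer.transform(count_vect.transform(X_train))
abs(X_train_tfidf - two_step_tfidf).max()  # expected to be (near) zero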

"""<h3 align = "center">Creating the Model</h3>"""

from sklearn.svm import LinearSVC
clf = LinearSVC()

clf.fit(X_train_tfidf, y_train)

"""<h3 align = "center">Creating the Pipeline</h3>"""

from sklearn.pipeline import Pipeline

# The pipeline bundles the TF-IDF vectorizer and the linear SVM, so raw text can be
# passed straight to fit() and predict().
text_clf = Pipeline([("tfidf", TfidfVectorizer()), ("clf", LinearSVC())])

text_clf.fit(X_train, y_train)
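
# Optional persistence step (a minimal sketch, not part of the original flow; the file
# name is illustrative): saving the fitted pipeline with joblib lets a deployment load
# it at start-up instead of retraining.
import joblib
joblib.dump(text_clf, "spam_pipeline.joblib")
# Later: text_clf = joblib.load("spam_pipeline.joblib")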

predictions = text_clf.predict(X_test)

X_test

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

print(confusion_matrix(y_test, predictions))

print(classification_report(y_test, predictions))
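
# An optional visual version of the confusion matrix (a minimal sketch using the seaborn
# and matplotlib imports above):
cm = confusion_matrix(y_test, predictions)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d",
            xticklabels=text_clf.classes_, yticklabels=text_clf.classes_)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()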

"""<h3 align = "center">Accuracy Score</h3>"""

print(accuracy_score(y_test, predictions))

"""<h3 align = "center">Predictions</h3>"""

text_clf.predict(["Hi how are you doing today?"])

text_clf.predict(["Congratulations! You are selected for a free voucher worth $500"])

"""<h3 align = "center">Creating the User Interface</h3>"""

# gradio must already be installed; on Hugging Face Spaces it is listed in requirements.txt,
# while the shell command below only works inside a notebook.
# !pip install gradio

import gradio as gr

def first_nlp_spam_detector(text):
    # The pipeline expects an iterable of documents, so wrap the single string in a list.
    prediction = text_clf.predict([text])
    if prediction[0] == "ham":
        return "Your text is a legitimate one!"
    else:
        return "Beware of such text messages, it's spam!"

interface = gr.Interface(
    first_nlp_spam_detector,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here...", show_label=False),
    outputs=gr.Label(value="Predicting the text classification..."),
    description="Predicting Text Legitimacy!",
)
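
# Optional example inputs (a minimal sketch; not set on the interface above): gr.Interface
# also accepts an examples= argument so visitors can try preset messages with one click, e.g.
# gr.Interface(first_nlp_spam_detector, inputs=..., outputs=..., examples=[
#     "Hi how are you doing today?",
#     "Congratulations! You are selected for a free voucher worth $500"])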

first_nlp_spam_detector("Congratulations! You are selected for a free voucher worth $500")

interface.launch()