Application File
app.py
ADDED
@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""First_Text_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sdLss09e3OxYVoeK3oBA6qrUSj_iOxp-

<h3 align = "center">Importing Libraries</h3>
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

"""<h3 align = "center">Importing Dataset</h3>"""

# Load the SMS spam dataset (Colab path); ISO-8859-1 handles the non-UTF-8 characters in the file.
data = pd.read_csv("/content/spam.csv", encoding="ISO-8859-1")

"""<h3 align = "center">Preliminary Data Checks</h3>"""

data.head()

data.isnull().sum()

data.shape

data["v1"].value_counts()

data.info()

"""<h3 align = "center">Storing the Character Length of Each Message in a New Column</h3>"""

data["length"] = data["v2"].str.len()

"""<h3 align = "center">Visualising Character Length for Each Category</h3>"""

# displot is a figure-level function, so the figure size is set with height/aspect
# instead of plt.figure(figsize=...).
sns.displot(data=data, x="length", hue="v1", log_scale=True, height=8, aspect=1.5)
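
# A quick numeric check of the same pattern (a minimal sketch, assuming the "length"
# column created above): summarise message length per class rather than relying on the
# plot alone.
data.groupby("v1")["length"].describe()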

"""<h5>It is evident from the above plot that spam texts are usually longer.</h5>

<h3 align = "center">Defining Variables</h3>
"""

X = data["v2"]
y = data["v1"]

"""<h3 align = "center">Train Test Split</h3>"""

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
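
# A stratified variant of the split (a minimal sketch; the split above does not stratify):
# because ham heavily outnumbers spam, stratify=y keeps the class ratio identical in both
# splits. The *_s names are illustrative only and are not used below.
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y)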

"""<h3 align = "center">Vectorizing Words into a Matrix</h3>"""

from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()

X_train_counts = count_vect.fit_transform(X_train)

X_train_counts

X_train.shape

X_train_counts.shape

from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()

X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

X_train_tfidf.shape

"""<h3 align = "center">Using TfidfVectorizer to Do Counting and TF-IDF Weighting in One Step</h3>"""

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()

X_train_tfidf = vectorizer.fit_transform(X_train)

X_train_tfidf.shape
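
# With default settings, TfidfVectorizer is equivalent to CountVectorizer followed by
# TfidfTransformer, so the matrix built here should match the two-step result above
# (a minimal sketch of that check):
two_step_tfidf = tfidf_transformer.transform(count_vect.transform(X_train))
abs(X_train_tfidf - two_step_tfidf).max()  # expected to be (near) zero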

"""<h3 align = "center">Creating the Model</h3>"""

from sklearn.svm import LinearSVC
clf = LinearSVC()

clf.fit(X_train_tfidf, y_train)

"""<h3 align = "center">Creating the Pipeline</h3>"""

from sklearn.pipeline import Pipeline

# The pipeline bundles the TF-IDF vectorizer and the linear SVM, so raw text can be
# passed straight to fit() and predict().
text_clf = Pipeline([("tfidf", TfidfVectorizer()), ("clf", LinearSVC())])

text_clf.fit(X_train, y_train)
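
# Optional persistence step (a minimal sketch, not part of the original flow; the file
# name is illustrative): saving the fitted pipeline with joblib lets a deployment load
# it at start-up instead of retraining.
import joblib
joblib.dump(text_clf, "spam_pipeline.joblib")
# Later: text_clf = joblib.load("spam_pipeline.joblib")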

predictions = text_clf.predict(X_test)

X_test

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

print(confusion_matrix(y_test, predictions))

print(classification_report(y_test, predictions))
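
# An optional visual version of the confusion matrix (a minimal sketch using the seaborn
# and matplotlib imports above):
cm = confusion_matrix(y_test, predictions)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d",
            xticklabels=text_clf.classes_, yticklabels=text_clf.classes_)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()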

"""<h3 align = "center">Accuracy Score</h3>"""

print(accuracy_score(y_test, predictions))

"""<h3 align = "center">Predictions</h3>"""

text_clf.predict(["Hi how are you doing today?"])

text_clf.predict(["Congratulations! You are selected for a free voucher worth $500"])

"""<h3 align = "center">Creating the User Interface</h3>"""

# gradio must already be installed; on Hugging Face Spaces it is listed in requirements.txt,
# while the shell command below only works inside a notebook.
# !pip install gradio

import gradio as gr

def first_nlp_spam_detector(text):
    # The pipeline expects an iterable of documents, so wrap the single string in a list.
    prediction = text_clf.predict([text])
    if prediction[0] == "ham":
        return "Your text is a legitimate one!"
    else:
        return "Beware of such text messages, it's spam!"

interface = gr.Interface(
    first_nlp_spam_detector,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here...", show_label=False),
    outputs=gr.Label(value="Predicting the text classification..."),
    description="Predicting Text Legitimacy!",
)
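
# Optional example inputs (a minimal sketch; not set on the interface above): gr.Interface
# also accepts an examples= argument so visitors can try preset messages with one click, e.g.
# gr.Interface(first_nlp_spam_detector, inputs=..., outputs=..., examples=[
#     "Hi how are you doing today?",
#     "Congratulations! You are selected for a free voucher worth $500"])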

first_nlp_spam_detector("Congratulations! You are selected for a free voucher worth $500")

interface.launch()