# -*- coding: utf-8 -*-
"""First_Text_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sdLss09e3OxYVoeK3oBA6qrUSj_iOxp-
"""

"""Importing Libraries"""

""" import numpy as np import pandas as pd """

"""Importing Dataset"""

""" data = pd.read_csv("spam.csv", encoding = "ISO-8859-1") """

"""Preliminary Data Checks"""

""" data.head() data.isnull().sum() data.shape data['v1'].value_counts() data.info() """

"""Storing the character length of each message in a separate column."""

""" data["Unnamed: 2"] = data["v2"].str.len() """

"""Visualising the character length for each category"""

""" """
"""It is evident from the above plot that spam texts are usually longer than legitimate (ham) messages."""

"""Defining Variables"""

""" X = data["v2"] y = data["v1"] """

"""Train/Test Split"""

""" from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) """

"""Vectorizing Words into a Matrix"""

""" from sklearn.feature_extraction.text import CountVectorizer count_vect = CountVectorizer() X_train_counts = count_vect.fit_transform(X_train) X_train_counts X_train.shape X_train_counts.shape from sklearn.feature_extraction.text import TfidfTransformer tfidf_transformer = TfidfTransformer() X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts) X_train_tfidf.shape """

"""Using TfidfVectorizer, which combines CountVectorizer and TfidfTransformer in a single step"""

""" from sklearn.feature_extraction.text import TfidfVectorizer vectorizer = TfidfVectorizer() X_train_tfidf = vectorizer.fit_transform(X_train) X_train_tfidf.shape """

"""Creating Model"""

""" from sklearn.svm import LinearSVC clf = LinearSVC() clf.fit(X_train_tfidf,y_train) """

"""Creating Pipeline"""

""" from sklearn.pipeline import Pipeline text_clf = Pipeline([("tfidf",TfidfVectorizer()),("clf",LinearSVC())]) text_clf.fit(X_train,y_train) predictions = text_clf.predict(X_test) X_test from sklearn.metrics import confusion_matrix,classification_report,accuracy_score print(confusion_matrix(y_test,predictions)) print(classification_report(y_test,predictions)) """

"""Accuracy Score"""

""" print(accuracy_score(y_test,predictions)) """

"""Predictions"""

""" text_clf.predict(["Hi how are you doing today?"]) text_clf.predict(["Congratulations! You are selected for a free vouchar worth $500"]) """

"""Creating a User Interface with Gradio"""

""" import gradio as gr def first_nlp_spam_detector(text): list = [] list.append(text) arr = text_clf.predict(list) if arr[0] == 'ham': return "Your Text is a Legitimate One!" else: return "Beware of such text messages, It\'s a Spam! " interface = gr.Interface(first_nlp_spam_detector,inputs = gr.Textbox(lines=2, placeholder="Enter your Text Here.....!", show_label = False), outputs = gr.Label(value = "Predicting the Text Classification..!"),description = "Predicting Text Legitimacy!") first_nlp_spam_detector("Congratulations! You are selected for a free vouchar worth $500") interface.launch()