File size: 2,532 Bytes
6a2ebf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import streamlit as st
import pickle
import string
from nltk.corpus import stopwords
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()


# Function to preprocess the input text
def transform_text(text):
    text = text.lower()  # Convert to lowercase
    text = nltk.word_tokenize(text)  # Tokenize the text
    y = []

    # Removing alphanumeric tokens
    for i in text:
        if i.isalnum():
            y.append(i)

    text = y[:]
    y.clear()

    # Removing stopwords and punctuation
    for i in text:
        if i not in stopwords.words('english') and i not in string.punctuation:
            y.append(i)

    text = y[:]
    y.clear()

    # Performing stemming
    for i in text:
        y.append(ps.stem(i))

    return " ".join(y)  # Join the list into a single string with spaces


# Load the saved models (TF-IDF Vectorizer and the classification model)
tfidf = pickle.load(open('vectorizer.pkl', 'rb'))
model = pickle.load(open('model.pkl', 'rb'))

# Setting up the main title and description
st.title("πŸ“§ Email/SMS Spam Classifier")
st.write("""

### Enter a message to determine whether it's Spam or Not Spam. 

This classifier uses **natural language processing (NLP)** techniques to preprocess and predict based on your input.

""")

# Input text field for user to enter the message
st.write("#### Message Input:")
input_sms = st.text_area("Type or paste your message here", height=150)

# Add a button to trigger the classification
if st.button("πŸ” Classify Message"):
    if input_sms.strip():  # Ensure there's text in the input
        ## 1. Preprocess the input text
        with st.spinner('Processing...'):
            transformed_sms = transform_text(input_sms)

        ## 2. Vectorize the transformed text
        vector_input = tfidf.transform([transformed_sms])

        ## 3. Predict the label (Spam or Not Spam)
        result = model.predict(vector_input)[0]

        ## 4. Display the result with appropriate color and message
        if result == 1:
            st.success("πŸ”΄ This message is classified as **Spam**.")
        else:
            st.success("🟒 This message is classified as **Not Spam**.")
    else:
        st.warning("Please enter a valid message to classify.")

# Adding a footer with a reference to your classifier and author
st.markdown("""

---

Developed using **Streamlit** and **NLP techniques**.<br>

**Author**: **Aditya Yadav**

""", unsafe_allow_html=True)