R0obin's picture
Create app.py
721f12c verified
raw
history blame contribute delete
No virus
1.83 kB
import streamlit as st
import pickle
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Shared Porter stemmer instance used by the text preprocessing step.
ps = PorterStemmer()


def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code, so these artifact files
    # must only ever come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the pre-trained model and the TF-IDF vectorizer, in that order.
model = _load_pickle('model.pkl')
tfidf = _load_pickle('vectorized.pkl')
def update_text(text: str) -> str:
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; only
    alphanumeric tokens that are not English stop words are kept, and each
    survivor is reduced to its Porter stem.

    Args:
        text: Raw message text.

    Returns:
        The stemmed tokens joined by single spaces, in their original order.
        An empty string when no token survives the filters.
    """
    # stopwords.words() returns a list; build the set once per call instead
    # of once per token so each membership test is a constant-time lookup.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # No separate punctuation check is needed: a token that passes
    # str.isalnum() consists solely of alphanumeric characters.
    return " ".join(
        ps.stem(token)
        for token in tokens
        if token.isalnum() and token not in stop_words
    )
# Page title
st.title("Email/SMS Spam Classifier")

# Multi-line input box for the message to classify
input_sms = st.text_area("Write the Message", height=150)

# Classification only runs on the script pass where the button was pressed
if st.button("Predict"):
    transformed_sms = update_text(input_sms)

    if not transformed_sms.strip():
        # Preprocessing left nothing to classify, so ask for real input
        st.warning("Please enter a valid message.")
    else:
        # Vectorize the cleaned text and convert the sparse result to a
        # dense array before handing it to the model
        vectorized_input_dense = tfidf.transform([transformed_sms]).toarray()

        # Any failure while predicting or rendering the verdict is shown
        # in the UI rather than aborting the script run
        try:
            result = model.predict(vectorized_input_dense)[0]
            st.header("Spam" if result == 1 else "Not Spam")
        except Exception as e:
            st.error(f"Error during prediction: {e}")