Spaces:

R0obin
/

email-spam-classifier

Build error

Create app.py

721f12c verified 3 months ago

No virus

1.83 kB

	import streamlit as st
	import pickle
	import nltk
	import string
	from nltk.corpus import stopwords
	from nltk.stem.porter import PorterStemmer

	# Porter stemmer instance used by update_text to reduce tokens to stems.
	ps = PorterStemmer()

	# Load the pre-trained model and the TF-IDF vectorizer from disk.
	# NOTE(security): pickle.load executes arbitrary code embedded in the file,
	# so these two artifacts must only ever come from a trusted source.
	with open('model.pkl', 'rb') as model_file:
	    model = pickle.load(model_file)
	with open('vectorized.pkl', 'rb') as vectorizer_file:
	    tfidf = pickle.load(vectorizer_file)


	# Define the text preprocessing function
	def update_text(text, *, tokenizer=None, stop_words=None, stemmer=None):
	    """Normalize a raw message into a space-joined string of stemmed tokens.

	    The text is lowercased and tokenized; tokens that are not purely
	    alphanumeric or that are stop words are discarded; the remaining
	    tokens are stemmed and joined with single spaces.

	    Args:
	        text: The raw message string.
	        tokenizer: Callable mapping a string to an iterable of tokens.
	            Defaults to ``nltk.word_tokenize``.
	        stop_words: Iterable of words to discard. Defaults to NLTK's
	            English stop-word list.
	        stemmer: Callable mapping one token to its stem. Defaults to the
	            ``stem`` method of the module-level Porter stemmer ``ps``.

	    Returns:
	        The surviving stems joined by single spaces, or an empty string
	        when no token survives filtering.
	    """
	    if tokenizer is None:
	        tokenizer = nltk.word_tokenize
	    if stop_words is None:
	        stop_words = stopwords.words('english')
	    if stemmer is None:
	        stemmer = ps.stem

	    # Build the lookup once per call: the stop-word list used to be
	    # re-read and scanned linearly for every single token.
	    excluded = frozenset(stop_words)

	    # No separate punctuation test is needed: a token that passes
	    # isalnum() contains no punctuation characters at all.
	    return " ".join(
	        stemmer(token)
	        for token in tokenizer(text.lower())
	        if token.isalnum() and token not in excluded
	    )


	# Streamlit application title
	st.title("Email/SMS Spam Classifier")

	# Multi-line text area for the message to classify
	input_sms = st.text_area("Write the Message", height=150)

	# Run the classification only when the "Predict" button is pressed
	if st.button("Predict"):
	    # Preprocess the raw message into its stemmed, filtered form
	    transformed_sms = update_text(input_sms)

	    if not transformed_sms.strip():
	        # Nothing survived preprocessing, so there is nothing to classify
	        st.warning("Please enter a valid message.")
	    else:
	        # The vectorizer takes a list of documents; its sparse output is
	        # converted to a dense array before being handed to the model
	        vectorized_input_dense = tfidf.transform([transformed_sms]).toarray()

	        # Keep only the prediction call inside the try so that the error
	        # message really refers to a prediction failure
	        try:
	            result = model.predict(vectorized_input_dense)[0]
	        except Exception as exc:
	            # UI boundary: report the failure to the user instead of crashing
	            st.error(f"Error during prediction: {exc}")
	        else:
	            # A predicted label of 1 is shown as spam, anything else as not spam
	            if result == 1:
	                st.header("Spam")
	            else:
	                st.header("Not Spam")