import streamlit as st
import pickle
import string

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Download the NLTK resources needed for tokenization and stopword removal
# (these are no-ops if the data is already present).
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# Initialize the Porter stemmer
ps = PorterStemmer()

# Load the pre-trained model and TF-IDF vectorizer
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)
with open('vectorized.pkl', 'rb') as file:
    tfidf = pickle.load(file)

# Cache the English stopword list so it is not rebuilt for every token
stop_words = set(stopwords.words('english'))


# Text preprocessing: lowercase, tokenize, drop non-alphanumeric tokens,
# remove stopwords and punctuation, then stem each remaining token.
def update_text(text):
    text = text.lower()
    tokens = nltk.word_tokenize(text)

    # Keep only alphanumeric tokens
    tokens = [token for token in tokens if token.isalnum()]

    # Remove stopwords and punctuation
    tokens = [token for token in tokens
              if token not in stop_words and token not in string.punctuation]

    # Stem each token
    tokens = [ps.stem(token) for token in tokens]

    return " ".join(tokens)


# Streamlit application title
st.title("Email/SMS Spam Classifier")

# Collect the message text with a larger text area
input_sms = st.text_area("Write the Message", height=150)

# Button to trigger the prediction
if st.button("Predict"):
    # Preprocess the text
    transformed_sms = update_text(input_sms)

    # Ensure the transformed message is not empty before vectorizing
    if transformed_sms.strip():
        # Vectorize the message
        vectorized_input = tfidf.transform([transformed_sms])

        # Convert the sparse matrix to a dense array for the model
        vectorized_input_dense = vectorized_input.toarray()

        # Predict and display the result
        try:
            result = model.predict(vectorized_input_dense)[0]
            if result == 1:
                st.header("Spam")
            else:
                st.header("Not Spam")
        except Exception as e:
            st.error(f"Error during prediction: {e}")
    else:
        st.warning("Please enter a valid message.")
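

# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original app): one way the 'model.pkl' and
# 'vectorized.pkl' artifacts loaded above could have been produced. The
# dataset path, column names, and the choice of MultinomialNB are assumptions
# for illustration only. Launch the app itself with: streamlit run <this_file>.py
# ---------------------------------------------------------------------------
# import pandas as pd
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.naive_bayes import MultinomialNB
#
# df = pd.read_csv('spam.csv')                    # hypothetical dataset with 'text' and 'label' columns
# df['transformed'] = df['text'].apply(update_text)
#
# tfidf = TfidfVectorizer(max_features=3000)      # same vectorizer object later pickled
# X = tfidf.fit_transform(df['transformed']).toarray()
# y = df['label']                                 # assumed encoding: 1 = spam, 0 = not spam
#
# model = MultinomialNB()
# model.fit(X, y)
#
# with open('model.pkl', 'wb') as f:
#     pickle.dump(model, f)
# with open('vectorized.pkl', 'wb') as f:
#     pickle.dump(tfidf, f)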