Swaymaw
Updated download of various nltk sub libraries
8073d00 unverified
import streamlit as st
import string
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Fetch the NLTK data used by transform_text: the Punkt tokenizer models for
# nltk.word_tokenize and the stopword lists for stopwords.words('english').
# 'punkt_tab' is the tokenizer data that recent NLTK releases look up in place
# of the pickled 'punkt' models; both are requested so the app works with
# either generation of the library.
# NOTE(review): the former nltk.download('corpus') call was dropped because
# 'corpus' is not a package identifier in the NLTK data index, so it only
# logged an error on every run -- confirm no other resource was intended.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Single stemmer instance shared by every transform_text call.
ps = PorterStemmer()
def transform_text(input_sms: str) -> str:
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The message is lowercased, tokenized with ``nltk.word_tokenize``,
    reduced to purely alphanumeric tokens, stripped of English stopwords,
    and finally Porter-stemmed with the module-level ``ps`` stemmer.

    Args:
        input_sms: The raw message text.

    Returns:
        The surviving stems joined by single spaces; an empty string when
        no token survives the filters.
    """
    tokens = nltk.word_tokenize(input_sms.lower())

    # Load the stopword list once per call and keep it in a set: the
    # original re-read the corpus for every token, and list membership
    # is linear while set membership is constant time.
    english_stopwords = set(stopwords.words('english'))

    # isalnum() already rejects every token containing a punctuation
    # character, so no separate string.punctuation check is needed.
    kept_tokens = [
        token
        for token in tokens
        if token.isalnum() and token not in english_stopwords
    ]

    return " ".join(ps.stem(token) for token in kept_tokens)
# Load the fitted vectorizer and classifier from disk. The files are opened
# in context managers so the handles are closed as soon as unpickling ends
# (the original left both file objects open).
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file -- only ship .pkl artifacts that come from a trusted source.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# Page layout: a title, one text box for the message, and a Predict button.
st.title("Email/SMS Spam Classifier")
input_sms = st.text_input("Enter the message: ")

predict_clicked = st.button('Predict')
if predict_clicked:
    # Pre-process the message, vectorize it, and take the first (only)
    # prediction returned for the single-row input.
    cleaned_message = transform_text(input_sms)
    features = tfidf.transform([cleaned_message])
    label = model.predict(features)[0]

    # Display: 1 is shown as spam, 0 as ham; any other value shows nothing.
    if label == 1:
        st.header("Spam")
    elif label == 0:
        st.header("Not Spam/Ham")