Spaces:
Running
Running
File size: 1,518 Bytes
fe5faf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import streamlit as st

# Page-level configuration must be the first Streamlit call in the script.
st.set_page_config(page_title="NLP WEB APP")

# App heading and multipage-navigation hint.
st.title("SMS SPAM CLASSIFIER")
st.sidebar.success("Select a page above")
# Stdlib imports.
import pickle
import string

# Third-party NLP toolkit.
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Fetch the tokenizer model and stopword corpus (no-ops once cached locally).
nltk.download('punkt')
nltk.download('stopwords')

# Single shared stemmer instance used by transform_text().
ps = PorterStemmer()
def transform_text(text):
    """Normalize an SMS message for the TF-IDF vectorizer.

    Pipeline: lowercase -> NLTK word-tokenize -> keep alphanumeric tokens
    -> drop English stopwords and punctuation -> Porter-stem -> rejoin
    with single spaces.

    Args:
        text: raw message string.

    Returns:
        A single space-joined string of stemmed tokens.
    """
    tokens = nltk.word_tokenize(text.lower())
    # Hoist the stopword lookup: stopwords.words('english') builds a fresh
    # list on every call, so testing it per-token was O(n*m). A set gives
    # O(1) membership and is built once.
    stop_words = set(stopwords.words('english'))
    # One pass replaces the original three filter/append loops; the
    # punctuation check is kept for parity even though isalnum() already
    # excludes pure-punctuation tokens.
    stemmed = [
        ps.stem(word)
        for word in tokens
        if word.isalnum()
        and word not in stop_words
        and word not in string.punctuation
    ]
    return " ".join(stemmed)
# Load the fitted TF-IDF vectorizer and the trained classifier.
# Context managers fix the original `pickle.load(open(...))` pattern,
# which never closed its file handles.
# NOTE(review): pickle is only safe here because these are our own
# artifacts — never load untrusted pickle files.
with open('sms-spam-models/vectorizer.pkl', 'rb') as f:
    tfidf = pickle.load(f)
with open('sms-spam-models/model.pkl', 'rb') as f:
    model = pickle.load(f)
# Message input and on-demand prediction.
sms = st.text_input("ENTER THE MESSAGE")

if st.button("PREDICT"):
    # Preprocess -> vectorize -> predict -> display.
    cleaned = transform_text(sms)
    features = tfidf.transform([cleaned])
    prediction = model.predict(features)[0]
    # Label 1 is the spam class.
    st.header("SPAM" if prediction == 1 else "NOT SPAM")