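# NOTE: the text below appears to be page-status residue captured together
# with the source ("Spaces: Runtime error"); it is not part of the program.
# Spaces: Runtime error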
from sklearn.feature_extraction.text import TfidfVectorizer | |
# import for loading python objects (scikit-learn models) | |
import pickle | |
import nltk | |
from nltk.data import load | |
from nltk.stem import PorterStemmer | |
import streamlit as st | |
import sklearn | |
# Tokenizer data required by nltk.word_tokenize. NLTK 3.8.2 and later read the
# 'punkt_tab' resource instead of the pickled 'punkt' models, so both are
# requested here. NOTE(review): on an NLTK release that does not know
# 'punkt_tab' the extra call is expected to just report the missing id and
# return False -- confirm against the pinned NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
def custom_tokenizer_with_English_stemmer(text):
    """Split ``text`` into word tokens and return their lowercased stems.

    Tokenization is done by ``nltk.word_tokenize``; each token is lowercased
    and then stemmed with the module-level ``stemmerEN`` instance.

    Args:
        text: The raw message to tokenize.

    Returns:
        A list holding one stem per token, in token order.
    """
    # NOTE(review): the pickled TF-IDF vectorizer presumably refers to this
    # function by name when it is unpickled -- confirm before renaming it.
    return [
        stemmerEN.stem(token.lower())
        for token in nltk.word_tokenize(text)
    ]
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE: unpickling can execute arbitrary code, so ``path`` must only ever
    # point at trusted files shipped with the app.
    # The context manager closes the handle even when pickle.load raises.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def predictSMSdata(test_text):
    """Classify a single SMS message as ``"legitimate"`` or ``"spam"``.

    The pickled classifier and the pickled TF-IDF vectorizer are read from
    the current working directory on every call. The message is transformed
    by the vectorizer, passed to the classifier, and the first predicted
    value is used as an index into the alphabetically sorted label list.

    Args:
        test_text: The message text to classify.

    Returns:
        The predicted label, either ``"legitimate"`` or ``"spam"``.

    Raises:
        OSError: If either pickle file cannot be opened (for example
            ``FileNotFoundError`` when it is missing).
    """
    # Sorted so that a predicted index selects from the labels in
    # alphabetical order.
    categories = sorted(["legitimate", "spam"])

    classifier = _load_pickle("LinearSVC_SMS_spam_EN.pickle")
    # Vectorizer used to turn the raw message into the classifier's features.
    tfidf_vectorizer = _load_pickle("tfidf_vectorizer_EN.pickle")

    # transform() is given a collection of documents, so wrap the one message.
    features = tfidf_vectorizer.transform([test_text])
    predicted = classifier.predict(features)

    label = categories[predicted[0]]
    # Echo the result on stdout as well (kept from the original behaviour).
    print(label)
    return label
# Shared Porter stemmer instance; custom_tokenizer_with_English_stemmer reads
# this module-level name when it stems tokens.
stemmerEN = PorterStemmer()

# Sample message pre-filled in the text box.
default_value = (
    "ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE MINS. "
    "INDIA CUST SERVs SED YES. L8ER GOT MEGA BILL. "
    "3 DONT GIV A SHIT. BAILIFF DUE IN DAYS. "
    "I O £250 3 WANT £800"
)

# Render the input widget and classify whatever text it currently holds.
text = st.text_area("enter some text!", default_value)

# An empty box is falsy, so nothing is classified or displayed for it.
if text:
    out = predictSMSdata(text)
    st.write(f"The category of SMS = {out.upper()}")