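# NOTE(review): the next three lines look like web-page residue (a hosting
# status banner), not program text - confirm and remove.
# Spaces:
# Runtime error
# Runtime error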
from sklearn.feature_extraction.text import TfidfVectorizer | |
from TurkishStemmer import TurkishStemmer | |
import nltk | |
import string | |
# import for loading python objects (scikit-learn models) | |
import pickle | |
import streamlit as st | |
import sklearn | |
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize *text* and return the lower-cased Turkish stem of each token.

    The text is first passed through ``str.translate`` with the module-level
    ``trans_table``; every character mapped to ``None`` in that table is
    deleted. The cleaned text is then split with ``nltk.word_tokenize`` and
    each token is lower-cased and stemmed with the module-level ``stemmerTR``.

    NOTE(review): presumably the pickled TF-IDF vectorizer refers to this
    function by name as its tokenizer - confirm before renaming or moving it.

    Returns:
        A list of stems, in the order the tokens appear in *text*.
    """
    cleaned = text.translate(trans_table)
    return [stemmerTR.stem(token.lower()) for token in nltk.word_tokenize(cleaned)]
def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*."""
    # The context manager closes the handle even if unpickling raises.
    # SECURITY: pickle can execute arbitrary code while loading; only point
    # this at model files that ship with the application.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def predictSMSdata(test_text: str) -> dict:
    """Classify a single SMS text as ``"legitimate"`` or ``"spam"``.

    Loads the classifier from ``LinearSVC_SMS_spam_TR.pickle`` and the
    vectorizer from ``tfidf_vectorizer_TR.pickle`` (both resolved relative to
    the current working directory), transforms *test_text* as a one-element
    batch, and maps the first prediction to a category name.

    Args:
        test_text: The message to classify.

    Returns:
        ``{"prediction": <category name>}``. A dict is returned (instead of
        only printing, as before) so the caller receives a usable value that
        can be handed directly to a JSON renderer.

    Raises:
        OSError: If either pickle file cannot be opened.
        pickle.UnpicklingError: If a pickle file is corrupt.
    """
    # Sorted so that position i corresponds to class index i.
    # NOTE(review): assumes the classifier emits integer class indices
    # (0 -> "legitimate", 1 -> "spam") - confirm against the training code.
    categories = sorted(["legitimate", "spam"])

    classifier = _load_pickle("LinearSVC_SMS_spam_TR.pickle")
    tfidf_vectorizer = _load_pickle("tfidf_vectorizer_TR.pickle")

    # The vectorizer works on a collection of documents, so wrap the single
    # text in a list and read back the single prediction.
    features = tfidf_vectorizer.transform([test_text])
    predicted = classifier.predict(features)
    label = categories[predicted[0]]

    # Kept for backward compatibility with the original console output.
    print(label)
    return {"prediction": label}
# Deletion table for str.translate: every ASCII punctuation character and
# digit is mapped to None, which makes translate() drop it.
trans_table = str.maketrans("", "", string.punctuation + string.digits)

# Single stemmer instance shared by custom_tokenizer_with_Turkish_stemmer.
stemmerTR = TurkishStemmer()

# Streamlit UI: read free text from the user and, once something has been
# entered, render whatever predictSMSdata returns as JSON.
text = st.text_area("enter some text!")
if text:
    out = predictSMSdata(text)
    st.json(out)