akuysal committed on
Commit
2f60bb1
1 Parent(s): 29ec108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -10,6 +10,12 @@ import sklearn
10
 
11
  nltk.download('punkt')
12
 
 
 
 
 
 
 
13
  def predictSMSdata(test_text):
14
  categories = ["legitimate", "spam"]
15
  categories.sort()
 
10
 
11
  nltk.download('punkt')
12
 
13
+ def custom_tokenizer_with_English_stemmer(text):
14
+ # my text was unicode so I had to use the unicode-specific translate function. If your documents are strings, you will need to use a different `translate` function here. `Translated` here just does search-replace. See the trans_table: any matching character in the set is replaced with `None`
15
+ tokens = [word for word in nltk.word_tokenize(text)]
16
+ stems = [stemmerEN.stem(item.lower()) for item in tokens]
17
+ return stems
18
+
19
  def predictSMSdata(test_text):
20
  categories = ["legitimate", "spam"]
21
  categories.sort()