demomern committed on
Commit
243f2c2
1 Parent(s): ec89c81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -59,8 +59,20 @@ def clean_review_text(text):
59
  # convert all text into lower case
60
  text = text.lower()
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- return text
64
 
65
 
66
 
 
59
  # convert all text into lower case
60
  text = text.lower()
61
 
62
+ # return text
63
+
64
+ # create a spaCy document; the token attributes below are used to clean the text:
65
+ # token.is_stop => True if the token is a stop word (is, am, are, a, an, the, etc.)
66
+ # token.is_punct => True if the token is punctuation (. ! , : ; etc.)
67
+ # token.is_space => True if the token is whitespace (tab, space, etc.)
68
+ # token.lemma_ => the root (base) form of the token (go | went | gone | going => go)
69
+ doc = nlp(text)
70
+
71
+ clean_tokens_wds = [ token.lemma_ for token in doc if not ( token.is_stop or token.is_punct or token.is_space ) ]
72
+
73
+
74
+ return " ".join(clean_tokens_wds)
75
 
 
76
 
77
 
78