Spaces:

demomern
/

ABSA-CNN

Runtime error

demomern committed on Sep 15, 2023

Commit

243f2c2

•

1 Parent(s): ec89c81

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -59,8 +59,20 @@ def clean_review_text(text):
     # convert all text into lower case
     text = text.lower()
-    return text

     # convert all text into lower case
     text = text.lower()
+    # return text
+    # Create a spaCy document so that unwanted tokens can be filtered out:
+    # token.is_stop  => True if the token is a stop word (is, am, are, a, an, the, etc.)
+    # token.is_punct => True if the token is punctuation (. ! , : ; etc.)
+    # token.is_space => True if the token is whitespace (tab, space, etc.)
+    # token.lemma_   => the root form of the token (go | went | gone | going => go)
+    doc = nlp(text)
+    clean_tokens_wds = [ token.lemma_ for token in doc if not ( token.is_stop or token.is_punct or token.is_space ) ]
+    return " ".join(clean_tokens_wds)