demomern committed on
Commit
548542d
1 Parent(s): a4bc322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -13
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import re
2
  import emoji
3
- import spacy
4
  import joblib
5
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
6
  from sklearn.neural_network import MLPClassifier
@@ -8,8 +7,6 @@ from sklearn.preprocessing import LabelEncoder
8
  from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
9
  import gradio as gr
10
 
11
- nlp = spacy.load("en_core_web_sm")
12
-
13
 
14
  # load the TF-IDF vectorizer to a file
15
  cv = joblib.load('tfidf_vectorizer.pkl')
@@ -55,17 +52,8 @@ def clean_review_text(text):
55
  # convert all text into lower case
56
  text = text.lower()
57
 
58
- # create spacy document to remove :
59
- # token.is_stop => return true if word is stop word ( is, am, are, a, an, the etc )
60
- # token.is_punct => return true if word is punctuation ( ., !, , :, ; etc)
61
- # token.is_space => return true if word as a space like tab, space ..
62
- # token.lemma_ convert any word into root word ( go | went | gone | going => go )
63
- doc = nlp(text)
64
-
65
- clean_tokens_wds = [ token.lemma_ for token in doc if not ( token.is_stop or token.is_punct or token.is_space ) ]
66
 
67
-
68
- return " ".join(clean_tokens_wds)
69
 
70
 
71
 
 
1
  import re
2
  import emoji
 
3
  import joblib
4
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
5
  from sklearn.neural_network import MLPClassifier
 
7
  from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
8
  import gradio as gr
9
 
 
 
10
 
11
  # load the TF-IDF vectorizer to a file
12
  cv = joblib.load('tfidf_vectorizer.pkl')
 
52
  # convert all text into lower case
53
  text = text.lower()
54
 
 
 
 
 
 
 
 
 
55
 
56
+ return text
 
57
 
58
 
59