Update app.py
app.py CHANGED
@@ -1,6 +1,5 @@
 import re
 import emoji
-import spacy
 import joblib
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.neural_network import MLPClassifier
@@ -8,8 +7,6 @@ from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
 import gradio as gr
 
-nlp = spacy.load("en_core_web_sm")
-
 
 # load the TF-IDF vectorizer to a file
 cv = joblib.load('tfidf_vectorizer.pkl')
@@ -55,17 +52,8 @@ def clean_review_text(text):
     # convert all text into lower case
     text = text.lower()
 
-    # create spacy document to remove :
-    # token.is_stop => return true if word is stop word ( is, am, are, a, an, the etc )
-    # token.is_punct => return true if word is punctuation ( ., !, , :, ; etc)
-    # token.is_space => return true if word as a space like tab, space ..
-    # token.lemma_ convert any word into root word ( go | went | gone | going => go )
-    doc = nlp(text)
-
-    clean_tokens_wds = [ token.lemma_ for token in doc if not ( token.is_stop or token.is_punct or token.is_space ) ]
 
-
-    return " ".join(clean_tokens_wds)
+    return text
 
 
 
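In effect, this commit drops the spaCy dependency: clean_review_text no longer lemmatizes or strips stop words, it only lowercases the review (plus whatever regex/emoji handling happens earlier in the function, which this diff does not show) before TF-IDF vectorization. A rough sketch of how the pieces visible in this diff fit together after the change — the classifier file name sentiment_model.pkl and the predict_sentiment helper are assumptions for illustration, not part of this commit:

import joblib

# artifacts loaded at startup (the vectorizer line is taken from this diff;
# the classifier file name is a placeholder)
cv = joblib.load('tfidf_vectorizer.pkl')
clf = joblib.load('sentiment_model.pkl')  # assumed name for the trained MLPClassifier

def clean_review_text(text):
    # after this commit: no spaCy doc, no stop-word removal, no lemmatization
    text = text.lower()
    return text

def predict_sentiment(review):  # hypothetical helper, not in the diff
    cleaned = clean_review_text(review)
    features = cv.transform([cleaned])  # TF-IDF features from the loaded vectorizer
    return clf.predict(features)[0]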