demomern committed on
Commit
3ca505b
1 Parent(s): 97f9f67

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import emoji
3
+ import spacy
4
+ import joblib
5
+ from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
6
+ from sklearn.neural_network import MLPClassifier
7
+ from sklearn.preprocessing import LabelEncoder
8
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
9
+ import gradio as gr
10
+
11
# spaCy English pipeline; clean_review_text uses it for tokenization,
# stop-word/punctuation filtering, and lemmatization.
nlp = spacy.load("en_core_web_sm")


# Load the fitted TF-IDF vectorizer from a file
# (presumably saved at training time — must match the classifiers below).
cv = joblib.load('tfidf_vectorizer.pkl')

# Load the MLP sentiment classifier from a file
# (its predict_proba columns correspond to the `label` list defined below).
mlp_label = joblib.load('mlpLabel.pkl')

# Load the MLP aspect classifier from a file
# (its predict_proba columns correspond to the `aspect_label` list below).
mlp_aspect_label = joblib.load('mlpAspectLabel.pkl')
22
+
23
+
24
def remove_html(text):
    """Return *text* with HTML tags removed.

    A tag is any non-greedy ``<...>`` span; the match is simply deleted.
    """
    return re.sub(r"<.*?>", "", text)
28
+
29
def remove_url(text):
    """Return *text* with URLs deleted.

    Matches ``http://``/``https://`` links and bare ``www.`` links up to
    the next whitespace.
    """
    url_pattern = r"https?://\S+|www\.\S+"
    return re.sub(url_pattern, "", text)
33
+
34
def emoji_to_text(text):
    """Return *text* with each emoji replaced by its ``:name:`` alias.

    Non-emoji characters pass through unchanged; each demojized alias is
    padded with a space on both sides so it tokenizes as its own word.
    """
    return "".join(
        f" {emoji.demojize(ch)} " if emoji.is_emoji(ch) else ch
        for ch in text
    )
43
+
44
def clean_review_text(text):
    """Normalize a raw review into space-joined lemmas for vectorization.

    Pipeline: strip HTML tags, strip URLs, convert emoji to ``:name:``
    aliases, lowercase, then run spaCy and keep only the lemmas of tokens
    that are not stop words, punctuation, or whitespace.
    """
    # Character-level cleanup before the NLP pass.
    stripped = remove_url(remove_html(text))
    normalized = emoji_to_text(stripped).lower()

    # spaCy document gives us per-token flags:
    #   token.is_stop  -> stop word (is, am, are, a, an, the, ...)
    #   token.is_punct -> punctuation (., !, :, ;, ...)
    #   token.is_space -> whitespace (tab, space, ...)
    #   token.lemma_   -> root form (go | went | gone | going -> go)
    doc = nlp(normalized)

    lemmas = []
    for token in doc:
        if token.is_stop or token.is_punct or token.is_space:
            continue
        lemmas.append(token.lemma_)

    return " ".join(lemmas)
69
+
70
+
71
+
72
# Output class names for the two classifiers. Order matters: index i of each
# list is paired with column i of the corresponding predict_proba output in
# return_label_aspect, so these must match the encoders used at training time.
label = ['negative', 'neutral', 'positive']
aspect_label = ['Card Decks and Challenges', 'Card Play and Board Games',
                'Fun and Coin Collecting', 'Game Scores and Features',
                'Game Updates and User Desires', 'Gameplay and App Experience',
                'Gameplay and Trading', 'Gameplay and User Experience',
                'Property and Land Management', 'Subway Adventures']
78
+
79
+
80
def return_label_aspect(Review):
    """Predict sentiment and aspect probabilities for one review.

    Parameters
    ----------
    Review : str
        Raw review text (may contain HTML, URLs, emoji).

    Returns
    -------
    tuple[dict, dict]
        Two dicts mapping class name -> probability rounded to 2 decimals,
        in the format expected by the two ``gr.Label`` outputs:
        sentiment first, aspect second.
    """
    review_vec = cv.transform([clean_review_text(Review)])
    sentiment_probs = mlp_label.predict_proba(review_vec)[0]
    aspect_probs = mlp_aspect_label.predict_proba(review_vec)[0]

    # Pair each class name with its probability column via zip instead of the
    # previous hard-coded range(3)/range(10) — stays correct if the label
    # lists change. float() unwraps numpy scalars for clean JSON serialization.
    pred_label = {name: round(float(p), 2)
                  for name, p in zip(label, sentiment_probs)}
    pred_aspect = {name: round(float(p), 2)
                   for name, p in zip(aspect_label, aspect_probs)}
    return pred_label, pred_aspect
88
+
89
+
90
+
91
# Gradio UI: one free-text input, two Label outputs (sentiment distribution
# and aspect distribution) fed by return_label_aspect.
iface = gr.Interface(fn=return_label_aspect, inputs="text", outputs=[gr.Label(), gr.Label()])
# inline=False: presumably to open the app standalone rather than embedded
# in a notebook output cell — confirm against the deployment environment.
iface.launch(inline = False)