Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import emoji
|
3 |
+
import joblib
|
4 |
+
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
|
5 |
+
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torchtext.vocab as vocab
|
9 |
+
|
10 |
+
import gradio as gr
|
11 |
+
|
12 |
+
# Pretrained GloVe word vectors (name "6B", 100 dimensions per token).
# Both the model's embedding layer and predict_review's token lookup use it.
glove = vocab.GloVe(name="6B", dim=100)
|
14 |
+
|
15 |
+
def remove_html(text):
    """Return *text* with every ``<...>`` tag-like span deleted.

    The match is non-greedy, so each span ends at the first ``>`` that
    follows its ``<``.  ``.`` does not match newlines here, so a span that
    is broken across lines is left untouched.
    """
    return re.sub(r"<.*?>", "", text)
|
19 |
+
|
20 |
+
def remove_url(text):
    """Return *text* with URLs stripped out.

    A URL is anything starting with ``http://``, ``https://`` or ``www.``
    and running up to the next whitespace character.
    """
    url_pattern = re.compile(r"https?://\S+|www\.\S+")
    return url_pattern.sub("", text)
|
24 |
+
|
25 |
+
def emoji_to_text(text):
    """Replace each emoji in *text* with its space-padded ``:shortcode:`` name.

    Whole emoji sequences are matched rather than single code points, so
    multi-code-point emoji (skin-tone modifiers, ZWJ sequences, flags,
    variation selectors) are translated as one unit instead of being split
    into fragments.  All non-emoji characters are copied through unchanged.

    Args:
        text: The string to convert.

    Returns:
        A new string in which every emoji is replaced by
        ``" " + emoji.demojize(<emoji>) + " "``.
    """

    def _describe(chars, _data):
        # Pad with spaces so the shortcode becomes its own whitespace token.
        return f" {emoji.demojize(chars)} "

    return emoji.replace_emoji(text, replace=_describe)
|
34 |
+
|
35 |
+
def clean_review_text(text):
    """Normalise a raw review string before it is tokenised.

    The steps run in this order: strip HTML tags, strip URLs, spell out
    emoji as text, and finally lower-case the whole string.
    """
    for step in (remove_html, remove_url, emoji_to_text):
        text = step(text)
    return text.lower()
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
|
55 |
+
class CNNHotelReviewsModel(nn.Module):
    """Convolutional text classifier on top of frozen GloVe embeddings.

    Token indices are embedded, passed through one convolution per filter
    size (each spanning the full embedding width), max-pooled over the
    sequence axis, concatenated, and mapped through a linear layer followed
    by a sigmoid.

    Args:
        embedding_dim: Width of one embedding vector; must equal the width
            of ``glove.vectors`` for the convolutions to collapse that axis.
        n_filters: Number of output channels per convolution.
        filter_sizes: Iterable of window heights (in tokens), one
            convolution per entry.
        output_dim: Number of output units of the final linear layer.
        dropout: Dropout probability applied to the pooled features.
    """

    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
        super().__init__()

        # The embedding table is taken from the module-level `glove` object
        # and is not updated by training (freeze=True).
        self.embedding = nn.Embedding.from_pretrained(glove.vectors, freeze=True)
        self.convs = nn.ModuleList(
            [
                nn.Conv2d(
                    in_channels=1,
                    out_channels=n_filters,
                    kernel_size=(fs, embedding_dim),
                )
                for fs in filter_sizes
            ]
        )

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text):
        """Return one sigmoid score per input row.

        Args:
            text: Integer tensor of token indices; predict_review passes a
                ``(1, max_len)`` batch.  The sequence must be at least as
                long as the largest filter size.

        Returns:
            Tensor of sigmoid outputs with dimension 1 squeezed away, i.e.
            shape ``(batch,)`` when ``output_dim`` is 1.
        """
        # Add a singleton channel axis so Conv2d sees (batch, 1, seq, emb).
        embedded = self.embedding(text).unsqueeze(1)

        # `nn.functional` is reached through the existing `nn` import; the
        # file never binds the conventional `F` alias.
        conved = [
            nn.functional.relu(conv(embedded)).squeeze(3) for conv in self.convs
        ]
        # Max over the whole remaining sequence axis of each feature map.
        pooled = [
            nn.functional.max_pool1d(fmap, fmap.shape[2]).squeeze(2)
            for fmap in conved
        ]

        cat = self.dropout(torch.cat(pooled, dim=1))
        return self.sigmoid(self.fc(cat)).squeeze(1)
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
|
83 |
+
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Architecture settings for the CNN.
# Best Hyperparameters: {'n_filters': 250, 'filter_sizes': [2, 3, 4], 'dropout': 0.1}
EMBEDDING_DIM = 100  # same width as the GloVe vectors loaded above (dim=100)
OUTPUT_DIM = 1  # one sigmoid unit; predict_review reads it as P(positive)
N_FILTERS = 250
FILTER_SIZES = [2, 3, 4]
DROPOUT = 0.1

CNN_Model = CNNHotelReviewsModel(
    embedding_dim=EMBEDDING_DIM,
    n_filters=N_FILTERS,
    filter_sizes=FILTER_SIZES,
    output_dim=OUTPUT_DIM,
    dropout=DROPOUT,
)

# Restore the trained weights, place the model on the chosen device and
# switch it to evaluation mode.
_state_dict = torch.load("hotel_review_model.pth", map_location=device)
CNN_Model.load_state_dict(_state_dict)
CNN_Model = CNN_Model.to(device)
CNN_Model.eval()

# LDA topic model and its dictionary, used by dominant_topic for aspect
# selection.
# NOTE(review): joblib.load unpickles its input; only load trusted files.
lda_model = joblib.load("lda_model.pkl")
dictionary = joblib.load("dictionary.pkl")
|
106 |
+
|
107 |
+
def predict_review(model, review, max_len=128):
    """Score one cleaned review with *model*.

    Args:
        model: Module that maps a ``(1, max_len)`` index tensor to a single
            value, which is read as the positive-class probability.
        review: Review text; it is split on whitespace.
        max_len: Number of token ids fed to the model.

    Returns:
        ``{'positive': p, 'negative': 1 - p}`` where ``p`` is the model output.
    """
    # Whitespace tokens -> GloVe ids; words missing from the vocabulary get id 0.
    token_ids = [glove.stoi.get(word, 0) for word in review.split()]

    # Force the sequence to exactly max_len: cut the tail, or right-pad with 0.
    # A negative pad count yields an empty list, so long inputs are only cut.
    pad_count = max_len - len(token_ids)
    token_ids = token_ids[:max_len] + [0] * pad_count

    # Batch of one, on the same device as the model.
    batch = torch.tensor(token_ids).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        positive = model(batch).item()

    return {"positive": positive, "negative": 1 - positive}
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
# Human-readable aspect name for each LDA topic id.  A name's position in
# the tuple below is its topic id (0 through 9).
aspect_label = dict(
    enumerate(
        (
            "Reception & Service Efficiency",
            "Transportation & Proximity",
            "Room Comfort & Staff Courtesy",
            "Location & Staff Quality",
            "Room Discrepancies",
            "Hotel Quality vs Price",
            "Booking & Payment Issues",
            "Room Ambiance & Noise",
            "Amenities & Value",
            "Room Size & Condition",
        )
    )
)
|
148 |
+
|
149 |
+
def dominant_topic(text):
    """Return the LDA topic weights of *text*, keyed by aspect name.

    Despite the name, every topic reported by the model is returned, not
    only the strongest one; the caller decides how many to display.

    Args:
        text: Cleaned review text; it is split on whitespace before being
            converted to a bag of words with the module-level ``dictionary``.

    Returns:
        Dict mapping ``aspect_label[topic_id]`` to the topic's weight as a
        plain ``float``.  Empty if the model reports no topics.

    Raises:
        KeyError: If the model reports a topic id with no entry in
            ``aspect_label``.
    """
    bow = dictionary.doc2bow(text.split())
    # Assumes each entry is a (topic_id, weight) pair, as the original
    # item[0] / item[1] indexing implies.
    topics = lda_model.get_document_topics(bow)
    return {aspect_label[topic_id]: float(weight) for topic_id, weight in topics}
|
155 |
+
|
156 |
+
|
157 |
+
def gr_fun(Review):
    """Gradio callback: clean the review, then score sentiment and aspects.

    Returns a pair ``(sentiment, aspects)``: the dict produced by
    predict_review and the dict produced by dominant_topic, both computed
    from the cleaned text.
    """
    cleaned = clean_review_text(Review)
    return predict_review(CNN_Model, cleaned), dominant_topic(cleaned)
|
163 |
+
|
164 |
+
|
165 |
+
|
166 |
+
|
167 |
+
|
168 |
+
|
169 |
+
# Sample reviews offered as ready-made inputs in the UI.
_EXAMPLE_REVIEWS = [
    "room condition was very bad",
    "Staff where excellent and the room was lovely really great hotel will definitely be back",
    "Couldn t find ice machine The junior suite was excellent with a fantastic bar",
    "Furniture in the room was a bit worn and tired for the money you pay would just expect a bit more it was ok",
    "Room was West facing and was far too warm particularly as the a c didn t seem to be working to well The shower room was excellent and large enough for my lady and I to be rude in Loved it",
]

# One text box in; two label widgets out: sentiment, and the five
# highest-weighted aspects.
iface = gr.Interface(
    fn=gr_fun,
    inputs="text",
    outputs=[gr.Label(), gr.Label(num_top_classes=5)],
    examples=_EXAMPLE_REVIEWS,
)

iface.launch(inline=False)
|