demomern committed on
Commit
4cd06db
1 Parent(s): a12e96b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import emoji
3
+ import joblib
4
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torchtext.vocab as vocab
9
+
10
+ import gradio as gr
11
+
12
# Load the pretrained 100-dimensional GloVe "6B" word vectors.
# `glove.stoi` is used below to map tokens to row indices and `glove.vectors`
# supplies the embedding matrix for the CNN.
# NOTE(review): torchtext presumably downloads/caches the vectors on first
# use - confirm the deployment environment allows that.
glove = vocab.GloVe(name='6B', dim=100)
14
+
15
def remove_html(text):
    """Return *text* with every ``<...>`` tag-like span removed.

    The match is non-greedy and ``.`` does not match newlines, so each
    substitution strips the shortest ``<`` ... ``>`` run found on one line.
    """
    patt_html = r"<.*?>"
    return re.sub(patt_html, "", text)
19
+
20
def remove_url(text):
    """Return *text* with URLs removed.

    A URL is anything starting with ``http://``, ``https://`` or ``www.``
    and running up to the next whitespace character.
    """
    patt_url = r"https?://\S+|www\.\S+"
    return re.sub(patt_url, "", text)
24
+
25
def emoji_to_text(text):
    """Replace each emoji in *text* with its space-padded ``:name:`` alias.

    The whole string is handed to ``emoji.demojize`` in one call so that
    emoji made of several code points (flags, skin-tone modifiers, ZWJ
    sequences) are recognised as a unit; checking one character at a time
    splits such sequences apart. The delimiters add a space on each side of
    the alias, e.g. ``" :thumbs_up: "``, so the alias stays a separate token
    when the text is later split on whitespace.
    """
    return emoji.demojize(text, delimiters=(" :", ": "))
34
+
35
def clean_review_text(text):
    """Normalise a raw review string before it is fed to the models.

    Steps, in order: strip HTML tags, strip URLs, turn emoji into their
    textual aliases, then lower-case everything.
    """
    # Remove HTML tags.
    text = remove_html(text)

    # Remove URLs.
    text = remove_url(text)

    # Convert emoji characters into text aliases.
    text = emoji_to_text(text)

    # Lower-case last so the emoji aliases are lower-cased as well.
    return text.lower()
51
+
52
+
53
+
54
+
55
# Main CNN model definition
class CNNHotelReviewsModel(nn.Module):
    """Convolutional text classifier over frozen pretrained word embeddings.

    One ``Conv2d`` per entry in ``filter_sizes`` slides over the embedded
    token sequence; each feature map is max-pooled over the sequence axis,
    the pooled features are concatenated, passed through dropout and a
    linear layer, and squashed with a sigmoid.

    Args:
        embedding_dim: Width of one embedding vector (kernel width of every
            convolution).
        n_filters: Number of output channels per convolution.
        filter_sizes: Iterable of kernel heights (tokens per window).
        output_dim: Size of the final linear layer's output.
        dropout: Dropout probability applied to the pooled features.
        embedding_weights: Optional 2-D tensor used as the embedding table.
            Defaults to the module-level ``glove.vectors``.
    """

    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout,
                 embedding_weights=None):
        super().__init__()

        if embedding_weights is None:
            # Original behaviour: use the globally loaded GloVe vectors.
            embedding_weights = glove.vectors

        self.embedding = nn.Embedding.from_pretrained(embedding_weights, freeze=True)
        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels=1,
                      out_channels=n_filters,
                      kernel_size=(fs, embedding_dim))
            for fs in filter_sizes
        ])

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text):
        """Return sigmoid scores for a batch of token-index sequences.

        Args:
            text: Integer tensor of embedding indices; the sequence length
                must be at least the largest filter size.

        Returns:
            The sigmoid output with dimension 1 squeezed out (one score per
            sequence when ``output_dim`` is 1).
        """
        # Add a channel axis so Conv2d sees a one-channel "image".
        embedded = self.embedding(text).unsqueeze(1)
        # `nn.functional` is used because the file never imports `F`.
        conved = [nn.functional.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        # Max over the whole remaining sequence axis of every feature map.
        pooled = [nn.functional.max_pool1d(fmap, fmap.shape[2]).squeeze(2) for fmap in conved]
        cat = self.dropout(torch.cat(pooled, dim=1))
        return self.sigmoid(self.fc(cat)).squeeze(1)
79
+
80
+
81
+
82
+
83
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

EMBEDDING_DIM = 100  # must match the dimension of the loaded GloVe vectors
OUTPUT_DIM = 1  # one sigmoid score: the probability of the positive class

# Best hyperparameters: {'n_filters': 250, 'filter_sizes': [2, 3, 4], 'dropout': 0.1}
N_FILTERS = 250
FILTER_SIZES = [2, 3, 4]
DROPOUT = 0.1

CNN_Model = CNNHotelReviewsModel(EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT)

# Load the saved state_dict into the model, mapping tensors onto `device`.
CNN_Model.load_state_dict(torch.load("hotel_review_model.pth", map_location=device))
CNN_Model = CNN_Model.to(device)  # move the model to the selected device
CNN_Model.eval()  # evaluation mode (disables dropout)

# LDA model and its dictionary, used for aspect selection.
# NOTE(review): joblib.load unpickles arbitrary objects - only ship
# .pkl files that come from a trusted source.
lda_model = joblib.load('lda_model.pkl')
dictionary = joblib.load('dictionary.pkl')
106
+
107
# CNN prediction helper
def predict_review(model, review, max_len=128):
    """Score one cleaned review with *model*.

    Args:
        model: Callable module that takes a ``(1, max_len)`` index tensor
            and returns a single-element tensor.
        review: Review text; it is tokenised by whitespace.
        max_len: Fixed sequence length the indices are padded or truncated to.

    Returns:
        ``{'positive': p, 'negative': 1 - p}`` where ``p`` is the model output.
    """
    # Tokenise and map tokens to embedding rows; unknown tokens fall back to 0.
    # NOTE(review): index 0 doubles as the padding value and is an ordinary
    # row of the embedding table - presumably this mirrors training; confirm.
    indices = [glove.stoi.get(token, 0) for token in review.split()]

    # Right-pad with zeros, then cut to exactly max_len entries.
    indices = (indices + [0] * max_len)[:max_len]

    # Explicit integer dtype, then add the batch dimension.
    tensor = torch.tensor(indices, dtype=torch.long).unsqueeze(0)

    model.eval()  # make sure dropout is disabled
    with torch.no_grad():
        output = model(tensor.to(device))

    prob = output.item()
    return {'positive': prob, 'negative': 1 - prob}
132
+
133
+
134
+
135
# Human-readable aspect name for each LDA topic id (keys are the topic ids
# returned by the LDA model in `dominant_topic`).
aspect_label = {
    0: "Reception & Service Efficiency",
    1: "Transportation & Proximity",
    2: "Room Comfort & Staff Courtesy",
    3: "Location & Staff Quality",
    4: "Room Discrepancies",
    5: "Hotel Quality vs Price",
    6: "Booking & Payment Issues",
    7: "Room Ambiance & Noise",
    8: "Amenities & Value",
    9: "Room Size & Condition",
}
148
+
149
def dominant_topic(text):
    """Return the aspect distribution of *text* according to the LDA model.

    The text is split on whitespace, converted to a bag-of-words with the
    module-level ``dictionary`` and scored by ``lda_model``.

    Returns:
        A dict mapping each returned topic's aspect label to its probability
        as a ``float``. Despite the name, every topic returned by the model
        is included, not only the most probable one.

    Raises:
        KeyError: If the model returns a topic id missing from ``aspect_label``.
    """
    bow = dictionary.doc2bow(text.split())
    topics = lda_model.get_document_topics(bow)
    # The unused `max(...)` lookup was dropped; it only added a ValueError
    # when the model returned no topics.
    return {aspect_label[topic_id]: float(prob) for topic_id, prob in topics}
155
+
156
+
157
def gr_fun(Review):
    """Gradio callback: clean the review, then score sentiment and aspects.

    Args:
        Review: Raw review text from the UI (the parameter name is kept
            because Gradio derives the input label from it).

    Returns:
        A ``(sentiment, aspects)`` tuple of label-to-score dicts, one per
        output component.
    """
    review = clean_review_text(Review)
    pred_label = predict_review(CNN_Model, review)
    pred_aspect = dominant_topic(review)
    return pred_label, pred_aspect
163
+
164
+
165
+
166
+
167
+
168
+
169
# Gradio UI: one text box in, two label widgets out (sentiment scores and
# the five highest-scoring aspects).
iface = gr.Interface(
    fn=gr_fun,
    inputs="text",
    outputs=[gr.Label(), gr.Label(num_top_classes=5)],
    examples=[
        "room condition was very bad",
        "Staff where excellent and the room was lovely really great hotel will definitely be back",
        "Couldn t find ice machine The junior suite was excellent with a fantastic bar",
        "Furniture in the room was a bit worn and tired for the money you pay would just expect a bit more it was ok",
        "Room was West facing and was far too warm particularly as the a c didn t seem to be working to well The shower room was excellent and large enough for my lady and I to be rude in Loved it",
    ],
)

iface.launch(inline=False)