Spaces:

demomern
/

ABSA-CNN

Runtime error

App Files Files Community

demomern committed on Sep 15, 2023

Commit

4cd06db

•

1 Parent(s): a12e96b

Create app.py

Browse files

Files changed (1) hide show

app.py +183 -0

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import re
+import emoji
+import joblib
+from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
+import torch
+import torch.nn as nn
+import torchtext.vocab as vocab
+import gradio as gr
+# Load the pretrained GloVe vectors (name '6B', 100 dimensions) once at import time.
+# The same object supplies the embedding weights for the CNN (glove.vectors) and
+# the token-to-index table used when encoding reviews (glove.stoi).
+glove = vocab.GloVe(name='6B', dim=100)
+def remove_html(text):
+    """Return *text* with every ``<...>`` span deleted.
+
+    The match is non-greedy and ``.`` does not cross newlines, so only tags
+    that open and close on the same line are removed.
+    """
+    return re.sub(r"<.*?>", "", text)
+def remove_url(text):
+    """Return *text* with http(s):// and www. links stripped out.
+
+    A link runs from its prefix up to the next whitespace character.
+    """
+    return re.sub(r"https?://\S+|www\.\S+", "", text)
+def emoji_to_text(text):
+    """Spell out each emoji character in *text* using ``emoji.demojize``.
+
+    The string is examined one character at a time: a character that
+    ``emoji.is_emoji`` accepts is replaced by its demojized name padded with
+    a space on each side; every other character is copied through unchanged.
+    """
+    # NOTE(review): iterating per character means an emoji made of several
+    # code points is inspected piece by piece - confirm that is acceptable.
+    pieces = [
+        f" {emoji.demojize(char)} " if emoji.is_emoji(char) else char
+        for char in text
+    ]
+    return "".join(pieces)
+def clean_review_text(text):
+    """Normalise a raw review before it is handed to the models.
+
+    Applies, in this order: HTML tag removal, URL removal, emoji-to-text
+    conversion, and finally lower-casing of the whole string.
+    """
+    for step in (remove_html, remove_url, emoji_to_text):
+        text = step(text)
+    return text.lower()
+# Main CNN model definition
+class CNNHotelReviewsModel(nn.Module):
+    """Text CNN over frozen GloVe embeddings that emits a sigmoid score per review.
+
+    Each filter size gets its own convolution spanning the full embedding
+    width; every feature map is max-pooled over the sequence, the pooled
+    features are concatenated, passed through dropout and a linear layer,
+    and squashed with a sigmoid.
+    """
+    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
+        """Build the layers.
+
+        Args:
+            embedding_dim: Width of one embedding vector. It is used as the
+                kernel width of every convolution, so it must equal the
+                width of the module-level ``glove.vectors``.
+            n_filters: Number of output channels of each convolution.
+            filter_sizes: Iterable of kernel heights (tokens per window);
+                one convolution is created per entry.
+            output_dim: Number of outputs of the final linear layer.
+            dropout: Dropout probability applied to the pooled features.
+        """
+        super().__init__()
+        # Embedding weights come from the module-level GloVe object and stay frozen.
+        self.embedding = nn.Embedding.from_pretrained(glove.vectors, freeze=True)
+        self.convs = nn.ModuleList([
+            nn.Conv2d(in_channels=1,
+                      out_channels=n_filters,
+                      kernel_size=(fs, embedding_dim))
+            for fs in filter_sizes
+        ])
+        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
+        self.dropout = nn.Dropout(dropout)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, text):
+        """Return the sigmoid score(s) for a batch of token-index sequences.
+
+        Args:
+            text: Integer tensor of token indices shaped (batch, seq_len).
+                seq_len must be at least the largest filter size, otherwise
+                the corresponding convolution cannot be applied.
+
+        Returns:
+            Tensor of sigmoid outputs. Dimension 1 is squeezed, so with
+            ``output_dim == 1`` the result has shape (batch,).
+        """
+        # (batch, seq_len) -> (batch, 1, seq_len, embedding_dim): Conv2d needs a channel axis.
+        embedded = self.embedding(text).unsqueeze(1)
+        # nn.functional is spelled out because this file never imports the
+        # usual "F" alias; the previous F.relu / F.max_pool1d raised NameError.
+        feature_maps = [nn.functional.relu(conv(embedded)).squeeze(3) for conv in self.convs]
+        # Max over the whole remaining sequence axis: one value per filter.
+        pooled = [nn.functional.max_pool1d(fmap, fmap.shape[2]).squeeze(2) for fmap in feature_maps]
+        features = self.dropout(torch.cat(pooled, dim=1))
+        return self.sigmoid(self.fc(features)).squeeze(1)
+# Use the GPU when CUDA is available, otherwise fall back to the CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Must match the dim=100 passed to vocab.GloVe above
+EMBEDDING_DIM = 100
+OUTPUT_DIM = 1  # One sigmoid output: the "positive" score; "negative" is derived as 1 - score
+N_FILTERS = 250
+FILTER_SIZES = [2, 3, 4]
+DROPOUT = 0.1
+# Best Hyperparameters: {'n_filters': 250, 'filter_sizes': [2, 3, 4], 'dropout': 0.1}
+CNN_Model = CNNHotelReviewsModel(EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT)
+# Load the saved state_dict into the model, mapping tensors onto the selected device
+CNN_Model.load_state_dict(torch.load("hotel_review_model.pth", map_location=device))
+CNN_Model = CNN_Model.to(device)  # Move the model to the selected device (GPU or CPU)
+CNN_Model.eval()  # Set the model to evaluation mode
+# LDA model and its dictionary, used for aspect selection.
+# NOTE(review): torch.load above and joblib.load here unpickle their input -
+# only ship artifact files from a trusted source.
+lda_model = joblib.load('lda_model.pkl')
+dictionary = joblib.load('dictionary.pkl')
+# CNN sentiment prediction
+def predict_review(model, review, max_len=128):
+    """Score one review with *model* and return both class probabilities.
+
+    Args:
+        model: Callable network that maps a (1, max_len) index tensor to a
+            single-element tensor; it is switched to eval mode here.
+        review: Review text; it is tokenised by splitting on whitespace.
+        max_len: Exact number of token indices fed to the model. Longer
+            reviews are truncated, shorter ones are right-padded.
+
+    Returns:
+        ``{'positive': p, 'negative': 1 - p}`` where ``p`` is the model output.
+    """
+    # Tokens missing from the GloVe vocabulary fall back to index 0, the
+    # same index that is used for padding below.
+    token_ids = [glove.stoi.get(word, 0) for word in review.split()[:max_len]]
+    token_ids.extend([0] * (max_len - len(token_ids)))
+    # Add the batch dimension and place the input on the same device as the model.
+    batch = torch.tensor(token_ids).unsqueeze(0).to(device)
+    model.eval()
+    with torch.no_grad():
+        positive = model(batch).item()
+    return {'positive': positive, 'negative': 1-positive}
+# Map each LDA topic id (0-9) to a human-readable aspect name.
+# The names reflect the author's interpretation of the topics; the analysis
+# behind that interpretation is not part of this file.
+aspect_label = {
+0: "Reception & Service Efficiency",
+1: "Transportation & Proximity",
+2: "Room Comfort & Staff Courtesy",
+3: "Location & Staff Quality",
+4: "Room Discrepancies",
+5: "Hotel Quality vs Price",
+6: "Booking & Payment Issues",
+7: "Room Ambiance & Noise",
+8: "Amenities & Value",
+9: "Room Size & Condition",
+}
+def dominant_topic(text):
+    """Return the LDA topic distribution of *text*, keyed by aspect label.
+
+    Despite the name, every topic reported by the LDA model is returned, not
+    only the strongest one; the caller decides how many to display.
+
+    Args:
+        text: Cleaned review text; it is tokenised by splitting on whitespace.
+
+    Returns:
+        Dict mapping an ``aspect_label`` name to that topic's probability as
+        a float. Empty when the model reports no topics.
+
+    Raises:
+        KeyError: If the model reports a topic id missing from ``aspect_label``.
+    """
+    bow = dictionary.doc2bow(text.split())
+    topics = lda_model.get_document_topics(bow)
+    # The former unused max(topics, ...) lookup was dropped: its result was
+    # never read and it raised ValueError on an empty topic list.
+    return {aspect_label[topic_id]: float(prob) for topic_id, prob in topics}
+def gr_fun(Review):
+    """Gradio callback: clean the review, then return (sentiment, aspects).
+
+    The first element is the dict produced by ``predict_review`` with the
+    module-level ``CNN_Model``; the second is the dict produced by
+    ``dominant_topic``. Both are computed from the same cleaned text.
+    """
+    cleaned = clean_review_text(Review)
+    return predict_review(CNN_Model, cleaned), dominant_topic(cleaned)
+# Gradio UI: one free-text input wired to gr_fun, which returns two dicts.
+# The first Label shows the sentiment scores, the second shows the aspect
+# scores limited to the five highest-scoring classes.
+iface = gr.Interface(
+    fn=gr_fun,
+    inputs="text",
+    outputs=[gr.Label(), gr.Label(num_top_classes=5)],
+    examples=[
+        "room condition was very bad",
+        "Staff where excellent and the room was lovely really great hotel will definitely be back",
+        "Couldn t find ice machine The junior suite was excellent with a fantastic bar",
+        "Furniture in the room was a bit worn and tired for the money you pay would just expect a bit more  it was ok",
+        "Room was West facing and was far too warm particularly as the a c didn t seem to be working to well  The shower room was excellent and large enough for my lady and I to be rude in Loved it"
+        ]
+    )
+# Start the web app when this module is executed.
+iface.launch(inline = False)