# ABSA-CNN / app.py
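# Aspect-based sentiment analysis (ABSA) demo for hotel reviews:
# a GloVe + CNN classifier predicts review sentiment, an LDA topic model
# maps the review onto predefined aspects, and Gradio serves the UI.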
import re
import emoji
import joblib
# from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchtext.vocab as vocab
import gradio as gr
# Load the spaCy English pipeline (used below for stop-word removal and lemmatization).
import spacy
nlp = spacy.load("en_core_web_sm")

# Load pre-trained GloVe embeddings (6B corpus, 100-dimensional vectors).
# glove.stoi maps token -> index and glove.vectors holds the embedding matrix.
glove = vocab.GloVe(name='6B', dim=100)
def remove_html(text):
    patt_html = r"<.*?>"
    text = re.sub(patt_html, "", text)
    return text

def remove_url(text):
    patt_url = r"https?://\S+|www\.\S+"
    text = re.sub(patt_url, "", text)
    return text
def emoji_to_text(text):
    # Replace each emoji character with its demojized text form, e.g. "😀" -> ":grinning_face:".
    res_str = ""
    for ch in text:
        if emoji.is_emoji(ch):
            res_str += f" {emoji.demojize(ch)} "
        else:
            res_str += ch
    return res_str
def clean_review_text(text):
    # Remove HTML tags.
    text = remove_html(text)
    # Remove URLs.
    text = remove_url(text)
    # Convert emoji into their text descriptions.
    text = emoji_to_text(text)
    # Lower-case everything.
    text = text.lower()
    # Build a spaCy document and drop:
    #   token.is_stop  -> stop words (is, am, are, a, an, the, ...)
    #   token.is_punct -> punctuation (., !, ?, :, ;, ...)
    #   token.is_space -> whitespace tokens (spaces, tabs, ...)
    # token.lemma_ maps each remaining word to its root form (go | went | gone | going -> go).
    doc = nlp(text)
    clean_tokens_wds = [token.lemma_ for token in doc if not (token.is_stop or token.is_punct or token.is_space)]
    return " ".join(clean_tokens_wds)
# Main CNN model definition
class CNNHotelReviewsModel(nn.Module):
    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
        super().__init__()
        # Frozen embedding layer initialised from the pre-trained GloVe vectors.
        self.embedding = nn.Embedding.from_pretrained(glove.vectors, freeze=True)
        # One 2-D convolution per filter size, each spanning the full embedding width.
        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels=1,
                      out_channels=n_filters,
                      kernel_size=(fs, embedding_dim))
            for fs in filter_sizes
        ])
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text):
        # text: [batch, seq_len] token indices
        embedded = self.embedding(text)        # [batch, seq_len, emb_dim]
        embedded = embedded.unsqueeze(1)       # [batch, 1, seq_len, emb_dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]         # each [batch, n_filters, seq_len - fs + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]  # each [batch, n_filters]
        cat = self.dropout(torch.cat(pooled, dim=1))                                # [batch, n_filters * len(filter_sizes)]
        return self.sigmoid(self.fc(cat)).squeeze(1)                                # [batch] probability of a positive review
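
# Quick shape sanity check (illustrative sketch only, not executed by the app):
#   model = CNNHotelReviewsModel(100, 250, [2, 3, 4], 1, 0.1)
#   dummy = torch.randint(0, len(glove.stoi), (4, 128))  # batch of 4 reviews, 128 tokens each
#   with torch.no_grad():
#       print(model(dummy).shape)  # torch.Size([4])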
# Select GPU if available, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
EMBEDDING_DIM = 100
OUTPUT_DIM = 1  # single sigmoid output: probability that the review is positive
N_FILTERS = 250
FILTER_SIZES = [2, 3, 4]
DROPOUT = 0.1
# Best Hyperparameters: {'n_filters': 250, 'filter_sizes': [2, 3, 4], 'dropout': 0.1}
CNN_Model = CNNHotelReviewsModel(EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT)
# Load the saved state_dict into the model
CNN_Model.load_state_dict(torch.load("hotel_review_model.pth", map_location=device))
CNN_Model = CNN_Model.to(device) # Move the model to GPU
CNN_Model.eval() # Set the model to evaluation mode
# Load the LDA topic model and dictionary used for aspect selection
# (expected to be a gensim LdaModel and Dictionary serialized with joblib).
lda_model = joblib.load('lda_model.pkl')
dictionary = joblib.load('dictionary.pkl')
# CNN sentiment prediction for a cleaned review
def predict_review(model, review, max_len=128):
    # Tokenize and map tokens to GloVe indices (unknown tokens fall back to index 0).
    tokens = review.split()
    indices = [glove.stoi.get(token, 0) for token in tokens]
    # Pad or truncate to max_len
    if len(indices) < max_len:
        indices += [0] * (max_len - len(indices))
    else:
        indices = indices[:max_len]
    # Convert to tensor and add batch dimension
    tensor = torch.tensor(indices).unsqueeze(0)
    # Forward pass
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        output = model(tensor.to(device))
    # The model outputs the probability of the positive class.
    prob = output.item()
    return {'positive': prob, 'negative': 1 - prob}
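
# Example call (scores come from the trained model; shown here only for the return shape):
#   predict_review(CNN_Model, "staff friendly room clean")
#   -> {'positive': <float in [0, 1]>, 'negative': 1 - positive}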
# Human-readable aspect labels assigned to the LDA topic ids.
aspect_label = {
    0: "Reception & Service Efficiency",
    1: "Transportation & Proximity",
    2: "Room Comfort & Staff Courtesy",
    3: "Location & Staff Quality",
    4: "Room Discrepancies",
    5: "Hotel Quality vs Price",
    6: "Booking & Payment Issues",
    7: "Room Ambiance & Noise",
    8: "Amenities & Value",
    9: "Room Size & Condition",
}
def dominant_topic(text):
    # Convert the cleaned review into a bag-of-words and score every LDA topic.
    bow = dictionary.doc2bow(text.split())
    topics = lda_model.get_document_topics(bow)
    # Return all topic probabilities keyed by their aspect label.
    return {aspect_label[topic_id]: float(prob) for topic_id, prob in topics}
def gr_fun(Review):
    # Clean the raw review, then run both the sentiment CNN and the aspect LDA model.
    review = clean_review_text(Review)
    pred_label = predict_review(CNN_Model, review)
    pred_aspect = dominant_topic(review)
    return pred_label, pred_aspect
iface = gr.Interface(
    fn=gr_fun,
    inputs="text",
    # First label: sentiment probabilities; second label: top-5 aspect probabilities.
    outputs=[gr.Label(), gr.Label(num_top_classes=5)],
    examples=[
        "room condition was very bad",
        "Staff where excellent and the room was lovely really great hotel will definitely be back",
        "Couldn t find ice machine The junior suite was excellent with a fantastic bar",
        "Furniture in the room was a bit worn and tired for the money you pay would just expect a bit more it was ok",
        "Room was West facing and was far too warm particularly as the a c didn t seem to be working to well The shower room was excellent and large enough for my lady and I to be rude in Loved it"
    ]
)
iface.launch(inline=False)