Spaces:

dperales
/

ITACA_Insurance_Core_v4

Sleeping

App Files Files Community

ITACA_Insurance_Core_v4 / sentiment_analysis_v2.py

dperales

Upload 2 files

65ffad7 about 1 year ago

raw history blame contribute delete

No virus

3.96 kB

	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from transformers_interpret import SequenceClassificationExplainer
	import torch
	import pandas as pd


	class SentimentAnalysis:
	    """
	    Sentiment classification and justification on text data.

	    Two model/tokenizer/explainer triples are loaded: a default one used for
	    every language code other than 'es', and a second one used when the
	    caller passes lang == 'es'.

	    Attributes:
	        tokenizer: Hugging Face tokenizer for the default model
	        model: Hugging Face sequence-classification model (default)
	        explainer: SequenceClassificationExplainer built on model/tokenizer
	        tokenizer_sp: Hugging Face tokenizer for the 'es' model
	        model_sp: Hugging Face sequence-classification model for 'es'
	        explainer_sp: SequenceClassificationExplainer built on model_sp/tokenizer_sp
	    """

	    # Display names of the sentiment classes, in class-index order. Used both
	    # to relabel the default model's config and to index the prediction Series.
	    _LABELS = ("Negative", "Neutral", "Positive")

	    def __init__(self):
	        # Load Tokenizer & Model (default language path)
	        hub_location = 'cardiffnlp/twitter-roberta-base-sentiment'
	        self.tokenizer = AutoTokenizer.from_pretrained(hub_location)
	        self.model = AutoModelForSequenceClassification.from_pretrained(hub_location)

	        # Load Tokenizer & Model (lang == 'es' path)
	        hub_location_sp = 'finiteautomata/beto-sentiment-analysis'
	        self.tokenizer_sp = AutoTokenizer.from_pretrained(hub_location_sp)
	        self.model_sp = AutoModelForSequenceClassification.from_pretrained(hub_location_sp)

	        # Change model labels in config: replace the generic "LABEL_<n>"
	        # entries of the default model with readable names. Only the default
	        # model's config is modified; model_sp's config is left as loaded.
	        config = self.model.config
	        for class_id, label in enumerate(self._LABELS):
	            config.id2label[class_id] = label
	            # Fall back to the class index when the generic key is absent,
	            # so a differently-labelled config does not raise KeyError here.
	            config.label2id[label] = config.label2id.pop(f"LABEL_{class_id}", class_id)

	        # Instantiate explainers (after relabelling the default model config)
	        self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)
	        self.explainer_sp = SequenceClassificationExplainer(self.model_sp, self.tokenizer_sp)

	    def _predict(self, tokenizer, model, text):
	        """Run one tokenizer/model pair on text and return class probabilities."""
	        # NOTE(review): add_special_tokens=False is kept from the original
	        # implementation; confirm that omitting special tokens is intended
	        # for these checkpoints.
	        tokens = tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')

	        # Pure inference: no gradients are needed to compute probabilities.
	        with torch.no_grad():
	            outputs = model(**tokens)

	        # outputs[0] is treated as the logits; softmax over the last dimension,
	        # then the leading dimension is averaged away to get one value per class.
	        probs = torch.nn.functional.softmax(outputs[0], dim=-1)
	        probs = probs.mean(dim=0).detach().numpy()
	        return pd.Series(probs, index=list(self._LABELS), name='Predicted Probability')

	    def justify(self, text: str, lang: str):
	        """
	        Get html annotation for displaying sentiment justification over text.

	        Parameters:
	            text (str): The user input string to justify the sentiment of
	            lang (str): Language code; 'es' selects explainer_sp, any other
	                value selects the default explainer
	        Returns:
	            html: the object returned by the explainer's visualize() call,
	                used for plotting the sentiment prediction justification
	        """
	        explainer = self.explainer_sp if lang == 'es' else self.explainer

	        # The explainer must be called on the text before visualize(); the
	        # attributions it returns are not needed here, only the visualization.
	        explainer(text)

	        # NOTE(review): the "example.html" path is passed through unchanged;
	        # presumably visualize() also writes the page to that file - confirm
	        # against the transformers-interpret documentation.
	        return explainer.visualize("example.html")

	    def classify(self, text: str, lang: str):
	        """
	        Recognize Sentiment in text.

	        Parameters:
	            text (str): The user input string to perform sentiment classification on
	            lang (str): Language code; 'es' selects tokenizer_sp/model_sp, any
	                other value selects the default tokenizer/model
	        Returns:
	            predictions (pandas.Series): The predicted probabilities, indexed by
	                "Negative", "Neutral", "Positive" and named 'Predicted Probability'
	        """
	        if lang == 'es':
	            return self._predict(self.tokenizer_sp, self.model_sp, text)
	        return self._predict(self.tokenizer, self.model, text)

	    def run(self, text: str, lang: str):
	        """
	        Classify and Justify Sentiment in text.

	        Parameters:
	            text (str): The user input string to perform sentiment classification on
	            lang (str): Language code forwarded to classify() and justify()
	        Returns:
	            predictions (pandas.Series): The predicted probabilities for sentiment classes
	            html: the visualization object returned by justify()
	        """
	        predictions = self.classify(text, lang)
	        html = self.justify(text, lang)

	        return predictions, html