import numpy as np
import csv
from typing import Optional
from urllib.request import urlopen
import gradio as gr
class SentimentTransform():
def __init__(
self,
model_name: str = "cardiffnlp/twitter-roberta-base-sentiment",
highlight: bool = False,
positive_sentiment_name: str = "positive",
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
sensitivity: float = 0,
**kwargs,
):
"""
Sentiment Ops.
Parameters
-------------
model_name: str
The name of the model
sensitivity: float
How confident it is about being `neutral`. If you are dealing with news sources,
you probably want less sensitivity
"""
self.model_name = model_name
self.highlight = highlight
self.positive_sentiment_name = positive_sentiment_name
self.max_number_of_shap_documents = max_number_of_shap_documents
self.min_abs_score = min_abs_score
self.sensitivity = sensitivity
for k, v in kwargs.items():
setattr(self, k, v)
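    # Mask user mentions and URLs, following the preprocessing suggested for the
    # cardiffnlp Twitter models.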
def preprocess(self, text: str):
new_text = []
for t in text.split(" "):
t = "@user" if t.startswith("@") and len(t) > 1 else t
t = "http" if t.startswith("http") else t
new_text.append(t)
return " ".join(new_text)
@property
def classifier(self):
if not hasattr(self, "_classifier"):
import transformers
self._classifier = transformers.pipeline(
return_all_scores=True,
model=self.model_name,
)
return self._classifier
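    # Fetch the label names for a TweetEval task from the cardiffnlp repository.
    # Currently unused: `label_mapping` below hard-codes the sentiment mapping.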
def _get_label_mapping(self, task: str):
# Note: this is specific to the current model
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urlopen(mapping_link) as f:
html = f.read().decode("utf-8").split("\n")
csvreader = csv.reader(html, delimiter="\t")
labels = [row[1] for row in csvreader if len(row) > 1]
return labels
@property
def label_mapping(self):
return {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}
def analyze_sentiment(
self,
text,
highlight: bool = False,
positive_sentiment_name: str = "positive",
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
):
if text is None:
return None
labels = self.classifier([str(text)], truncation=True, max_length=512)
ind_max = np.argmax([l["score"] for l in labels[0]])
sentiment = labels[0][ind_max]["label"]
max_score = labels[0][ind_max]["score"]
sentiment = self.label_mapping.get(sentiment, sentiment)
if sentiment.lower() == "neutral" and max_score > self.sensitivity:
overall_sentiment = 1e-5
elif sentiment.lower() == "neutral":
# get the next highest score
new_labels = labels[0][:ind_max] + labels[0][(ind_max + 1):]
new_ind_max = np.argmax([l["score"] for l in new_labels])
new_max_score = new_labels[new_ind_max]["score"]
new_sentiment = new_labels[new_ind_max]["label"]
new_sentiment = self.label_mapping.get(new_sentiment, new_sentiment)
overall_sentiment = self._calculate_overall_sentiment(
new_max_score, new_sentiment
)
else:
overall_sentiment = self._calculate_overall_sentiment(max_score, sentiment)
        # Nudge an exact 0 up to a tiny value so a zero overall score is never returned.
        if overall_sentiment == 0:
            overall_sentiment = 1e-5
if not highlight:
return {
"sentiment": sentiment,
"overall_sentiment_score": overall_sentiment,
}
shap_documents = self.get_shap_values(
text,
sentiment_ind=ind_max,
max_number_of_shap_documents=max_number_of_shap_documents,
min_abs_score=min_abs_score,
)
return {
"sentiment": sentiment,
"score": max_score,
"overall_sentiment": overall_sentiment,
"highlight_chunk_": shap_documents,
}
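    # Sign the confidence: the positive label keeps +score, anything else gets -score.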
def _calculate_overall_sentiment(self, score: float, sentiment: str):
if sentiment.lower().strip() == self.positive_sentiment_name:
return score
else:
return -score
    @property
    def explainer(self):
        if hasattr(self, "_explainer"):
            return self._explainer
        try:
            import shap
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "The `shap` package is required for highlighting; install it with `pip install shap`."
            )
        self._explainer = shap.Explainer(self.classifier)
        return self._explainer
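    # Compute per-token SHAP attributions for the requested sentiment class and return
    # the most influential tokens as {"text": ..., "score": ...} dicts.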
def get_shap_values(
self,
text: str,
sentiment_ind: int = 2,
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
):
"""Get SHAP values"""
shap_values = self.explainer([text])
cohorts = {"": shap_values}
cohort_labels = list(cohorts.keys())
cohort_exps = list(cohorts.values())
features = cohort_exps[0].data
feature_names = cohort_exps[0].feature_names
values = np.array([cohort_exps[i].values for i in range(len(cohort_exps))])
shap_docs = [
{"text": v, "score": f}
for f, v in zip(
[x[sentiment_ind] for x in values[0][0].tolist()], feature_names[0]
)
]
        # Sort tokens by SHAP score and, if requested, keep only the top ones.
        sorted_scores = sorted(shap_docs, key=lambda x: x["score"], reverse=True)
        if max_number_of_shap_documents is not None:
            sorted_scores = sorted_scores[:max_number_of_shap_documents]
        return [d for d in sorted_scores if abs(d["score"]) > min_abs_score]
    def transform(self, text):
        return self.analyze_sentiment(
            text,
            highlight=self.highlight,
            max_number_of_shap_documents=self.max_number_of_shap_documents,
            min_abs_score=self.min_abs_score,
        )
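# Gradio callback: pick the model for the selected dropdown option and run a single
# prediction. Note that a new SentimentTransform (and hence a new pipeline) is built
# on every call, so each request reloads the model.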
def sentiment_classifier(text, model_type, sensitivity):
    if model_type == 'Social Media Model':
        model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    else:
        # 'Survey Model' and any other value fall back to the survey model.
        model_name = "j-hartmann/sentiment-roberta-large-english-3-classes"
    model = SentimentTransform(model_name=model_name, sensitivity=sensitivity)
    res_dict = model.transform(text)
    return res_dict['sentiment'], res_dict['overall_sentiment_score']
demo = gr.Interface(
    fn=sentiment_classifier,
    inputs=[
        gr.Textbox(placeholder="Put the text here and click 'submit' to predict its sentiment", label="Input Text"),
        gr.Dropdown(["Social Media Model", "Survey Model"], value="Survey Model", label="Select the model that you want to use."),
        gr.Slider(0, 1, step=0.01, label="Sensitivity (how confident the model must be to keep a `neutral` prediction; for news sources you probably want a lower value)"),
    ],
    outputs=[gr.Textbox(label="Sentiment"), gr.Textbox(label="Sentiment Score")],
)
demo.launch(debug=True)
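# Minimal usage sketch outside the Gradio UI (assuming the model downloads succeed):
#     model = SentimentTransform(
#         model_name="cardiffnlp/twitter-roberta-base-sentiment", sensitivity=0.5
#     )
#     model.transform("I love this!")
#     # -> {"sentiment": "positive", "overall_sentiment_score": <signed confidence>}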