Spaces:

spark-ds549
/

Epik

Sleeping

Epik / app /cosmic_view.py

Minh Q. Le

Merged COSMIC and GPT UI

df89157 about 1 year ago

8.23 kB

	import os
	import pickle
	import tempfile
	import gradio as gr
	from tqdm import tqdm
	from app.utils import (
	create_input_instruction,
	format_prediction_ouptut,
	remove_temp_dir,
	decode_numeric_label,
	decode_speaker_role,
	display_sentiment_score_table,
	sentiment_flow_plot,
	EXAMPLE_CONVERSATIONS,
	)
	from fairseq.data.data_utils import collate_tokens

	import sys

	sys.path.insert(0, "../") # necessary to load modules outside of app

	from app import roberta, comet, COSMIC_MODEL, cosmic_args
	from preprocessing import preprocess
	from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader


	def cosmic_preprocess(input, dir="."):
	    """Turn raw user input into the pickle files used by the later steps.

	    The input is parsed with ``preprocess.process_user_input``, written to
	    ``epik.csv`` inside *dir*, and then converted into two pickle files:
	    ``epik.pkl`` (speakers, labels, sentences, later merged with the id
	    lists) and ``epik_sentences.pkl`` (sentences only).

	    Args:
	        input: The conversation as entered by the user.
	        dir: Directory in which all intermediate files are created.

	    Returns:
	        A ``(epik_pickle_path, sentences_pickle_path)`` tuple.

	    Raises:
	        gr.Error: If the parser reports ``success`` as falsy; the error
	            carries the parser's ``message``.
	    """
	    parsed = preprocess.process_user_input(input)
	    if not parsed["success"]:
	        raise gr.Error(parsed["message"])

	    # Step 1: dump the parsed conversation to CSV (no labels at inference).
	    csv_path = os.path.join(dir, "epik.csv")
	    conversations = preprocess.preapre_csv(
	        parsed["data"], csv_path, with_label=False
	    )

	    # Step 2: CSV -> pickle holding speakers, labels and sentences.
	    epik_pkl = os.path.join(dir, "epik.pkl")
	    list_columns = ["Text", "ParticipantRoleEncoded", "LabelNumeric"]
	    column_order = ["ParticipantRoleEncoded", "LabelNumeric", "Text"]
	    preprocess.convert_to_pickle(
	        source=csv_path,
	        dest=epik_pkl,
	        index_col="ConversationId",
	        list_type_columns=list_columns,
	        order=column_order,
	        exclude=["ParticipantRole"],
	    )

	    # Step 3: write the id split files. The 0, 0, 1 arguments are presumably
	    # the train/test/validation proportions, so every conversation ends up
	    # in the validation set -- verify against preprocess.split_and_save_ids.
	    conversation_ids = conversations["ConversationId"].to_list()
	    preprocess.split_and_save_ids(conversation_ids, 0, 0, 1, dir=dir)

	    # Step 4: merge the id lists into the main pickle.
	    id_files = ["train_set.txt", "test_set.txt", "validation_set.txt"]
	    preprocess.merge_pkl_with_ids(pickle_src=epik_pkl, ids_files=id_files, dir=dir)

	    # Step 5: a second pickle that only carries the sentences.
	    sentences_pkl = os.path.join(dir, "epik_sentences.pkl")
	    dropped_columns = ["ParticipantRole", "ParticipantRoleEncoded", "LabelNumeric"]
	    preprocess.convert_to_pickle(
	        source=csv_path,
	        dest=sentences_pkl,
	        index_col="ConversationId",
	        list_type_columns=["Text"],
	        exclude=dropped_columns,
	    )

	    return epik_pkl, sentences_pkl


	def cosmic_roberta_extract(path, dest_dir="."):
	    """Extract RoBERTa features for every conversation stored at *path*.

	    The pickle at *path* must unpack into six items: speakers, labels,
	    sentences, train ids, test ids and validation ids. For each conversation
	    id the sentences are encoded with the module-level ``roberta`` model and
	    the position-0 vector of each of the last four hidden layers is kept
	    (presumably the sentence-start token embedding -- confirm against the
	    RoBERTa hub interface).

	    Args:
	        path: Pickle file to read the conversations from.
	        dest_dir: Directory in which ``epik_features_roberta.pkl`` is written.

	    Returns:
	        Path of the written feature pickle.
	    """
	    # NOTE: pickle.load can execute arbitrary code; only pass files that
	    # this application produced itself.
	    with open(path, "rb") as src:
	        speakers, labels, sentences, train_ids, test_ids, valid_ids = pickle.load(
	            src
	        )

	    # layer_features[k] maps conversation id -> per-sentence vectors taken
	    # from hidden layer -(k + 1), i.e. index 0 is the last layer.
	    layer_features = [{}, {}, {}, {}]

	    for conv_id in tqdm(train_ids + test_ids + valid_ids):
	        # Drop non-ASCII characters before tokenising.
	        utterances = [
	            s.encode("ascii", errors="ignore").decode("utf-8")
	            for s in sentences[conv_id]
	        ]
	        batch = collate_tokens([roberta.encode(s) for s in utterances], pad_idx=1)
	        hidden_states = roberta.extract_features(batch, return_all_hiddens=True)
	        for depth, store in enumerate(layer_features, start=1):
	            store[conv_id] = list(hidden_states[-depth][:, 0, :].detach().numpy())

	    roberta_feature_path = os.path.join(dest_dir, "epik_features_roberta.pkl")
	    # Context manager guarantees the file is flushed and closed before the
	    # path is handed to the next pipeline step.
	    with open(roberta_feature_path, "wb") as out:
	        pickle.dump(
	            [
	                speakers,
	                labels,
	                *layer_features,
	                sentences,
	                train_ids,
	                test_ids,
	                valid_ids,
	            ],
	            out,
	        )

	    return roberta_feature_path


	def cosmic_comet_extract(path, dir="."):
	    """Extract COMET features for the sentences pickled at *path*.

	    Args:
	        path: Pickle file holding the sentences to process.
	        dir: Directory in which ``epik_features_comet.pkl`` is written.

	    Returns:
	        Path of the written feature pickle.
	    """
	    print("Extracting features in", path)
	    # NOTE: pickle.load can execute arbitrary code; only pass files that
	    # this application produced itself.
	    with open(path, "rb") as src:
	        sentences = pickle.load(src)
	    features = comet.extract(sentences)

	    comet_feature_path = os.path.join(dir, "epik_features_comet.pkl")
	    # Close (and therefore flush) the file before returning its path.
	    with open(comet_feature_path, "wb") as out:
	        pickle.dump(features, out)

	    return comet_feature_path


	def cosmic_classifier(input):
	    """Run the full COSMIC pipeline on a user-supplied conversation.

	    A temporary directory is created under the current working directory,
	    the input is preprocessed into it, RoBERTa and COMET features are
	    extracted, and the COSMIC model predicts a label per utterance.

	    Args:
	        input: The conversation as entered by the user.

	    Returns:
	        The value produced by ``format_prediction_ouptut`` for the first
	        conversation's speaker roles, sentences and predicted labels.

	    Raises:
	        gr.Error: Propagated from ``cosmic_preprocess`` on invalid input.
	    """
	    # create a temporary directory for the input data
	    temp_dir = tempfile.mkdtemp(dir=os.getcwd(), prefix="temp")

	    try:
	        epik_path, epik_sentences_path = cosmic_preprocess(input, temp_dir)

	        roberta_path = cosmic_roberta_extract(epik_path, temp_dir)
	        comet_path = cosmic_comet_extract(epik_sentences_path, temp_dir)

	        # use cosmic model to make predictions
	        data_loader, ids = get_valid_dataloader(roberta_path, comet_path)
	        predictions = predict(COSMIC_MODEL, data_loader, cosmic_args)

	        with open(epik_path, "rb") as src:
	            speakers, _, sentences, _, _, _ = pickle.load(src)

	        # Assuming that there's only one conversation
	        conv_id = ids[0]
	        speaker_roles = [
	            decode_speaker_role(numeric_role) for numeric_role in speakers[conv_id]
	        ]
	        labels = [decode_numeric_label(pred) for pred in predictions[0]]
	        return format_prediction_ouptut(speaker_roles, sentences[conv_id], labels)
	    finally:
	        # Clean up even when a step above fails (e.g. gr.Error on invalid
	        # input); otherwise every failed request leaves a temp directory.
	        print()
	        print("======= Removing Temporary Directory =======")
	        remove_temp_dir(temp_dir)


	def cosmic_ui():
	    """Build the Gradio UI for the COSMIC model.

	    The page holds an introduction, an input area (example dropdown or free
	    text), a read-only box for the predicted labels, and a section that
	    plots the sentiment flow from those predictions.

	    Returns:
	        The ``gr.Blocks`` instance containing the whole interface.
	    """
	    with gr.Blocks() as cosmic_model:
	        gr.Markdown(
	            """
	            # COSMIC
	            COSMIC is a popular model for predicting sentiment labels using the entire
	            context of the conversation. In other words, it analyzes the previous
	            messages to predict the sentiment label for the current message.<br/>
	            The model was adopted from this
	            [repo](https://github.com/declare-lab/conv-emotion.git), implemented based
	            on this research [paper](https://arxiv.org/pdf/2010.02795.pdf).

	            ```bash COSMIC: COmmonSense knowledge for eMotion Identification in
	            Conversations. D. Ghosal, N. Majumder, A. Gelbukh, R. Mihalcea, & S. Poria. Findings of EMNLP 2020.
	            ```
	            """
	        )

	        create_input_instruction()
	        with gr.Row():
	            with gr.Column():
	                example_dropdown = gr.Dropdown(
	                    choices=["-- Not Selected --"] + list(EXAMPLE_CONVERSATIONS.keys()),
	                    value="-- Not Selected --",
	                    label="Select an example",
	                )

	                gr.Markdown('<p style="text-align: center;color: gray;">--- OR ---</p>')

	                conversation_input = gr.TextArea(
	                    value="",
	                    label="Input your conversation",
	                    placeholder="Please input your conversation here",
	                    lines=15,
	                    max_lines=15,
	                )

	                def on_example_change(choice):
	                    # The "-- Not Selected --" entry is not a key of
	                    # EXAMPLE_CONVERSATIONS, so it clears the text area.
	                    if choice in EXAMPLE_CONVERSATIONS:
	                        return EXAMPLE_CONVERSATIONS[choice]

	                    return ""

	                example_dropdown.input(
	                    on_example_change,
	                    inputs=example_dropdown,
	                    outputs=conversation_input,
	                )

	            with gr.Column():
	                output = gr.Textbox(
	                    value="",
	                    label="Predicted Sentiment Labels",
	                    lines=22,
	                    max_lines=22,
	                    interactive=False,
	                )

	        # NOTE(review): the original nesting level of this button could not be
	        # recovered from the source formatting; it is placed below the
	        # input/output row -- confirm against the intended layout.
	        submit_btn = gr.Button(value="Submit")
	        submit_btn.click(cosmic_classifier, conversation_input, output)

	        gr.Markdown("# Sentiment Flow Plot")
	        with gr.Row():
	            with gr.Column(scale=1):
	                display_sentiment_score_table()
	            with gr.Column(scale=2):
	                plot_box = gr.Plot(label="Analysis Plot")

	        plot_btn = gr.Button(value="Plot Sentiment Flow")
	        plot_btn.click(sentiment_flow_plot, inputs=[output], outputs=[plot_box])

	        # Reset all outputs whenever a change in the input is detected. One
	        # listener clears both the prediction box and the plot, so a separate
	        # listener that only clears the prediction box is not needed.
	        conversation_input.change(
	            lambda _text: ("", None),
	            conversation_input,
	            outputs=[output, plot_box],
	        )
	    return cosmic_model