Spaces:

alistairmcleay
/

cambridge-masters-project

Runtime error

App Files Files Community

cambridge-masters-project / scripts /simulate_interaction.py

alistairmcleay

Put models on model hub

2936a70 over 2 years ago

raw

history blame contribute delete

7.06 kB

	import sys
	import traceback
	import pandas as pd

	# from tqdm import tqdm
	from UBAR_code.interaction import UBAR_interact
	from user_model_code.interaction import multiwoz_interact
	from UBAR_code.interaction.UBAR_interact import bcolors


	# from tqdm import tqdm
	from scripts.UBAR_code.interaction import UBAR_interact
	from scripts.user_model_code.interaction import multiwoz_interact
	from scripts.UBAR_code.interaction.UBAR_interact import bcolors


	def instantiate_agents():
	    """Create the UBAR system model and the user-model agent.

	    Both agents are built from hard-coded checkpoint and config paths
	    under ``cambridge-masters-project/``.

	    Returns:
	        A ``(sys_model, user_model)`` tuple: the ``UbarSystemModel``
	        instance followed by the ``NeuralAgent`` instance.
	    """
	    # System side: checkpoint plus the UBAR interaction config.
	    ubar_weights = "cambridge-masters-project/epoch50_trloss0.59_gpt2"
	    ubar_config = "cambridge-masters-project/scripts/UBAR_code/interaction/config.yaml"
	    system_agent = UBAR_interact.UbarSystemModel("UBAR_sys_model", ubar_weights, ubar_config)

	    # User side: checkpoint plus the user-model interaction config.
	    simulator_weights = "cambridge-masters-project/MultiWOZ-full_checkpoint_step340k"
	    simulator_config = "cambridge-masters-project/scripts/user_model_code/interaction/config.yaml"
	    user_agent = multiwoz_interact.NeuralAgent("user", simulator_weights, simulator_config)

	    return system_agent, user_agent


	def read_multiwoz_data():
	    """Load the raw MultiWOZ 2.0 ``data.json`` file.

	    Returns:
	        The object produced by ``pd.read_json`` for the hard-coded raw
	        data path.
	    """
	    return pd.read_json("cambridge-masters-project/data/raw/UBAR/multi-woz/data.json")


	def load_test_val_lists(
	    val_list_file="cambridge-masters-project/data/raw/UBAR/multi-woz/valListFile.json",
	    test_list_file="cambridge-masters-project/data/raw/UBAR/multi-woz/testListFile.json",
	):
	    """Load the dialogue filenames of the validation and test splits.

	    The previous body only assigned the two paths and fell off the end,
	    so it returned ``None`` and any caller unpacking two values from it
	    (as ``main`` does) failed with a ``TypeError``.

	    Args:
	        val_list_file: Path of the validation split list.
	        test_list_file: Path of the test split list.

	    Returns:
	        A ``(val_list, test_list)`` tuple of lists of stripped, non-empty
	        lines, in file order.

	    Raises:
	        OSError: If either file cannot be opened.
	    """
	    # NOTE(review): assumes the MultiWOZ split files are plain text with one
	    # dialogue filename per line despite their ``.json`` extension -- confirm
	    # against the data actually shipped with the project.
	    def _read_names(path):
	        with open(path, "r", encoding="utf-8") as f:
	            return [line.strip() for line in f if line.strip()]

	    return _read_names(val_list_file), _read_names(test_list_file)


	def main(
	    write_to_file=False, ground_truth_system_responses=False, train_only=True, n_dialogues="all", log_successes=False
	):
	    """Simulate dialogues between the user model and the system over MultiWOZ goals.

	    For every goal the user model and the system exchange up to 50 turns.
	    A dialogue counts as successful when ``user_model.is_terminated()``
	    becomes true. A summary is printed at the end.

	    Args:
	        write_to_file: If true, the user utterances of each successfully
	            terminated dialogue are appended as tab-separated rows to the
	            user-utterance output file (a header row is written first).
	        ground_truth_system_responses: If true, system turns are taken from
	            the logged dialogue in the raw data instead of being generated
	            by the system model.
	        train_only: If true, dialogues whose filename appears in the
	            validation or test list are skipped.
	        n_dialogues: Number of goals to load, or ``"all"`` to use one per
	            column of the raw data.
	        log_successes: If true, the running success count is written to the
	            success log file every 100 dialogues (overwriting it).
	    """
	    sys_model, user_model = instantiate_agents()

	    # TODO: move hardcoded vars into config file
	    raw_mwoz_20_path = "cambridge-masters-project/data/raw/UBAR/multi-woz/data.json"
	    user_utterances_out_path = "cambridge-masters-project/data/preprocessed/UBAR/user_utterances_from_simulator.txt"
	    logging_successes_path = "cambridge-masters-project/data/preprocessed/UBAR/logging_successes"
	    sys_model.print_intermediary_info = False
	    user_model.print_intermediary_info = False

	    # Parse the raw data once; it was previously read twice from this same path.
	    df_mwoz_data = pd.read_json(raw_mwoz_20_path)
	    if n_dialogues == "all":
	        n_dialogues = len(df_mwoz_data.columns)

	    print("Loading goals...")
	    goals = multiwoz_interact.read_multiWOZ_20_goals(raw_mwoz_20_path, n_dialogues)

	    # Write column headers
	    if write_to_file:
	        with open(user_utterances_out_path, "w") as f:
	            f.write("Dialogue #\tDialogue ID\tTurn #\tSystem Response\n")

	    print("Loading data...")
	    val_list, test_list = load_test_val_lists()
	    # One set so the per-dialogue membership test below is O(1).
	    held_out = set(val_list) | set(test_list)

	    successful_dialogues = 0
	    total_dialogues_generated = 0  # train dialogues only
	    for dialogue_idx, (goal, dialogue_filename) in enumerate(zip(goals, df_mwoz_data.columns)):
	        # log successful_dialogues to logging_successes_path every 100 dialogues
	        if log_successes and dialogue_idx % 100 == 0:
	            with open(logging_successes_path, "w") as f:
	                f.write(str(successful_dialogues) + " / " + str(total_dialogues_generated))

	        if train_only and dialogue_filename in held_out:
	            continue

	        curr_dialogue_user_utterances_formatted = []
	        total_dialogues_generated += 1
	        print("Dialogue: {}".format(dialogue_filename))

	        # There are occasionally exceptions thrown from one of the agents, usually the user
	        # In this case we simply continue to the next dialogue
	        try:
	            # Reset state after each dialogue
	            sys_model.init_session()
	            user_model.init_session(ini_goal=goal)
	            sys_response = ""

	            # The logged turns are only needed when replaying ground-truth system responses.
	            dialogue_log = df_mwoz_data.iloc[:, dialogue_idx]["log"] if ground_truth_system_responses else None

	            for turn_idx in range(50):
	                # Turn idx in this case represents the turn as one user utterance AND one system response
	                usr_response_raw_data_idx = turn_idx * 2
	                sys_response_raw_data_idx = turn_idx * 2 + 1

	                user_utterance = user_model.response(sys_response)
	                print(bcolors.OKBLUE + "User: " + bcolors.ENDC + user_utterance)

	                if write_to_file:
	                    # Keep each utterance on a single TSV row.
	                    user_utterance = user_utterance.replace("\n", " ")
	                    curr_dialogue_user_utterances_formatted.append(
	                        str(dialogue_idx)
	                        + "\t"
	                        + dialogue_filename
	                        + "\t"
	                        + str(usr_response_raw_data_idx)
	                        + "\t"
	                        + user_utterance
	                        + "\n"
	                    )

	                if user_model.is_terminated():
	                    successful_dialogues += 1
	                    print(bcolors.OKCYAN + "Dialogue terminated successfully!" + bcolors.ENDC)
	                    print(bcolors.OKCYAN + "---" * 30 + bcolors.ENDC + "\n")
	                    if write_to_file:
	                        # Write whole dialogue to file
	                        with open(user_utterances_out_path, "a") as f:
	                            f.writelines(curr_dialogue_user_utterances_formatted)
	                    break

	                # Next turn materials
	                if ground_truth_system_responses:
	                    # If we are at the end of the ground truth dialogues
	                    if len(dialogue_log) <= sys_response_raw_data_idx:
	                        print(bcolors.RED + "Dialogue terminated unsuccessfully!" + bcolors.ENDC)
	                        print(bcolors.RED + "---" * 30 + bcolors.ENDC + "\n")
	                        break
	                    sys_response = dialogue_log[sys_response_raw_data_idx]["text"]
	                else:
	                    sys_response = sys_model.response(user_utterance, turn_idx)
	                # Slicing instead of indexing so an empty response does not raise IndexError.
	                capitalised_sys_response = sys_response[:1].upper() + sys_response[1:]
	                print(bcolors.GREEN + "System: " + bcolors.ENDC + capitalised_sys_response)

	        except Exception:
	            # Deliberately broad: a failing dialogue is reported and skipped so the run continues.
	            print(bcolors.RED + "*" * 30 + bcolors.ENDC)
	            print(bcolors.RED + "Error in dialogue {}".format(dialogue_filename) + bcolors.ENDC)
	            print(bcolors.RED + "*" * 30 + bcolors.ENDC)
	            traceback.print_exc()

	    # The rate is a fraction of the dialogues actually attempted; dialogues skipped
	    # by train_only are excluded, and an empty run reports 0.0 instead of dividing by zero.
	    if total_dialogues_generated:
	        success_rate = successful_dialogues / total_dialogues_generated
	    else:
	        success_rate = 0.0

	    print("Successful dialogues: {}".format(successful_dialogues))
	    print("Total dialogues: {}".format(n_dialogues))
	    print("Dialogues generated: {}".format(total_dialogues_generated))
	    print("% Successful Dialogues: {}".format(success_rate))


	if __name__ == "__main__":
	    # TODO: move parameters to config file
	    # The optional first command-line argument selects ground-truth system
	    # responses. A missing argument now falls back to False (main's default)
	    # instead of raising IndexError, and common "false" spellings are
	    # recognised case-insensitively; any other value enables the flag.
	    raw_flag = sys.argv[1] if len(sys.argv) > 1 else "False"
	    ground_truth_system_responses = raw_flag.strip().lower() not in ("false", "0", "no", "")
	    main(write_to_file=False, ground_truth_system_responses=ground_truth_system_responses)