"""Gradio demo for a task-oriented dialogue system and a user simulator.

The app exposes three tabs:

* "Dialogue System" - the human plays the user and the UBAR model answers.
* "User Simulator"  - the human plays the system and the user model answers.
* "Self-Play"       - the user model and the UBAR model talk to each other,
  one exchange per button press.
"""
import json
import random
import sys
import traceback

import gradio as gr
import pandas as pd
import yaml

from scripts.UBAR_code.interaction import UBAR_interact
from scripts.user_model_code.interaction import multiwoz_interact
from scripts.UBAR_code.interaction.UBAR_interact import bcolors

# Initialise agents
UBAR_checkpoint_path = "epoch50_trloss0.59_gpt2"
user_model_checkpoint_path = "MultiWOZ-full_checkpoint_step340k"

# NOTE(review): each chained assignment below binds BOTH names to ONE shared
# instance, so the interactive tabs and the self-play tab share model/session
# state (e.g. the second init_session() call further down overwrites the
# first). Presumably done to load each checkpoint only once - confirm before
# splitting into separate instances.
sys_model = self_play_sys_model = UBAR_interact.UbarSystemModel(
    "UBAR_sys_model", UBAR_checkpoint_path, "scripts/UBAR_code/interaction/config.yaml"
)
user_model = self_play_user_model = multiwoz_interact.NeuralAgent(
    "user", user_model_checkpoint_path, "scripts/user_model_code/interaction/config.yaml"
)

# Get goals
n_goals = 100
goals_path = "data/raw/UBAR/multi-woz/data.json"
print("Loading goals...")
goals = multiwoz_interact.read_multiWOZ_20_goals(goals_path, n_goals)


def _pick_random_goal():
    """Return ``(index, goal)`` for a uniformly sampled entry of ``goals``.

    The index is bounded by ``len(goals)`` rather than ``n_goals`` so that a
    loader returning fewer goals than requested cannot cause an IndexError.
    """
    idx = random.randrange(len(goals))
    return idx, goals[idx]


# Initialise agent with first goal (can be incremented by user) for user simulator tab
curr_goal_idx, current_goal = _pick_random_goal()
user_model.init_session(ini_goal=current_goal)

# Do the same initialisation but for the self-play tab
curr_sp_goal_idx, current_sp_goal = _pick_random_goal()
self_play_user_model.init_session(ini_goal=current_sp_goal)

# Get the responses for each agent and track conversation history
ds_history = []
us_history = []
self_play_history = []


def change_goal():
    """Sample a new goal for the User Simulator tab and restart its session.

    Updates the module-level ``curr_goal_idx`` and re-initialises
    ``user_model`` with the sampled goal.

    Returns:
        The sampled goal serialised as block-style YAML.
    """
    global curr_goal_idx
    curr_goal_idx, goal = _pick_random_goal()
    user_model.init_session(ini_goal=goal)
    return yaml.dump(goal, default_flow_style=False)


def change_sp_goal():
    """Sample a new goal for the Self-Play tab and restart its session.

    Updates the module-level ``curr_sp_goal_idx`` and re-initialises
    ``self_play_user_model`` with the sampled goal.

    Returns:
        The sampled goal serialised as block-style YAML.
    """
    global curr_sp_goal_idx
    curr_sp_goal_idx, goal = _pick_random_goal()
    self_play_user_model.init_session(ini_goal=goal)
    return yaml.dump(goal, default_flow_style=False)


def ds_chatbot(user_utt):
    """Answer a human user message with the dialogue system.

    Args:
        user_utt: The message typed by the human user.

    Returns:
        ``ds_history``: the list of ``(user_utt, sys_response)`` pairs so far,
        including the pair just appended.
    """
    # The turn id passed to the model is the number of exchanges already stored.
    turn_id = len(ds_history)
    sys_response = sys_model.response(user_utt, turn_id)
    ds_history.append((user_utt, sys_response))
    return ds_history


def us_chatbot(sys_response):
    """Answer a human-written system message with the user simulator.

    Args:
        sys_response: The message typed by the human acting as the system.

    Returns:
        ``us_history``: the list of ``(sys_response, user_utt)`` pairs so far,
        including the pair just appended.
    """
    user_utt = user_model.response(sys_response)
    us_history.append((sys_response, user_utt))
    if user_model.is_terminated():
        # NOTE(review): the YAML returned by change_goal() is discarded here and
        # this handler only outputs the chat history, so the goal textbox is
        # not refreshed when the goal changes automatically.
        change_goal()
    return us_history


def self_play():
    """Run one user-simulator / dialogue-system exchange for the Self-Play tab.

    The user simulator replies to the system's previous response (an empty
    string on the very first step); the dialogue system then replies to that
    utterance. When the user simulator reports termination, a new self-play
    goal is sampled.

    Returns:
        ``self_play_history``: the list of ``(user_utt, sys_response)`` pairs
        so far, including the pair just appended.
    """
    prev_sys_response = self_play_history[-1][1] if self_play_history else ""
    user_utt = self_play_user_model.response(prev_sys_response)

    turn_id = len(self_play_history)
    sys_response = self_play_sys_model.response(user_utt, turn_id)
    self_play_history.append((user_utt, sys_response))

    if self_play_user_model.is_terminated():
        # Reset the SELF-PLAY goal; the original called change_goal(), which
        # resampled the User Simulator tab's goal index instead.
        change_sp_goal()
    return self_play_history


# Initialise demo render
# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened source - confirm that buttons belong outside the gr.Row() groups.
block = gr.Blocks()

with block:
    gr.Markdown("# Demo User Simulator and Task-Oriented Dialogue System")
    gr.Markdown("*Created by Alistair McLeay, with help from Professor Bill Byrne, Andy Tseng, and Alex Coca*")
    with gr.Tabs():
        with gr.TabItem("Dialogue System"):
            gr.Markdown(
                "This bot is a Task-Oriented Dialogue System. You are the user. Go ahead and try to book a train, or a hotel etc."
            )
            with gr.Row():
                ds_input_text = gr.inputs.Textbox(
                    label="User Message", placeholder="I'd like to book a train from Cambridge to London"
                )
                ds_response = gr.outputs.Chatbot(label="Dialogue System Response")
            ds_button = gr.Button("Submit Message")
        with gr.TabItem("User Simulator"):
            gr.Markdown(
                "This bot is a User Simulator. You are the Task-Oriented Dialogue System. Your job is to help the user with their requests."
            )
            new_goal_button = gr.Button("Generate Goal")
            with gr.Row():
                us_input_text = gr.inputs.Textbox(
                    label="Dialogue System Message", placeholder="How can I help you today?"
                )
                us_response = gr.outputs.Chatbot(label="User Simulator Response")
                current_goal_yaml = gr.outputs.Textbox(label="Current Goal (YAML)")
            us_button = gr.Button("Submit Message")
        with gr.TabItem("Self-Play"):
            gr.Markdown(
                "In this scenario you define a goal and you then watch both agents interact where the User Simulator is trying to achieve the goal, and the Task-Oriented Dialogue System is trying to help the User Simulator do so."
            )
            new_sp_goal_button = gr.Button("Generate Goal")
            with gr.Row():
                self_play_response = gr.outputs.Chatbot(label="Self-Play Output")
                current_sp_goal_yaml = gr.outputs.Textbox(label="Current Goal (YAML)")
            self_play_button = gr.Button("Run Next Step")

    # Event wiring: each handler's return value is sent to the listed output.
    ds_button.click(ds_chatbot, ds_input_text, ds_response)
    us_button.click(us_chatbot, us_input_text, us_response)
    self_play_button.click(self_play, None, self_play_response)
    new_goal_button.click(change_goal, None, current_goal_yaml)
    new_sp_goal_button.click(change_sp_goal, None, current_sp_goal_yaml)

block.launch(share=True)