Spaces:

cmu-lti
/

sotopia-space

Runtime error

App Files Files Community

Haofei Yu committed on Apr 9

Commit

3ce130a

•

1 Parent(s): 6774d89

Feature/support multi turn (#14)

Browse files

* add the issue and pr template

* only show generated conversation

* support multi-turn sotopia prompt

Files changed (2) hide show

app.py +24 -10
utils.py +53 -12

app.py CHANGED Viewed

@@ -2,11 +2,12 @@ import gradio as gr
 from dataclasses import dataclass
 import os
 import torch
 from uuid import uuid4
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from utils import Agent, get_starter_prompt, format_chat_prompt
 HUMAN_AGENT = Agent(
@@ -23,18 +24,23 @@ MACHINE_AGENT = Agent(
     secrets="Descendant of a wealthy oil tycoon, rejects family fortune",
     personality="Benjamin Jackson, expressive and imaginative, leans towards self-direction and liberty. His decisions aim for societal betterment.",)
-DEFUALT_INSTRUCTIONS = get_starter_prompt(MACHINE_AGENT, HUMAN_AGENT, "Conversation between two friends, where one is upset and crying")
 DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
 MODEL_NAME = "cmu-lti/sotopia-pi-mistral-7b-BC_SR"
 COMPUTE_DTYPE = torch.float16
 config_dict = PeftConfig.from_json_file("peft_config.json")
-# import pdb; pdb.set_trace()
 config = PeftConfig.from_peft_type(**config_dict)
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-model = PeftModel.from_pretrained(model, MODEL_NAME, config=config).to(COMPUTE_DTYPE).to("cuda")
 according_visible = True
@@ -109,10 +115,10 @@ def chat_accordion():
                     max_lines=1,
                     visible=False,
                 )
     return temperature, instructions, user_name, bot_name, session_id, max_tokens
 def run_chat(
     message: str,
     history,
@@ -123,7 +129,13 @@ def run_chat(
     top_p: float,
     max_tokens: int
 ):
-    prompt = format_chat_prompt(message, history, instructions, user_name, bot_name)
     input_tokens = tokenizer(prompt, return_tensors="pt", padding="do_not_pad").input_ids.to("cuda")
     input_length = input_tokens.shape[-1]
     output_tokens = model.generate(
@@ -138,7 +150,7 @@ def run_chat(
     text_output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
     return text_output
 def chat_tab():
     with gr.Column():
         with gr.Row():
@@ -160,7 +172,10 @@ def chat_tab():
                         render=False,
                         show_label=False,
                         rtl=False,
-                        avatar_images=("images/user_icon.png", "images/bot_icon.png"),
                     ),
                     textbox=gr.Textbox(
                         placeholder="Write your message here...",
@@ -184,7 +199,6 @@ def chat_tab():
                 )
 def main():
     with gr.Blocks(
         css="""#chat_container {height: 820px; width: 1000px; margin-left: auto; margin-right: auto;}

 from dataclasses import dataclass
 import os
 import torch
+import transformers
 from uuid import uuid4
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from utils import Agent, get_starter_prompt, format_sotopia_prompt
 HUMAN_AGENT = Agent(
     secrets="Descendant of a wealthy oil tycoon, rejects family fortune",
     personality="Benjamin Jackson, expressive and imaginative, leans towards self-direction and liberty. His decisions aim for societal betterment.",)
+SCENARIO = "Conversation between two friends, where one is upset and crying"
+DEFUALT_INSTRUCTIONS = get_starter_prompt(
+    MACHINE_AGENT,
+    HUMAN_AGENT,
+    SCENARIO
+)
 DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
 MODEL_NAME = "cmu-lti/sotopia-pi-mistral-7b-BC_SR"
 COMPUTE_DTYPE = torch.float16
 config_dict = PeftConfig.from_json_file("peft_config.json")
 config = PeftConfig.from_peft_type(**config_dict)
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1").to("cuda")
+model = PeftModel.from_pretrained(model, MODEL_NAME, config=config).to("cuda")
 according_visible = True
                     max_lines=1,
                     visible=False,
                 )
     return temperature, instructions, user_name, bot_name, session_id, max_tokens
+# history is a list of (input, output) pairs
 def run_chat(
     message: str,
     history,
     top_p: float,
     max_tokens: int
 ):
+    prompt = format_sotopia_prompt(
+        message,
+        history,
+        instructions,
+        user_name,
+        bot_name
+    )
     input_tokens = tokenizer(prompt, return_tensors="pt", padding="do_not_pad").input_ids.to("cuda")
     input_length = input_tokens.shape[-1]
     output_tokens = model.generate(
     text_output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
     return text_output
 def chat_tab():
     with gr.Column():
         with gr.Row():
                         render=False,
                         show_label=False,
                         rtl=False,
+                        avatar_images=(
+                            "images/user_icon.png",
+                            "images/bot_icon.png"
+                        ),
                     ),
                     textbox=gr.Textbox(
                         placeholder="Write your message here...",
                 )
 def main():
     with gr.Blocks(
         css="""#chat_container {height: 820px; width: 1000px; margin-left: auto; margin-right: auto;}

utils.py CHANGED Viewed

@@ -1,3 +1,5 @@
 class Agent:
     def __init__(self, name, background, goal, secrets, personality):
         self.name = name
@@ -9,23 +11,62 @@ class Agent:
 def get_starter_prompt(machine_agent, human_agent, scenario):
     return f"Prompt after formatting:\nImagine you are {machine_agent.name}, your task is to act/speak as {machine_agent.name} would, keeping in mind {machine_agent.name}'s social goal.\nYou can find {machine_agent.name}'s background and goal in the 'Here is the context of the interaction' field.\nNote that {machine_agent.name}'s secret and goal is only visible to you.\nYou should try your best to achieve {machine_agent.name}'s goal in a way that align with their character traits.\nAdditionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before).\n\nHere is the context of this interaction:\n Scenario: {scenario}\nParticipants: {human_agent.name} and {machine_agent.name}\n{human_agent.name}'s background: {human_agent.background} Personality and values description: {human_agent.personality} \n{machine_agent.name}'s background: {machine_agent.background} Personality and values description: {machine_agent.personality} {machine_agent.name}'s secrets: {machine_agent.secrets}\n{human_agent.name}'s goal: Unknown\n{machine_agent.name}'s goal: {machine_agent.name}\nConversation Starts:"
-def format_chat_prompt(
     message: str,
-    chat_history,
     instructions: str,
     user_name: str,
     bot_name: str,
     include_all_chat_history: bool = True,
     index : int = 1
 ) -> str:
-    instructions = instructions.strip()
-    prompt = instructions
-    if not include_all_chat_history:
-        if index >= 0:
-            index = -index
-        chat_history = chat_history[index:]
-    for turn in chat_history:
-        user_message, bot_message = turn
-        prompt = f"{prompt}\n{user_name}: {user_message}\n{bot_name}: {bot_message}"
-    prompt = f"{prompt}\n{user_name}: {message}\n{bot_name}:"
     return prompt

+from typing import Tuple, List
 class Agent:
     def __init__(self, name, background, goal, secrets, personality):
         self.name = name
 def get_starter_prompt(machine_agent, human_agent, scenario):
     """Build the instruction text that opens every conversation prompt.

     Args:
         machine_agent: Agent the model plays; its ``name``, ``background``,
             ``personality``, ``secrets`` and ``goal`` are interpolated.
         human_agent: Agent the human plays; its ``name``, ``background`` and
             ``personality`` are interpolated. Its goal is always rendered as
             ``Unknown``.
         scenario: Free-text description of the situation.

     Returns:
         The formatted prompt, ending with ``Conversation Starts:``.

     NOTE(review): the wording (including its grammar) is reproduced verbatim;
     presumably it mirrors the prompt the model was tuned on -- confirm before
     rewording. Assumes ``Agent`` stores its ``goal`` constructor argument as
     the ``goal`` attribute.
     """
     # The last line reports the machine agent's goal (not its name again).
     return f"Prompt after formatting:\nImagine you are {machine_agent.name}, your task is to act/speak as {machine_agent.name} would, keeping in mind {machine_agent.name}'s social goal.\nYou can find {machine_agent.name}'s background and goal in the 'Here is the context of the interaction' field.\nNote that {machine_agent.name}'s secret and goal is only visible to you.\nYou should try your best to achieve {machine_agent.name}'s goal in a way that align with their character traits.\nAdditionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before).\n\nHere is the context of this interaction:\n Scenario: {scenario}\nParticipants: {human_agent.name} and {machine_agent.name}\n{human_agent.name}'s background: {human_agent.background} Personality and values description: {human_agent.personality} \n{machine_agent.name}'s background: {machine_agent.background} Personality and values description: {machine_agent.personality} {machine_agent.name}'s secrets: {machine_agent.secrets}\n{human_agent.name}'s goal: Unknown\n{machine_agent.name}'s goal: {machine_agent.goal}\nConversation Starts:"
+# we define history as
+# [(user_message, bot_message), (user_message, bot_message)]
+# we define dialogue history as the rendered string
+# \n\nTurn #0: user_name: user_message\n\nTurn #1: bot_name: bot_message\n\nTurn #2: user_name: user_message ...
def dialogue_history_length_check(string, max_token, tokenizer):
    """Return by how many tokens ``string`` exceeds ``max_token``.

    The token count is ``len(tokenizer(string)["input_ids"])``. The result is
    clamped at zero, so a text within the budget yields ``0``.
    """
    token_ids = tokenizer(string)["input_ids"]
    overflow = len(token_ids) - max_token
    return max(overflow, 0)
def truncate_dialogue_history_to_length(dia_his, surpass_num, tokenizer):
    """Drop leading lines of ``dia_his`` until enough tokens are removed.

    The history is split on ``"\\n"`` and lines are discarded from the front,
    oldest first, until the discarded lines account for at least
    ``surpass_num`` tokens. Each line is measured on its own with
    ``len(tokenizer(line)["input_ids"])``.

    Args:
        dia_his: Dialogue history as one newline-separated string.
        surpass_num: Number of tokens that must be removed.
        tokenizer: Callable returning a mapping with an ``"input_ids"`` entry.

    Returns:
        The remaining lines re-joined with ``"\\n"``. If every line is
        consumed before ``surpass_num`` is reached, the result is ``""``.
    """
    lines = dia_his.split("\n")
    removed_tokens = 0
    first_kept = 0
    # Stop at the end of the history as well: the per-line counts are not
    # guaranteed to add up to surpass_num, and running past the last line
    # would raise IndexError.
    while removed_tokens < surpass_num and first_kept < len(lines):
        removed_tokens += len(tokenizer(lines[first_kept])["input_ids"])
        first_kept += 1
    return "\n".join(lines[first_kept:])
def dialogue_history_creation(history, user_name, bot_name):
    """Render ``history`` as numbered turns.

    Args:
        history: Sequence of ``(user_message, bot_message)`` pairs.
        user_name: Speaker label for the first message of each pair.
        bot_name: Speaker label for the second message of each pair.

    Returns:
        A ``(dialogue_history, last_turn_idx)`` tuple. ``dialogue_history``
        holds one ``"\\n\\nTurn #k: speaker: message"`` segment per message,
        numbered from 0 (pair ``i`` gets turns ``2*i`` and ``2*i + 1``), and
        is ``""`` for an empty history. ``last_turn_idx`` is
        ``len(history) * 2``, i.e. the number the next turn would take in
        this 0-based numbering.
    """
    # TODO(haofeiyu): we first assume that human talks first
    rendered_turns = [
        f"\n\nTurn #{idx * 2}: {user_name}: {user_message}"
        f"\n\nTurn #{idx * 2 + 1}: {bot_name}: {bot_message}"
        for idx, (user_message, bot_message) in enumerate(history)
    ]
    # Join once instead of re-building the whole string on every turn.
    return "".join(rendered_turns), len(history) * 2
def dialogue_history_truncation(dialogue_history, max_token_num, tokenizer):
    """Shorten ``dialogue_history`` when it exceeds ``max_token_num`` tokens.

    The overflow is measured with ``dialogue_history_length_check``. A history
    within the budget is returned unchanged; otherwise the result of
    ``truncate_dialogue_history_to_length`` for that overflow is returned.
    """
    overflow = dialogue_history_length_check(
        dialogue_history, max_token_num, tokenizer
    )
    if overflow <= 0:
        # Already within the budget: nothing to cut.
        return dialogue_history
    return truncate_dialogue_history_to_length(
        dialogue_history, overflow, tokenizer
    )
def format_sotopia_prompt(
    message: str,
    history: List[Tuple[str, str]],
    instructions: str,
    user_name: str,
    bot_name: str,
    include_all_chat_history: bool = True,
    index : int = 1
) -> str:
    """Assemble the full multi-turn prompt for the next bot reply.

    The prompt is the stripped ``instructions``, the numbered dialogue history
    built by ``dialogue_history_creation``, the new user ``message`` as the
    next numbered turn, and a closing ``You are at Turn #N.`` marker for the
    turn the bot is about to produce.

    Args:
        message: The user's new message.
        history: Earlier ``(user_message, bot_message)`` pairs.
        instructions: Starter prompt; surrounding whitespace is stripped.
        user_name: Speaker label for the user's turns.
        bot_name: Speaker label for the bot's turns.
        include_all_chat_history: Accepted for interface compatibility; not
            used by this implementation.
        index: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        The formatted prompt string.
    """
    prompt = instructions.strip()
    dialogue_history, next_turn_idx = dialogue_history_creation(
        history,
        user_name,
        bot_name
    )
    # History turns are numbered 0 .. next_turn_idx - 1, so the new user
    # message continues at next_turn_idx and the bot answers one turn later;
    # starting at next_turn_idx + 1 would skip a turn number.
    return (
        f"{prompt}\n{dialogue_history}"
        f"\n\nTurn #{next_turn_idx}: {user_name}: {message}"
        f"\n.\nYou are at Turn #{next_turn_idx + 1}."
    )