Spaces:
Runtime error
Runtime error
File size: 13,303 Bytes
fe95067 8b07d8c fe95067 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 |
import re
from typing import Literal, cast
from pydantic import BaseModel, Field
from utils import format_docstring
ActionType = Literal["none", "speak", "non-verbal communication", "action", "leave"]
class Message(BaseModel):
"""
An interface for messages.
There is only one required method: to_natural_language
"""
def to_natural_language(self) -> str:
raise NotImplementedError
class SimpleMessage(Message):
"""
A simple message with a single string field.
"""
message: str = Field(description="the message")
def to_natural_language(self) -> str:
return self.message
class Observation(Message):
last_turn: str = Field(description="the last turn of the conversation")
turn_number: int = Field(description="the turn number of the conversation")
available_actions: list[ActionType] = Field(description="the available actions")
def to_natural_language(self) -> str:
if self.turn_number == 0:
return f"\n{self.last_turn}\nConversation Starts:\n"
else:
return f"Turn #{self.turn_number-1}: {self.last_turn}\n"
class ScriptBackground(Message):
scenario: str = Field(description="scenario of the episode")
p1_name: str = Field(description="name of participant 1")
p2_name: str = Field(description="name of participant 2")
p1_background: str = Field(description="background of participant 1")
p2_background: str = Field(description="background of participant 2")
p1_goal: str = Field(description="goal of participant 1")
p2_goal: str = Field(description="goal of participant 2")
def to_natural_language(self) -> str:
if self.p1_background or self.p2_background:
p1_background = self.p1_background if self.p1_background else "Unknown"
p2_background = self.p2_background if self.p2_background else "Unknown"
# Not using AND, since in stranger relation the background is not visible
return format_docstring(
f"""Here is the context of this interaction:
Scenario: {self.scenario}
Participants: {self.p1_name} and {self.p2_name}
{self.p1_name}'s background: {p1_background}
{self.p2_name}'s background: {p2_background}
{self.p1_name}'s goal: {self.p1_goal}
{self.p2_name}'s goal: {self.p2_goal}
"""
)
else:
return format_docstring(
f"""Here is the context of this interaction:
Scenario: {self.scenario}
Participants: {self.p1_name} and {self.p2_name}
{self.p1_name}'s goal: {self.p1_goal}
{self.p2_name}'s goal: {self.p2_goal}
"""
)
class ScriptEnvironmentResponse(Message):
terminated: bool = Field(
description="whether the conversation is terminated",
default_factory=lambda: False,
)
p1_rate: float | tuple[float, dict[str, float]] | None = Field(
description="rating of participant 1, on the scale of 1 to 10"
)
p2_rate: float | tuple[float, dict[str, float]] | None = Field(
description="rating of participant 2, on the scale of 1 to 10"
)
comments: str | None = Field(
description="All of the comments supporting the termination and rating"
)
def to_natural_language(self) -> str:
reason_to_stop = format_docstring(
f"""Environment response:
{"The conversation is terminated." if self.terminated else ""}
{"Rating of participant 1" + str(self.p1_rate) if self.p1_rate is not None else ""}
{"Rating of participant 2" + str(self.p2_rate) if self.p2_rate is not None else ""}
{self.comments if self.comments is not None else ""}
"""
)
clean_text = ""
for line in reason_to_stop.split("\n"):
if line.strip():
clean_text += line + "\n"
return clean_text
class AgentAction(Message):
action_type: ActionType = Field(
description="whether to speak at this turn or choose to not do anything"
)
argument: str = Field(
description="the utterance if choose to speak, the expression or gesture if choose non-verbal communication, or the physical action if choose action"
)
def to_natural_language(self) -> str:
match self.action_type:
case "none":
return "did nothing"
case "speak":
return f"{self.argument}"
case "non-verbal communication":
return f"[{self.action_type}] {self.argument}"
case "action":
return f"[{self.action_type}] {self.argument}"
case "leave":
return "left the conversation"
ScriptInteractionReturnType = tuple[
list[list[tuple[str, str, Message]]], list[tuple[str, Message]]
]
class ScriptInteraction(Message):
interactions: str = Field(
description="""The interaction between the two participants in maximum 20 turns. Each turn is separated by a newline, and should only describe one agent. Following the structure:
Turn #x
[participant's name] [action] {argument for some actions}
You can use different types of actions, but only use one in each turn. You should move other information into argument part. Below shows a python code snippet of the format for each action type:
match self.action_type:
case "none":
return "did nothing"
case "speak":
return f'said: "{self.argument}"'
case "non-verbal communication":
return f"[{self.action_type}] {self.argument}"
case "action":
return f"[{self.action_type}] {self.argument}"
case "leave":
return "left the conversation"
For example, the following is acceptable:
Turn #x
Oliver Thompson said: "Hey Esmeralda, what's wrong? You seem upset."
Turn #x
Esmeralda Solis [action] moved closer
Turn #x
Oliver Thompson [non-verbal communication] smiled
Turn #x
Esmeralda Solis did nothing
Turn #x
Oliver Thompson left the conversation
Turn #x
Esmeralda Solis [action] leaned in and lowered her voice: "Sorry"
And the following is not acceptable:
Turn #1
Oliver Thompson [speak] said: "Hey Esmeralda, what's wrong? You seem upset."
Turn #1
Esmeralda Solis non-verbal communication moved closer
"""
)
def to_natural_language(self) -> str:
return self.interactions
def parse(
self, agent_names: list[str], background: str
) -> tuple[list[list[tuple[str, str, Message]]], list[tuple[str, Message]]]:
interaction = self.interactions
# print("Interaction: ", interaction)
lines = self.split_by_turn(interaction)
agent_results = []
results: list[list[tuple[str, str, Message]]] = [
[
(
"Environment",
name,
Observation(
last_turn=background,
turn_number=0,
available_actions=["none"],
),
)
for name in agent_names
]
]
for line_idx, line in enumerate(lines):
try:
res = self.parse_single_dialogue(line)
action: AgentAction = cast(AgentAction, res["action"])
argument: str = cast(str, res["argument"])
cast(int, res["turn"])
name: str = cast(str, res["name"])
parsed_action = AgentAction(action_type=action, argument=argument)
if name not in agent_names:
print(
f"The name of the agent, {name}, is not in the list of agent names, {agent_names}"
)
name = agent_names[
line_idx % 2
] # TODO Not sure what name to be set here
except Exception as e:
print(
f"Error when parsing the dialogue: {line}",
f"The error is: {e}",
)
raise e
parsed_action = AgentAction(action_type="none", argument="")
name = agent_names[line_idx % 2] # TODO same question as above
inactive_agent_name = (
agent_names[0] if name == agent_names[1] else agent_names[1]
)
results.append(
[
(
"Environment",
name,
Observation(
last_turn="environment is the agent",
turn_number=line_idx + 1,
available_actions=["none"],
),
)
for name in agent_names
]
+ [
(name, "Environment", parsed_action),
(
inactive_agent_name,
"Environment",
AgentAction(action_type="none", argument="did nothing"),
),
]
)
agent_results.append((name, parsed_action))
# print("Parsed agent results: ", agent_results)
return (results, agent_results) # type: ignore
def parse_single_dialogue(
self, dialogue: str
) -> dict[str, str | int | AgentAction | None]:
"""Parse a single dialogue string and return a dictionary with turn, name, action, and argument."""
# Match the turn number and name. Assume all agent name starts with a capital letter and is followed by lowercase letters
match_turn_name = re.match(
r"Turn #?(\d+):?\s*\n((?:[A-Z]['a-z]* ?)+)", dialogue
)
if not match_turn_name:
raise ValueError(
f"The dialogue does not match the expected format: {dialogue}"
)
return None # TODO Which should we use, return None or raise error?
turn, name = match_turn_name.groups()
action_content = dialogue[
len(match_turn_name.group(0)) :
].strip() # Extract the action content
# Check for different action types
if "did nothing" in action_content:
action, argument = "none", ""
elif match := re.match(r'said: "(.*?)"', action_content):
action, argument = "speak", match.group(1)
action, argument = action.strip(), argument.strip()
elif match := re.match(r'\[speak\] said: "(.*?)"', action_content):
action, argument = "speak", match.group(1)
action, argument = action.strip(), argument.strip()
elif match := re.match(
r"\[(non-verbal communication|action)\] (.*)", action_content
):
action, argument = match.groups()
elif "left the conversation" in action_content:
# TODO Make it more elegant to handle the situation of `left the conversation.`
action, argument = "leave", ""
else:
action, argument = None, None
parsed_item = {
"turn": int(turn),
"name": name.strip(),
"action": action,
"argument": argument,
}
return parsed_item
def split_by_turn(self, input_string: str) -> list[str]:
"""Split the input dialogue string by turn and return a list of dialogues."""
# Split using 'Turn #' as delimiter, but keep the delimiter in the results
dialogues = re.split(r"(?=Turn #?\d+)", input_string)
# Remove any empty strings and strip whitespace
dialogues = [dialogue.strip() for dialogue in dialogues if dialogue.strip()]
dialogues = [dialogue for dialogue in dialogues if dialogue.startswith("Turn")]
# Change from Turn #x to Turn (#)x (# is optional)
dialogues[-1] = "\n".join(
dialogues[-1].split("\n")[:2]
) # Discard further input in the last turn
for dialogue in dialogues:
# TODO this is current workaround for the issue of multiple agents in one turn
if len(dialogue.split("\n")) >= 3:
raise ValueError("Only one agent can act per turn.")
return dialogues
@staticmethod
def default_value_for_return_type() -> ScriptInteractionReturnType:
results_1: list[list[tuple[str, str, Message]]] = [
[
(
"Environment",
name,
Observation(
last_turn="Environment is the agent",
turn_number=0,
available_actions=["none"],
),
)
for name in ["none", "none"]
]
]
results_2: list[tuple[str, Message]] = [
("", AgentAction(action_type="none", argument=""))
]
return (results_1, results_2)
|