Spaces:
Running
Running
Jiaxian Guo
committed on
Commit
•
28a47b6
1
Parent(s):
581bd24
Add application file
Browse files- .DS_Store +0 -0
- agent.py +467 -0
- app.py +468 -0
- context.py +39 -0
- game_config/coup.json +7 -0
- game_config/leduc_limit.json +6 -0
- game_config/limit_holdem.json +6 -0
- model.py +99 -0
- person_config/GoodGuy.json +6 -0
- person_config/Persuader.json +6 -0
- requirements.txt +16 -0
- retriever.py +73 -0
- setting.py +139 -0
- util.py +127 -0
.DS_Store
ADDED
Binary file (10.2 kB). View file
|
|
agent.py
ADDED
@@ -0,0 +1,467 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Reference: https://python.langchain.com/en/latest/use_cases/agent_simulations
|
2 |
+
|
3 |
+
import re
|
4 |
+
from datetime import datetime
|
5 |
+
from typing import List, Optional, Tuple
|
6 |
+
|
7 |
+
from langchain import LLMChain
|
8 |
+
from langchain.base_language import BaseLanguageModel
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain.retrievers import TimeWeightedVectorStoreRetriever
|
11 |
+
from langchain.schema import Document
|
12 |
+
from pydantic import BaseModel, Field
|
13 |
+
from termcolor import colored
|
14 |
+
import util
|
15 |
+
import time
|
16 |
+
|
17 |
+
class SuspicionAgent(BaseModel):
    """A character with memory and innate characteristics.

    Pydantic model bundling the agent's identity, the language model handle
    used by its reasoning methods, and mutable per-game state (beliefs,
    plans, long-term memory).  NOTE(review): annotations below are enforced
    by pydantic at runtime, so they are left exactly as written.
    """

    name: str            # NPC character name this agent plays as
    game_name: str       # board game being played (e.g. Leduc Hold'em)
    age: int
    observation_rule: str  # rules for converting raw observations into text
    """The traits of the character you wish not to change."""
    status: str
    """Current activities of the character."""
    llm: BaseLanguageModel  # backs every LLMChain built by the reasoning methods

    """The retriever to fetch related memories."""
    verbose: bool = False  # forwarded to LLMChain in some methods

    reflection_threshold: Optional[float] = None
    """When the total 'importance' of memories exceeds the above threshold, stop to reflect."""

    current_plan: List[str] = []
    # Cached outputs of the reasoning methods; each method both returns its
    # result and stores it on one of these attributes.
    belief: str = ""
    pattern: str = ""
    long_belief: str = ""
    counter_belief: str = ""
    plan: str = ""
    high_plan: str = ""
    """The current plan of the agent."""

    # Long-term memory: one string per remembered game/event.  Pydantic
    # deep-copies mutable defaults per instance, so the [''] default is safe.
    memory: List = ['']
    summary: str = ""  #: :meta private:
    summary_refresh_seconds: int = 3600  #: :meta private:
    last_refreshed: datetime = Field(default_factory=datetime.now)  #: :meta private:

    memory_importance: float = 0.0  #: :meta private:
    max_tokens_limit: int = 1200  #: :meta private:
    read_observation: str = ""  #: :meta private:

    rule: str = ""  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        # Required so non-pydantic types (e.g. BaseLanguageModel) may be fields.
        arbitrary_types_allowed = True
|
64 |
+
def add_long_memory(self, memory_content: str) -> List[str]:
|
65 |
+
"""Add an observation or memory to the agent's memory."""
|
66 |
+
self.memory.append(memory_content)
|
67 |
+
return self.memory
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
    def planning_module(self, observation: str, recipient_name: str, previous_conversation: Optional[List[str]] = None, belief: Optional[str] = None, valid_action_list: Optional[List[str]] = None, short_memory_summary: str = "", pattern: str = "", last_plan: str = "", mode: str = "second_tom") -> str:
        """Make Plans and Evaluate Plans.

        Combining these two modules together to save costs: a single LLM call
        both proposes candidate plans and evaluates them (win/lose/draw rates,
        expected chip gain, final plan selection).

        Args:
            observation: Textual description of the current game state.
            recipient_name: The opponent's name.
            previous_conversation: Unused in this method; kept for interface
                compatibility with callers.
            belief: Belief text injected into the prompt; when ``None`` the
                cached ``self.belief`` is used instead.
            valid_action_list: Actions legal in the current state.
            short_memory_summary: Summary of the current game's progress.
            pattern: Estimated opponent behaviour-pattern text.
            last_plan: Previous plan text, passed through to the prompt inputs.
            mode: ``'second_tom'`` (second-order theory of mind),
                ``'first_tom'`` (first-order), or any other value for the
                no-theory-of-mind prompt variant.

        Returns:
            The stripped plan text produced by the LLM; also cached on
            ``self.plan``.
        """
        # Three prompt variants differing in how deeply they model the
        # opponent's reasoning about us.  The template text is the behaviour
        # of this method — it is reproduced verbatim.
        if mode == 'second_tom':
            prompt = PromptTemplate.from_template(
                "You are the objective player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + '{pattern}\n'
                + " Your observation about the game status now is: {observation}\n"
                + '{belief}\n'
                + " Understanding all given information, can you do following things:"
                + " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent. "
                + " Potential {recipient_name}'s actions (if release) and Estimate Winning/Lose/Draw Rate for Each Plan: From the perspective of {recipient_name} , please infer what the action {recipient_name} with probability (normalize to number 100% in total) would do when {recipient_name} holds different cards and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. At last, please calculate the overall winning/lose/draw rates for each plan step by step considering {recipient_name}'s behaviour pattern. Output in a tree-structure: "
                + "Output: Plan 1: If I execute plan1. "
                # The following adjacent literals are implicitly concatenated.
                "The winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, In the xx round, because {recipient_name} holds card1 (probability) and the combination with current public card (if release) (based on my belief on {recipient_name}), and if he sees my action, {recipient_name} will do action1 (probability) ( I actually hold card and the public card (if reveal) is , he holds card1 and the public card (if reveal), considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ), action2 (probability) (considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ),.. (normalize to number 100% in total); \n Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                "The winning/lose/draw rates when {recipient_name} holds card2: Based on {recipient_name}'s behaviour pattern, In the xx round, because {recipient_name} holds card2 (probability) and the combination with current public card (if release) (based on my belief on {recipient_name}) , and if he sees my action, he will do action1 (probability) (I actually hold card and the public card (if reveal) is , he holds card1 and the public card (if reveal), considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ).. action2 (probability) (normalize to number 100% in total) (considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ),.. ;..... continue ....\n Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                "...\n"
                "Plan1 overall {initiator_name}'s Winning/Lose/Draw rates : the Winning rate (probability) for plan 1 is (winning rate for his card1) + (winning rate for his card2) + .. ; Lose rate (probability) for plan 1 : (lose rate for his card1) + (lose rate for his card2) + .. ; Draw Rate (probability) for plan 1 : (draw rate for his card1) + (draw rate for his card2) + ... ; (normalize to number 100% in total) for plan1 \n"
                "Plan 2: If I execute plan2, The winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, In the xx round, if {recipient_name} holds card1 (probability) and the combination with current public card (if release), .. (format is similar with before ) ... continue .."
                "Plan 3: .. Coninue ... "
                + " The number of payoffs for each plan: Understanding your current observation, each new plans, please infer the number of wininng/lose payoffs for each plan step by step, Output: Plan1: After the action, All chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules step by step) : After the action, All chips in the pot: If lose , the lose payoff would be: (Calculated by Lose Payoff Rules step by step). Plan2: After the action, All chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules step by step): After the action, All chips in the pot: If lose , the lose chips would be: (Calculated by Lose Payoff Rules step by step). If the number of my chips in pots have no change, please directly output them. \n"
                + " Estimate Expected Chips Gain for Each Plan: Understanding all the information and Estimate Winning/Lose/Draw Rate for Each Plan, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule) step by step"
                + " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n "
            )

        elif mode == 'first_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + " {pattern} \n"
                + " Your observation about the game status now is: {observation}\n"
                + ' {belief}\n'
                + " Understanding all given information, can you do following things:"
                + " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent."
                + " Potential {recipient_name}'s actions and Estimate Winning/Lose/Draw Rate: From the perspective of {recipient_name}, please infer what the action {recipient_name} with probability (normalize to number 100% in total) would do when {recipient_name} holds different cards, and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. Output in a tree-structure: "
                + "Output: Based on {recipient_name}'s behaviour pattern and Analysis on {recipient_name}'s cards, "
                "Winning/lose/draw rates when {recipient_name} holds card1 in the xx round,: if {recipient_name} holds card1 (probability) (based on my belief on {recipient_name}) with the public card (if release), {recipient_name} will do action1 (probability) (infer I will win/draw/lose step by step (considering Single Game Win/Draw/Lose Rule and my factual card analysis with public card (if release), his card analysis with public card (if release) step by step ), action2 (probability) (infer I will win/draw/lose step by step ),.. (normalize to number 100% in total); Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                "The winning/lose/draw rates when {recipient_name} holds card2 in the xx round,: If {recipient_name} holds card2 (probability) (based on my belief on {recipient_name}) with the public card (if release), he will do action1 (probability) (infer I will win/draw/lose (considering Single Game Win/Draw/Lose Rule and my factual card analysis with current public card (if release), his card analysis with current public card (if release)) step by step ).. action2 (probability) (normalize to number 100% in total) (infer I will win/draw/lose step by step ),.. based on {recipient_name}'s behaviour pattern;..... continue .... Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                "..."
                "Overall {initiator_name}'s Winning/Lose/Draw rates : Based on the above analysis, the Winning rate (probability) is (winning rate for his card1) + (winning rate for his card2) + .. ; Lose rate (probability): (lose rate for his card1) + (lose rate for his card2) + .. ; Draw Rate (probability): (draw rate for his card1) + (draw rate for his card2) + ... ; (normalize to number 100% in total). \n"
                + " Potential believes about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, previous actions summarization, each new plans, Winning Payoff Rule, Lose Payoff Rule, please infer your several believes about the number of chips in pots for each plan step by step, Output: Plan1: Chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules in the game rule) : After the action, If lose , the lose payoff would be: . Plan2: Chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules in the game rule): After the action, If lose , the lose chips would be: . If the number of my chips in pots have no change, please directly output them. "
                + " Estimate Expected Chips Gain for Each Plan: Understanding the game rule, plans, and your knowledge about the {game_name}, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule)., explain what is the results if you do not select the plan, and explain why is this final Expected Chips Gain reasonablely step by step? "
                + " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n\n "
            )
        else:
            # No-theory-of-mind fallback: plans are evaluated without modelling
            # the opponent's beliefs at all; note {belief} is not used here.
            prompt = PromptTemplate.from_template(
                "You are the player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + " {pattern} \n"
                + " Your observation about the game status now is: {observation}\n"
                + " Understanding all given information, can you do following things:"
                + " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent."
                + " Estimate Winning/Lose/Draw Rate for Each Plan: Understanding the given information, and your knowledge about the {game_name}, please estimate the success rate of each step of each plan step by step and the overall average winning/lose/draw rate (normalize to number 100% in total) of each plan/strategy for the current game step by step following the templete: If I do plan1, because I hold card, the public information (if release) and Single Game Win/Draw/Lose Rule, I will win or Lose or draw (probability); ... continue .... Overall win/draw/lose rate: Based on the analysis, I can do the weighted average step by step to get that the overall weighted average winning rate is (probability), average lose rate is (probability), draw rate is (probability) (normalize to number 100% in total)\n "
                + " Potential believes about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, previous actions summarization, each new plans, Winning Payoff Rule, Lose Payoff Rule, please infer your several believes about the number of chips in pots for each plan step by step, Output: Plan1: Chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules in the game rule) : After the action, Chips in the pot: If lose , the lose payoff would be: . Plan2: Chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules in the game rule): After the action, Chips in the pot: If lose , the lose chips would be: . If the number of my chips in pots have no change, please directly output them. "
                + " Estimate Expected Chips Gain for Each Plan: Understanding the game rule, plans, and your knowledge about the {game_name}, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule)., explain what is the results if you do not select the plan, and explain why is this final Expected Chips Gain reasonablely step by step? "
                + " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n\n "
            )

        agent_summary_description = short_memory_summary

        # Fall back to the cached belief when the caller supplied none.
        belief = self.belief if belief is None else belief

        # LLMChain only consumes the keys named in the selected template;
        # extra keys (e.g. last_plan for templates that omit it) are ignored.
        kwargs = dict(
            recent_observations=agent_summary_description,
            last_plan=last_plan,
            belief=belief,
            initiator_name=self.name,
            pattern=pattern,
            recipient_name=recipient_name,
            observation=observation,
            rule=self.rule,
            game_name=self.game_name,
            valid_action_list=valid_action_list
        )

        plan_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
        self.plan = plan_prediction_chain.run(**kwargs)
        self.plan = self.plan.strip()

        return self.plan.strip()
156 |
+
|
157 |
+
|
158 |
+
def get_belief(self, observation: str, recipient_name: str,short_memory_summary:str,pattern:str = "",mode: str = "second_tom") -> str:
|
159 |
+
"""React to get a belief."""
|
160 |
+
if mode == 'second_tom':
|
161 |
+
prompt = PromptTemplate.from_template(
|
162 |
+
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
|
163 |
+
+ " The game rule is: {rule} \n"
|
164 |
+
+ " Your estimated judgement about the behaviour pattern of {recipient_name} and improved strategy is: {pattern} \n"
|
165 |
+
+ " Your observation now is: {observation}\n"
|
166 |
+
+ " Your current game progress summarization including actions and conversations with {recipient_name} is: {recent_observations}\n"
|
167 |
+
+ " Understanding the game rule, the cards you have, your observation, progress summarization in the current game, the estimated behaviour pattern of {recipient_name}, the potential guess pattern of {recipient_name} on you, and your knowledge about the {game_name}, can you do following things? "
|
168 |
+
+ " Analysis on my Cards: Understanding all given information and your knowledge about the {game_name}, please analysis what is your best combination and advantages of your cards in the current round step by step."
|
169 |
+
+ " Belief on {recipient_name}'s cards: Understanding all given information, please infer the probabilities about the cards of {recipient_name} (normalize to number 100% in total) objectively step by step."
|
170 |
+
"Output: {recipient_name} saw my history actions (or not) and then did action1 (probability) in the 1st round , ... continue..... Before this round, {recipient_name} say my history actions (or not) and did action1 (probability), because {recipient_name}'s behaviour pattern and the match with the public card (if release), {recipient_name} tends to have card1 (probability), card2 (probability) ..continue.. (normalize to number 100% in total)."
|
171 |
+
+ " Analysis on {recipient_name}'s Cards: Understanding all given information and your knowledge about the {game_name}, please analysis what is {recipient_name}'s best combination and advantages of {recipient_name}'s cards in the current round step by step."
|
172 |
+
+ " Potential {recipient_name}'s current believes about your cards: Understanding all given information and your knowledge about the {game_name}, If you were {recipient_name} (he can only observe my actions but cannot see my cards), please infer the {recipient_name}'s believes about your cards with probability (normalize to number 100% in total) step by step. Output: {agent_name} did action1 (probability) (after I did action or not) in the 1st round, , ... continue... {agent_name} did action1 (probability) (after I did action or not) in the current round,, from the perspective of {recipient_name}, {agent_name} tends to have card1 (probability), card2 (probability) ... (normalize to number 100% in total) ."
|
173 |
+
)
|
174 |
+
elif mode == 'first_tom':
|
175 |
+
prompt = PromptTemplate.from_template(
|
176 |
+
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
|
177 |
+
+ " The game rule is: {rule} \n"
|
178 |
+
+ " Your estimated judgement about the behaviour pattern of {recipient_name} and improved strategy is: {pattern} \n"
|
179 |
+
+ " Your observation now is: {observation}\n"
|
180 |
+
+ " Your current game progress summarization including actions and conversations with {recipient_name} is: {recent_observations}\n"
|
181 |
+
+ " Understanding the game rule, the cards you have, your observation, progress summarization in the current game, the estimated behaviour pattern of {recipient_name} on you, and your knowledge about the {game_name}, can you do following things? "
|
182 |
+
+ " Analysis on my Cards: Understanding all given information, please analysis what is your best combination and advantages of your cards in the current round step by step."
|
183 |
+
+ " Belief on {recipient_name}'s cards: Understanding all given information, please infer your the probabilities about the cards of {recipient_name} (normalize to number 100% total) step by step. Templete: In the 1st round, {recipient_name} did action1 (probability), ... continue... In the current round, {recipient_name} did action1 (probability), because {recipient_name}'s behaviour pattern and the match with the current public card (if release), he tends to have card1 (probability), card2 (probability) (normalize to number 100% in total). "
|
184 |
+
+ " Analysis on {recipient_name}'s Cards: Understanding all given information, please analysis what is {recipient_name}'s best combination and advantages of {recipient_name}'s cards in the current round step by step."
|
185 |
+
|
186 |
+
)
|
187 |
+
agent_summary_description = short_memory_summary
|
188 |
+
|
189 |
+
kwargs = dict(
|
190 |
+
agent_summary_description=agent_summary_description,
|
191 |
+
recent_observations=agent_summary_description,
|
192 |
+
agent_name=self.name,
|
193 |
+
pattern= pattern,
|
194 |
+
recipient_name=recipient_name,
|
195 |
+
observation=observation,
|
196 |
+
game_name=self.game_name,
|
197 |
+
rule=self.rule
|
198 |
+
|
199 |
+
)
|
200 |
+
print(recipient_name)
|
201 |
+
|
202 |
+
belief_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
|
203 |
+
self.belief = belief_prediction_chain.run(**kwargs)
|
204 |
+
self.belief = self.belief.strip()
|
205 |
+
return self.belief.strip()
|
206 |
+
|
207 |
+
|
208 |
+
    def get_pattern(self, recipient_name: str, game_pattern: str = '', last_k: int = 20, short_summarization: str = '', mode: str = 'second_tom') -> str:
        """Reflect on past games to estimate the opponent's behaviour pattern.

        Feeds the last ``last_k`` long-memory entries to the LLM and asks for
        the opponent's playing pattern, (in 'second_tom' mode) the opponent's
        guess about our own pattern, and a strategy improvement.

        Args:
            recipient_name: The opponent's name.
            game_pattern: Prior pattern text, passed through to the prompt inputs.
            last_k: How many most-recent memory entries to include.
            short_summarization: Unused in this method; kept for interface
                compatibility with callers.
            mode: ``'second_tom'``, ``'first_tom'``, or any other value for the
                no-theory-of-mind variant.

        Returns:
            The stripped reflection text; also cached on ``self.long_belief``.
        """
        # NOTE: the '\%' sequences below are literal backslash-percent text in
        # the prompt; they are preserved exactly.
        if mode == 'second_tom':
            prompt = PromptTemplate.from_template(
                "You are the objective player behind a NPC character called {agent_name}, and you are playing {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
                + " {recipient_name}'s game pattern: Understanding all given information and your understanding about the {game_name}, please infer and estimate as many as possible reasonable {recipient_name}'s game behaviour pattern/preferences for each card he holds and each round with probability (normalize to number 100\% in total for each pattern item) and please also infer advantages of his card, and analysis how the {recipient_name}'s behaviour pattern/preferences are influenced by my actions when he holds different cards step by step. Output as a tree-structure "
                + "Output: When {recipient_name} holds card1 and the combination of public card (if release): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1/action2/action3 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue ... (normalize to number 100% in total), if {recipient_name} sees the action2 of the opponent or not, ... continue ..(more patterns with different actions).. in the 1st round, ; If {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue... (normalize to number 100% in total), ... continue ..(more patterns)..In the 2nd round,;"
                "When {recipient_name} holds card2 and combination of public card (if release): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) .. continue ... (normalize to number 100% in total)...in the 1st round,; .. continue ..(more patterns with different actions).in the 2nd round .. "
                " (more patterns with different cards).. continue.."
                + " {recipient_name}'s guess on my game pattern: Understanding all given information, please infer several reasonable believes about my game pattern/preference when holding different cards from the perspective of {recipient_name} (please consider the advantages of the card, actions and the the match with the public card (if release)) for every round of the game in detail as a tree-structure output step by step"
                + "Output: In the 1st round, When name holds card1 with public card (if release), he would like to do (probabilities), action2 (probabilities) (normalize to number 100% in total) o ... continue .. and then do action ...;"
                "When name holds card2 with public card (if release), ... "
                " .. continue.."
                + " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name} and {recipient_name}'s guess on my game pattern for winning {recipient_name} in the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
                "When I hold card and the public card (if release), and see the action of the opponent, I would like to do action1; ... "
            )
        elif mode == 'first_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
                + " Please understand the game rule, previous all game history and your knowledge about the {game_name}, can you do following things for future games? "
                + " {recipient_name}'s game pattern: Understanding all given information, please infer all possible reasonable {recipient_name}'s game pattern/preferences for each card he holds and each round with probability (normalize to number 100\% in total for each pattern item) for every round of the game as a tree-structure output step by step "
                + "Output: In the 1st round, when name holds card1 and the public card (if release), he would like to do action (probabilities); when name holds card2 and the public card (if release), he would like to do action (probabilities), ... continue.. In the 2nd round, when name holds card1 and the public card (if release), .(similar with before).. continue. "
                + " Number of chips reason: Think about why you can have these chips in all previous games step by step. "
                + " Reflex: Reflex which your actions are right or wrong in previous games to win or Lose conrete chips step by step (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions) "
                + " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name} for winning {recipient_name} in the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
            )
        else:
            prompt = PromptTemplate.from_template(
                "You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
                + " Please understand the game rule, previous all game history and your knowledge about the {game_name}, can you do following things for future games? "
                + " Number of chips reason: Think about why you can have these chips in all previous games step by step. "
                + " Reflex: Reflex which your actions are right or wrong in previous games to win or Lose conrete chips step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions) "
                + " Strategy Improvement: Understanding the above information, think about what strategies I need to adopt to win {recipient_name} for the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
            )
        reflection_chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
        # Only the most recent `last_k` memory entries are sent to the LLM.
        long_memory = self.memory[-last_k:]
        long_memory_str = "\n\n".join([o for o in long_memory])

        # `game_pattern` is only consumed if the selected template names it;
        # extra keys are ignored by LLMChain.
        kwargs = dict(
            long_memory=long_memory_str,
            game_pattern=game_pattern,
            agent_name=self.name,
            recipient_name=recipient_name,
            game_name=self.game_name,
            rule=self.rule
        )
        # print(kwargs)

        self.long_belief = reflection_chain.run(**kwargs)
        self.long_belief = self.long_belief.strip()
        return self.long_belief.strip()
268 |
+
|
269 |
+
|
270 |
+
def get_summarization(self, recipient_name: str, game_memory: str, opponent_name: str) -> str:
    """Ask the LLM to summarize one game's memory and infer the opponent's hidden card.

    Args:
        recipient_name: Name of the player this agent is talking to.
        game_memory: Raw memory text (observations, actions, conversations) of one game.
        opponent_name: Name of the opponent whose card should be reasoned about.

    Returns:
        The stripped LLM output (also cached on ``self.long_belief``).
    """
    # The template pieces below are joined by implicit string concatenation;
    # the resulting prompt text is identical to a '+'-chained build.
    template = (
        "You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
        " The game rule is: {rule} \n"
        " The observation conversion rules are: {observation_rule}\n"
        " One game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
        " Understanding the game rule, observation conversion rules and game history and your knowledge about the {game_name}, can you do following things:"
        " History summarization: summary the game history with action, observation, and results information? using the templete, and respond shortly: In the first round of first game, name holds card1 does action .... continue ..."
        "{opponent_name}'s card reasoning: If the card of {opponent_name} is not available, because {agent_name}'s card is xx and public card (if release) is xxx, and {opponent_name} behaviours are xx, the current game result is xx, please infer {opponent_name}'s card with probability (100% in total) with your understanding about the above all information confidently step by step. \n"
    )
    summarization_chain = LLMChain(
        llm=self.llm,
        prompt=PromptTemplate.from_template(template),
        verbose=self.verbose,
    )
    # Cache the result on the agent, mirroring the other reflection helpers.
    self.long_belief = summarization_chain.run(
        observation_rule=self.observation_rule,
        long_memory=game_memory,
        agent_name=self.name,
        recipient_name=recipient_name,
        opponent_name=opponent_name,
        game_name=self.game_name,
        rule=self.rule,
    ).strip()
    return self.long_belief
|
298 |
+
|
299 |
+
|
300 |
+
def get_short_memory_summary(self, observation: str, recipient_name: str, short_memory_summary: str) -> str:
    """Summarize the current in-game history into a short tree-structured text.

    Args:
        observation: The agent's current readable observation.
        recipient_name: The opponent's name.
        short_memory_summary: Concatenated history (actions, observations, talk).

    Returns:
        The stripped summary (also cached on ``self.belief``).
    """
    # Implicit concatenation keeps the prompt text identical to the
    # original '+'-chained version.
    template = (
        "You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
        " The game rule is: {rule} \n"
        " Your current observation is: {observation}\n"
        " The current game history including previous action, observations and conversation is: {agent_summary_description}\n"
        " Based on the game rule, your observation and your knowledge about the {game_name}, please summarize the current history. Output as a tree-structure, and respond shortly: "
        " In the first round, name does action, and say xxx .... continue ..."
    )
    summary_chain = LLMChain(llm=self.llm, prompt=PromptTemplate.from_template(template))
    # `recent_observations` is not referenced by the template; it is passed
    # anyway to keep the chain inputs identical to the original call.
    self.belief = summary_chain.run(
        agent_summary_description=short_memory_summary,
        recent_observations=short_memory_summary,
        agent_name=self.name,
        recipient_name=recipient_name,
        observation=observation,
        game_name=self.game_name,
        rule=self.rule,
    ).strip()
    return self.belief
|
328 |
+
|
329 |
+
|
330 |
+
|
331 |
+
def convert_obs(self, observation: str, recipient_name: str, user_index: str, valid_action_list: str) -> str:
    """Convert a raw environment observation and action list into readable text.

    Args:
        observation: Raw observation from the environment.
        recipient_name: The opponent's name.
        user_index: This agent's player index in the environment.
        valid_action_list: The raw list of currently legal actions.

    Returns:
        The stripped readable-text rendering (also cached on ``self.read_observation``).
    """
    # Template assembled via implicit concatenation; the text matches the
    # original '+'-chained prompt byte for byte.
    template = (
        "You are the player behind a NPC character called {agent_name} with player index {user_index}, and you are playing the board game {game_name} with {recipient_name}. \n"
        " The game rule is: {rule} \n"
        " Your observation now is: {observation}\n"
        " You will receive a valid action list you can perform in this turn \n"
        " Your valid action list is: {valid_action_list}\n"
        " The observation conversion rules are: {observation_rule}\n"
        " Please convert {observation} and {valid_action_list} to the readable text based on the observation conversion rules and your knowledge about the {game_name} (respond shortly).\n\n"
    )
    conversion_chain = LLMChain(llm=self.llm, prompt=PromptTemplate.from_template(template))
    self.read_observation = conversion_chain.run(
        user_index=user_index,
        agent_name=self.name,
        rule=self.rule,
        recipient_name=recipient_name,
        observation=observation,
        valid_action_list=valid_action_list,
        game_name=self.game_name,
        observation_rule=self.observation_rule,
    ).strip()
    return self.read_observation
|
356 |
+
|
357 |
+
|
358 |
+
|
359 |
+
def action_decision(self, observation: str, valid_action_list: List[str], promp_head: str, act: str = None, short_memory_summary: str = "") -> Tuple[str, str]:
    """Select the next action from the plan, plus an optional sentence to say.

    Args:
        observation: Readable observation text for this turn.
        valid_action_list: Actions the environment will currently accept.
        promp_head: Prompt prefix. On retries it may embed the literal
            placeholders ``{act}`` / ``{valid_action_list}``, which are then
            resolved from the kwargs passed to the chain below.
        act: The previously attempted (invalid) action, substituted into
            ``promp_head`` on retries; ``None`` on the first attempt.
        short_memory_summary: Summary of the current game history.

    Returns:
        Tuple ``(action, sentence)``; ``sentence`` is ``""`` when the model
        produced no ``|``-separated table talk.
    """
    # (Fix: the original had the same docstring line duplicated twice.)
    prompt = PromptTemplate.from_template(
        promp_head
        + "\nYour plan is: {plan}"
        + "\n Based on the plan, please select the next action from the available action list: {valid_action_list} (Just one word) and say something to the opponent player to bluff or confuse him or keep silent to finally win the whole game and reduce the risk of your action (respond sentence only). Please respond them and split them by |"
        + "\n\n"
    )

    # Extra keys (agent_summary_description, observation, belief, act, ...)
    # are supplied so that any placeholder embedded in promp_head resolves.
    kwargs = dict(
        agent_summary_description=short_memory_summary,
        agent_name=self.name,
        game_name=self.game_name,
        observation=observation,
        agent_status=self.status,
        valid_action_list=valid_action_list,
        plan=self.plan,
        belief=self.belief,
        act=act,
    )
    action_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)

    result = action_prediction_chain.run(**kwargs)
    # Expected answer shape: "action | sentence"; fall back to a silent move
    # when the model omitted the separator.
    if "|" in result:
        result, result_comm = result.split("|", 1)
    else:
        result_comm = ""
    return result.strip(), result_comm.strip()
|
392 |
+
|
393 |
+
def make_act(self, observation: str, opponent_name: str, player_index: int, valid_action_list: List, verbose_print: bool, game_idx: int, round: int, bot_short_memory: List, bot_long_memory: List, console, log_file_name='', mode='second_tom') -> Tuple[bool, str]:
    """Run one full decision cycle: observe -> summarize -> (pattern/belief) -> plan -> act.

    Args:
        observation: Raw observation from the environment.
        opponent_name: Name of the opposing player.
        player_index: This agent's index in the memory lists.
        valid_action_list: Legal actions for this turn.
        verbose_print: Whether to echo intermediate reasoning to the console.
        game_idx: Zero-based index of the current game.
        round: Current round number inside the game.
        bot_short_memory / bot_long_memory: Per-player running memory lists
            (mutated in place and also returned).
        console: rich Console used for colored output.
        log_file_name: Prefix for structured logs; ``None`` disables logging.
        mode: Theory-of-mind mode ('second_tom', 'first_tom', or other).

    Returns:
        (act, comm, bot_short_memory, bot_long_memory).
    """
    # Render the raw observation into readable text via the LLM.
    readable_text_amy_obs = self.convert_obs(observation, opponent_name, player_index, valid_action_list)
    if verbose_print:
        util.get_logging(logger_name=log_file_name + '_obs',
                         content={str(game_idx + 1) + "_" + str(round): {"raw_obs": observation,
                                                                        "readable_text_obs": readable_text_amy_obs}})
        console.print('readable_text_obs: ', style="red")
        print(readable_text_amy_obs)
    time.sleep(0)
    # A length-1 short memory means only the "Game Start" marker is present,
    # i.e. this is the first move of the game — skip the LLM summarization.
    if len(bot_short_memory[player_index]) == 1:
        short_memory_summary = f'{game_idx+1}th Game Start \n' + readable_text_amy_obs
    else:
        short_memory_summary = self.get_short_memory_summary(observation=readable_text_amy_obs, recipient_name=opponent_name, short_memory_summary='\n'.join(bot_short_memory[player_index]))

    if log_file_name is not None:
        util.get_logging(logger_name=log_file_name + '_short_memory',
                         content={str(game_idx + 1) + "_" + str(round): {
                             "raw_short_memory": '\n'.join(bot_short_memory[player_index]),
                             "short_memory_summary": short_memory_summary}})
    if verbose_print:
        console.print('short_memory_summary: ', style="yellow")
        print(short_memory_summary)

    time.sleep(0)
    # Early rounds: refresh the opponent behaviour pattern model.
    if round <= 1:
        self.pattern = self.get_pattern(opponent_name, '', short_summarization=short_memory_summary, mode=mode)
        if log_file_name is not None:
            util.get_logging(logger_name=log_file_name + '_pattern_model',
                             content={str(game_idx + 1) + "_" + str(round): self.pattern})
        console.print('pattern: ', style="blue")
        print(self.pattern)

    time.sleep(0)
    print(opponent_name)

    # Theory-of-mind modes additionally form an explicit belief about the
    # opponent; other modes proceed with an empty belief.
    if mode == 'second_tom' or mode == 'first_tom':
        belief = self.get_belief(readable_text_amy_obs, opponent_name, short_memory_summary=short_memory_summary, pattern=self.pattern, mode=mode)
        if verbose_print:
            console.print(self.name + " belief: ", style="deep_pink3")
            print(self.name + " belief: " + str(belief))
            util.get_logging(logger_name=log_file_name + '_belief',
                             content={str(game_idx + 1) + "_" + str(round): {
                                 "belief": str(belief)}})
    else:
        belief = ''

    time.sleep(0)
    plan = self.planning_module(readable_text_amy_obs, opponent_name, belief=belief, valid_action_list=valid_action_list, short_memory_summary=short_memory_summary, pattern=self.pattern, last_plan='', mode=mode)
    if verbose_print:
        console.print(self.name + " plan: ", style="orchid")
        print(self.name + " plan: " + str(plan))
        util.get_logging(logger_name=log_file_name + '_plan',
                         content={str(game_idx + 1) + "_" + str(round): {
                             "plan": str(plan)}})
    time.sleep(0)
    promp_head = ''
    act, comm = self.action_decision(readable_text_amy_obs, valid_action_list, promp_head, short_memory_summary=short_memory_summary)
    if log_file_name is not None:
        util.get_logging(logger_name=log_file_name + '_act',
                         content={str(game_idx + 1) + "_" + str(round): {
                             "act": str(act), "talk_sentence": str(comm)}})

    # Retry loop: the braces below are deliberate PromptTemplate placeholders
    # ({act}, {valid_action_list}), resolved from the kwargs inside
    # action_decision — NOT a forgotten f-string.
    # NOTE(review): the retry call drops short_memory_summary (falls back to
    # its "" default) — confirm this is intentional.
    while act not in valid_action_list:
        print('Action + ', str(act), ' is not a valid action in valid_action_list, please try again.\n')
        promp_head += 'Action {act} is not a valid action in {valid_action_list}, please try again.\n'
        act, comm = self.action_decision(readable_text_amy_obs, valid_action_list, promp_head, act)
    print(self.name + " act: " + str(act))
    print(comm)

    # Short memory: full readable observation for self, action-only view for
    # the opponent (opponent cannot see our observation).
    bot_short_memory[player_index].append(f"{self.name} have the observation {readable_text_amy_obs}, try to take action: {act} and say {comm} to {opponent_name}")
    bot_short_memory[((player_index + 1) % 2)].append(f"{self.name} try to take action: {act} and say {comm} to {opponent_name}")

    # Long memory stores the raw (unconverted) observation.
    bot_long_memory[player_index].append(
        f"{self.name} have the observation {observation}, try to take action: {act} and say {comm} to {opponent_name}")
    return act, comm, bot_short_memory, bot_long_memory
|
app.py
ADDED
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
import inquirer
|
9 |
+
import typer
|
10 |
+
from rich.console import Console
|
11 |
+
from rich.prompt import IntPrompt, Prompt, Confirm
|
12 |
+
import argparse
|
13 |
+
import logging
|
14 |
+
|
15 |
+
import util
|
16 |
+
from model import get_all_embeddings, get_all_llms
|
17 |
+
from setting import Settings, get_all_model_settings, load_model_setting
|
18 |
+
# import Settings, get_all_model_settings, load_model_setting
|
19 |
+
from model import agi_init
|
20 |
+
import gym
|
21 |
+
from retriever import (
|
22 |
+
create_new_memory_retriever,
|
23 |
+
)
|
24 |
+
import gym_coup
|
25 |
+
import random
|
26 |
+
from rlcard.utils import set_seed
|
27 |
+
import rlcard
|
28 |
+
from rlcard import models
|
29 |
+
from rlcard.models import leducholdem_rule_models
|
30 |
+
|
31 |
+
|
32 |
+
#Inferenec function
|
33 |
+
#Inferenec function
def predict(openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter, dialogue_chatbot=[], system_chatbot=[], history=[]):
    """Main Gradio callback: verify the key / build the game on first call, then
    play one user action followed by one Suspicion-Agent response.

    NOTE(review): dialogue_chatbot/system_chatbot/history use mutable default
    arguments — in practice Gradio always supplies them, but confirm; otherwise
    state would leak across sessions.

    Returns the 6-tuple Gradio expects:
    (system_chatbot, dialogue_chatbot, valid_actions_update, history, chat_counter, status_message).
    """
    # Unpack persistent per-session state from the Gradio state dict.
    verified, settings, env, ctx = history['verified'], history['settings'], history['env'], history['ctx']
    bot_long_memory, bot_short_memory = history['bot_long_memory'], history['bot_short_memory']
    agents_num, chips, user_index, game_idx, mode = history['agents_num'], history['chips'], history['user_index'], history['game_idx'], history['mode']

    status_message = ''
    valid_actions = gr.Dropdown.update(choices=[], value=None)

    # ---------- First call: verify key, build agents and environment ----------
    if env is None: #settings.model.llm.openai_api_key is None:
        if not verified:
            res = util.verify_openai_token(openai_gpt4_key)
            if res != "OK":
                return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, res
            history['verified'] = True

        if game_selection == '' or game_selection is None:
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, "Please select a game."

        settings = Settings()
        settings.model = load_model_setting("openai-gpt-4-0613")

        #settings.model.llm.openai_api_key = None
        #settings.model.embedding.openai_api_key = None

        settings.model.llm.openai_api_key = openai_gpt4_key
        settings.model.embedding.openai_api_key = openai_gpt4_key
        res = util.verify_model_initialization(settings)

        if res != "OK":
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, res
        else:
            # read agents configs
            agent1_file = 'person_config/Persuader.json'
            agent1_config = util.load_json(Path(agent1_file))
            agent1_config["path"] = agent1_file

            agent2_file = 'person_config/GoodGuy.json'
            agent2_config = util.load_json(Path(agent2_file))
            agent2_config["path"] = agent2_file

            agent_configs = [agent1_config, agent2_config]
            agent_names = [agent1_config["name"], agent2_config["name"]]
            # Map UI selection to the matching game-config file.
            if game_selection == 'coup':
                game_config_file = 'game_config/coup.json'
            elif game_selection == 'leduc-holdem':
                game_config_file = 'game_config/leduc_limit.json'
            elif game_selection == 'limit-holdem':
                game_config_file = 'game_config/limit_holdem.json'

            game_config = util.load_json(Path(game_config_file))
            game_config["path"] = game_config_file

            user_index = 1
            console = Console()
            ctx = agi_init(agent_configs, game_config, console, settings, user_index)

            os.environ["OPENAI_API_KEY"] = openai_gpt4_key
            print(game_selection)
            # rlcard backs the poker games; gym backs coup.
            if game_selection != 'coup':
                env = rlcard.make(game_selection)
            else:
                env = gym.make('coup-v0')
            env.reset()

            history['env'] = env
            history['ctx'] = ctx

            #valid_actions['label'] = 'hello there'

            # Seed one short- and one long-memory list per agent.
            for i in range(agents_num):
                bot_short_memory.append([f'{game_idx+1}th Game Start'])
                bot_long_memory.append([f'{game_idx+1}th Game Start'])

            status_message = 'Verified.'
            if game_selection != 'coup':
                valid_actions = f"{env.get_state(env.get_player_id())['raw_legal_actions']}"
            else:
                valid_action_list = env.get_valid_actions(text=True)

            # check if opponent makes move first
            if game_selection != 'coup':
                idx = env.get_player_id()
            else:
                idx = env.game.whose_action
            if idx != user_index:
                amy = ctx.robot_agents[idx]
                if game_selection != 'coup':
                    amy_obs = env.get_state(env.get_player_id())['raw_obs']
                    amy_index = env.get_player_id()
                    amy_obs['game_num'] = game_idx+1
                    amy_obs['rest_chips'] = chips[idx]
                    amy_obs['opponent_rest_chips'] = chips[(idx+1)%agents_num]
                    valid_action_list = env.get_state(env.get_player_id())['raw_legal_actions']
                else:
                    amy_obs = env.get_obs(text=True, p2_view=(idx==1))
                    amy_index = env.game.whose_action
                    valid_action_list = env.get_valid_actions(text=True)

                opponent_name = ctx.robot_agents[(idx+1)%agents_num].name

                # Let the bot take the opening move.
                act, comm, bot_short_memory, bot_long_memory = amy.make_act(amy_obs, opponent_name, amy_index, valid_action_list, verbose_print=False,
                            game_idx=game_idx, round=0, bot_short_memory=bot_short_memory, bot_long_memory=bot_long_memory, console=Console(),
                            log_file_name=None, mode=mode)
                if game_selection != 'coup':
                    env.step(act, raw_action=True)
                else:
                    env.step(act)
                    win_message = env.game.call_system_info()
                    # print(win_message)
                    if win_message is not None:
                        print(win_message)
                        # Rewrite engine-internal player labels to agent names.
                        win_message = win_message.replace('Player 0', ctx.robot_agents[0].name)
                        win_message = win_message.replace('Player 1', ctx.robot_agents[1].name)

                        win_message = win_message.replace('I', ctx.robot_agents[idx].name)
                        win_message = win_message.replace('the opponent', ctx.robot_agents[(idx + 1) % agents_num].name)

                        # NOTE(review): this appends to the outer memory lists
                        # (not the per-player sublists) — confirm intended.
                        bot_short_memory.append(win_message)
                        bot_long_memory.append(win_message)

                dialogue_chatbot.append((None, comm))
                system_chatbot.append((None, f'Suspicion-Agent action: {act}'))

            # get user observation
            idx = user_index #env.get_player_id()
            if game_selection != 'coup':
                amy_obs = env.get_state(idx)['raw_obs']
                #amy_obs['game_num'] = game_idx+1
                amy_obs['rest_chips'] = chips[idx]
                amy_obs['opponent_rest_chips'] = chips[(idx+1)%agents_num]

                valid_actions = env.get_state(idx)['raw_legal_actions']
            else:
                amy_obs = env.get_obs(text=True, p2_view=(idx==1))
                valid_actions = env.get_valid_actions(text=True)

            if game_selection != 'coup':
                game_state_string = ""
                for key, value in amy_obs.items():
                    if key != 'legal_actions':
                        game_state_string += f"{key}: {value}\n"

                system_chatbot.append((f'Game state:\n{game_state_string}', None))
            else:
                system_chatbot.append((f'Game state:\n{amy_obs}', None))
            #system_chatbot.append((f'{amy_obs}', None))

            valid_actions = gr.Dropdown.update(choices=valid_actions, value=None)
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message
    #else:
    #    return system_chatbot, dialogue_chatbot, history, chat_counter, "Already Verified."

    # ---------- Subsequent calls: one user move + one bot move ----------
    # check if game is over
    if game_selection != 'coup':
        game_over = env.is_over()
    else:
        game_over = env.game.game_over
    if game_over:
        status_message = "Game ended."
        return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message

    if action is None:
        status_message = "No action received."
        return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message
    # Reject actions that are not currently legal.
    if game_selection != 'coup':
        if action not in env.get_state(env.get_player_id())['raw_legal_actions']:
            status_message = "Not a valid action. Please enter a valid action."
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message
    else:
        if action not in env.get_valid_actions(text=True):
            status_message = "Not a valid action. Please enter a valid action."
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message

    # message can be empty
    #if inputs is None or inputs == "":
    #    status_message += " No message received."
    #    return system_chatbot, dialogue_chatbot, history, chat_counter, status_message

    # user takes action
    if game_selection != 'coup':
        env_state = env.get_state(env.get_player_id())['raw_obs']
    else:
        env_state = env.get_obs(text=True, p2_view=(env.game.whose_action==1))

    # here action comes from user input
    #act,_ = rule_model.eval_step(env.get_state(env.get_player_id()))
    act = action #env.get_state(env.get_player_id())['raw_legal_actions'][0]
    # Record the user's move: the bot sees only the action + chat text, while
    # the user's own long memory keeps the full observation.
    if game_selection != 'coup':
        bot_short_memory[(user_index + 1) % agents_num].append(
            f"The valid action list of {ctx.robot_agents[user_index].name} is {env.get_state(env.get_player_id())['raw_legal_actions']}, and he tries to take action: {act}. He said, {inputs}")
        # bot_short_memory[(args.user_index) % args.agents_num].append(
        #     f"{ctx.robot_agents[args.user_index].name} have the observation: {env.get_state(env.get_player_id())['raw_obs']}, and try to take action: {act}.")
        bot_long_memory[(user_index) % agents_num].append(
            f"{ctx.robot_agents[user_index].name} have the observation: {env.get_state(env.get_player_id())['raw_obs']}, and try to take action: {act}.")
        # bot_long_memory[(args.user_index) % args.agents_num].append(
        #     f"{ctx.robot_agents[args.user_index].name} try to take action: {act}.")
    else:
        bot_short_memory[(user_index + 1) % agents_num].append(
            f"The valid action list of {ctx.robot_agents[user_index].name} is {env.get_valid_actions(text=True)}, and he tries to take action: {act}. He said, {inputs}")
        # bot_short_memory[(args.user_index) % args.agents_num].append(
        #     f"{ctx.robot_agents[args.user_index].name} have the observation: {env.get_state(env.get_player_id())['raw_obs']}, and try to take action: {act}.")
        bot_long_memory[(user_index) % agents_num].append(
            f"{ctx.robot_agents[user_index].name} have the observation: {env.get_obs(text=True,p2_view = (env.game.whose_action==1))}, and try to take action: {act}.")
        # bot_long_memory[(args.user_index) % args.agents_num].append(
        #     f"{ctx.robot_agents[args.user_index].name} try to take action: {act}.")

    if game_selection != 'coup':
        env.step(act, raw_action=True)
    else:
        env.step(act)

    comm = None
    if game_selection != 'coup':
        game_over = env.is_over()
    else:
        game_over = env.game.game_over
    if not game_over:
        # opponent move
        # bot reaction

        if game_selection != 'coup':
            idx = env.get_player_id()
            amy = ctx.robot_agents[idx]
            amy_index = env.get_player_id()
            amy_obs = env.get_state(env.get_player_id())['raw_obs']
            amy_obs['game_num'] = game_idx+1
            amy_obs['rest_chips'] = chips[idx]
            amy_obs['opponent_rest_chips'] = chips[(idx+1)%agents_num]
            valid_action_list = env.get_state(env.get_player_id())['raw_legal_actions']
        else:
            idx = env.game.whose_action
            amy = ctx.robot_agents[idx]
            amy_index = env.game.whose_action
            amy_obs = env.get_obs(text=True, p2_view=(idx==1))
            valid_action_list = env.get_valid_actions(text=True)
        opponent_name = ctx.robot_agents[(idx+1)%agents_num].name

        act, comm, bot_short_memory, bot_long_memory = amy.make_act(amy_obs, opponent_name, amy_index, valid_action_list, verbose_print=False,
                    game_idx=game_idx, round=0, bot_short_memory=bot_short_memory, bot_long_memory=bot_long_memory, console=Console(),
                    log_file_name=None, mode=mode)
        if game_selection != 'coup':
            env.step(act, raw_action=True)
        else:
            env.step(act)

    # Refresh the user's observation after both moves.
    idx = user_index # env.get_player_id()
    if game_selection != 'coup':
        amy_obs = env.get_state(idx)['raw_obs']
    else:
        amy_obs = env.get_obs(text=True, p2_view=(idx==1))
    #amy_obs['game_num'] = game_idx+1
    if game_selection != 'coup':
        amy_obs['rest_chips'] = chips[idx]
        amy_obs['opponent_rest_chips'] = chips[(idx+1)%agents_num]

        valid_actions = env.get_state(idx)['raw_legal_actions']
    else:
        valid_actions = env.get_valid_actions(text=True)

    # NOTE(review): in the coup branch amy_obs appears to be a string
    # (get_obs(text=True)), so .items() below would raise — confirm get_obs's
    # return type for text=True.
    game_state_string = ""
    for key, value in amy_obs.items():
        if key != 'legal_actions':
            game_state_string += f"{key}: {value}\n"

    dialogue_chatbot.append((inputs if inputs != "" else None, comm))
    system_chatbot.append((f'My action: {action}', f'Suspicion-Agent action: {act}'))
    system_chatbot.append((f'Game state:\n{game_state_string}', None))

    # ---------- Game-over handling: settle chips and reveal the bot hand ----------
    if game_selection != 'coup':
        if env.is_over():
            pay_offs = env.get_payoffs()
            for idx in range(len(pay_offs)):
                pay_offs[idx] = pay_offs[idx]*2
                chips[idx] += pay_offs[idx]
            if pay_offs[user_index] > 0:
                win_message = f'You win {pay_offs[user_index]} chips, Suspicion-Agent lose {pay_offs[user_index]} chips'
            else:
                win_message = f'Suspicion-Agent win {pay_offs[(user_index+1)%agents_num]} chips, you lose {pay_offs[(user_index+1)%agents_num]} chips'

            idx = (user_index + 1)%agents_num
            amy_obs = env.get_state(idx)['raw_obs']
            bot_hand = amy_obs['hand']
            system_chatbot.append((None, f'Suspicion-Agent hand: {bot_hand}'))
            system_chatbot.append((f'Gameover.\n {win_message}', None))

            valid_actions = []
    else:
        if env.game.game_over:
            idx = (user_index + 1)%agents_num
            amy_obs = env.get_obs(text=True, p2_view=(idx==1))
            bot_hand = amy_obs
            system_chatbot.append((None, f'Suspicion-Agent hand: {bot_hand}'))
            # NOTE(review): win_message is never assigned in this coup path —
            # this line would raise NameError when a coup game ends; confirm.
            system_chatbot.append((f'Gameover.\n {win_message}', None))
            valid_actions = []
    status_message += " Message received."
    valid_actions = gr.Dropdown.update(choices=valid_actions, value=None)
    return system_chatbot, dialogue_chatbot, valid_actions, history, 1, status_message
|
335 |
+
|
336 |
+
#Resetting to blank
|
337 |
+
def reset_textbox():
    """Clear the user input textbox (Gradio event handler)."""
    cleared = gr.update(value='')
    return cleared
|
339 |
+
|
340 |
+
#to set a component as visible=False
|
341 |
+
def set_visible_false():
    """Hide a component (Gradio event handler)."""
    hidden = gr.update(visible=False)
    return hidden
|
343 |
+
|
344 |
+
#to set a component as visible=True
|
345 |
+
def set_visible_true():
    """Show a component (Gradio event handler)."""
    shown = gr.update(visible=True)
    return shown
|
347 |
+
|
348 |
+
def update_instruction(game_selection):
    """Load the selected game's config and render its rule text for the UI.

    Args:
        game_selection: Value of the game dropdown ('coup', 'leduc-holdem',
            'limit-holdem'), or ''/None when nothing is selected.

    Returns:
        Formatted "Game rule / Observation Rule" text, or None when no
        (or an unknown) game is selected.
    """
    # One mapping replaces the original triplicated open/json.load branches;
    # this also fixes a latent NameError: for an unrecognized selection the
    # original fell through with `contents` unbound.
    config_paths = {
        'coup': './game_config/coup.json',
        'leduc-holdem': './game_config/leduc_limit.json',
        'limit-holdem': './game_config/limit_holdem.json',
    }
    path = config_paths.get(game_selection)
    if path is None:
        # Matches the original implicit None return for ''/None selections.
        return None
    with open(path) as file:
        contents = json.load(file)
    return f"Game rule: {contents['game_rule']}\n\n\nObservation Rule: {contents['observation_rule']}"
|
360 |
+
|
361 |
+
# update valid actions list
|
362 |
+
# update valid actions list
def set_valid_actions():
    """Return a Gradio update containing the current valid-action list.

    Returns an empty-value update when no environment exists yet.

    NOTE(review): `env` and `game_selection` are resolved as module globals
    here; confirm they are bound before this callback can fire.
    """
    # Fix: the original printed via `env` BEFORE the `env is None` guard,
    # so the no-environment case raised instead of returning the empty update.
    if env is None:
        return gr.update(value='')
    if game_selection != 'coup':
        valid_actions_list = env.get_state(env.get_player_id())['raw_legal_actions']
    else:
        valid_actions_list = env.get_valid_actions(text=True)
    # Keep the original debug echo, now on a safely computed value.
    print(valid_actions_list)
    return gr.update(value=f'{valid_actions_list}')
|
375 |
+
|
376 |
+
# --- Static UI text and theme for the Gradio demo (referenced in the Blocks layout below) ---

# Page heading, injected as raw HTML via gr.HTML(title).
title = """<h1 align="center">Suspicion-Agent Demo</h1>"""
#display message for themes feature
theme_addon_msg = """<center>This is an official Demo for <b>Suspicion-Agent: Playing Imperfect Information Games with Theory of Mind Aware GPT4</b>. Check out our paper for more details <a href="https://gradio.app/theming-guide/" target="_blank">here</a>!</center>
"""

#Using info to add additional information about System message in GPT4
# Tooltip/info text for the (currently commented-out) system-message textbox.
system_msg_info = """A conversation could begin with a system message to gently instruct the assistant.
System message helps set the behavior of the AI Assistant. For example, the assistant could be instructed with 'You are a helpful assistant.'"""

#Modifying existing Gradio Theme
theme = gr.themes.Soft(primary_hue="zinc", secondary_hue="blue", neutral_hue="blue",
                      text_size=gr.themes.sizes.text_lg)
388 |
+
|
389 |
+
with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""",
|
390 |
+
theme=theme) as demo:
|
391 |
+
gr.HTML(title)
|
392 |
+
gr.HTML("""<h3 align="center">🔥This Huggingface Gradio Demo provides a variety of game matches against a GPT4 agent. Please note that you would be needing an OPENAI API key for GPT4 access</h1>""")
|
393 |
+
gr.HTML(theme_addon_msg)
|
394 |
+
gr.HTML('''<center><a href="https://huggingface.co/spaces/paulyoo/suspicion_agent_demo?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API Key</center>''')
|
395 |
+
|
396 |
+
initial_state = {
|
397 |
+
'verified': False,
|
398 |
+
'settings': None,
|
399 |
+
'env': None,
|
400 |
+
'ctx': None,
|
401 |
+
|
402 |
+
'bot_long_memory': [],
|
403 |
+
'bot_short_memory': [],
|
404 |
+
|
405 |
+
'agents_num': 2,
|
406 |
+
'chips': [50,50],
|
407 |
+
'user_index': 1,
|
408 |
+
'game_idx': 0,
|
409 |
+
'mode': 'first_tom',
|
410 |
+
}
|
411 |
+
|
412 |
+
with gr.Column(elem_id = "col_container"):
|
413 |
+
#Users need to provide their own GPT4 API key, it is no longer provided by Huggingface
|
414 |
+
with gr.Row():
|
415 |
+
with gr.Column():
|
416 |
+
openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", value="", type="password", placeholder="sk..", info = "You have to provide your own GPT4 keys for this app to function properly",)
|
417 |
+
with gr.Column():
|
418 |
+
game_selection = gr.Dropdown(
|
419 |
+
["leduc-holdem", "limit-holdem","coup"], label="Game Selections", info="Select the game to play from the dropdown"
|
420 |
+
)
|
421 |
+
#with gr.Accordion(label="System message:", open=False):
|
422 |
+
# system_msg = gr.Textbox(label="Instruct the AI Assistant to set its beaviour", info = system_msg_info, value="",placeholder="Type here..")
|
423 |
+
# accordion_msg = gr.HTML(value="🚧 To set System message you will have to refresh the app", visible=False)
|
424 |
+
|
425 |
+
with gr.Row():
|
426 |
+
instruction_panel = gr.Textbox(label='Game Instructions')
|
427 |
+
|
428 |
+
with gr.Row():
|
429 |
+
with gr.Column():
|
430 |
+
system_chatbot = gr.Chatbot(label='Game Status', elem_id="system_chatbot")
|
431 |
+
with gr.Column():
|
432 |
+
dialogue_chatbot = gr.Chatbot(label='Dialogue with GPT4', elem_id="dialogue_chatbot")
|
433 |
+
|
434 |
+
#chatbot = gr.Chatbot(label='GPT4', elem_id="chatbot")
|
435 |
+
#action = gr.Radio(['call', 'raise', 'fold'], label="Actions", info="Select the action to play")
|
436 |
+
action = gr.Dropdown(placeholder="", label="Select an action.", info="")
|
437 |
+
inputs = gr.Textbox(placeholder="", label="Type a message for the opponent. Messages are optional.")
|
438 |
+
state = gr.State(initial_state)
|
439 |
+
with gr.Row():
|
440 |
+
with gr.Column(scale=7):
|
441 |
+
b1 = gr.Button().style(full_width=True)
|
442 |
+
with gr.Column(scale=3):
|
443 |
+
server_status_code = gr.Textbox(label="Status code from OpenAI server", )
|
444 |
+
|
445 |
+
#top_p, temperature
|
446 |
+
with gr.Accordion("Parameters", open=False):
|
447 |
+
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
|
448 |
+
temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
|
449 |
+
chat_counter = gr.Number(value=0, visible=False, precision=0)
|
450 |
+
|
451 |
+
#Event handling
|
452 |
+
inputs.submit(predict, [openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter, dialogue_chatbot, system_chatbot, state], [system_chatbot, dialogue_chatbot, action, state, chat_counter, server_status_code],) #openai_api_key
|
453 |
+
b1.click(predict, [openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter, dialogue_chatbot, system_chatbot, state], [system_chatbot, dialogue_chatbot, action, state, chat_counter, server_status_code],) #openai_api_key
|
454 |
+
|
455 |
+
#inputs.submit(set_visible_false, [], [system_msg])
|
456 |
+
#b1.click(set_visible_false, [], [system_msg])
|
457 |
+
#inputs.submit(set_visible_true, [], [accordion_msg])
|
458 |
+
#b1.click(set_visible_true, [], [accordion_msg])
|
459 |
+
|
460 |
+
game_selection.select(update_instruction, [game_selection], [instruction_panel])
|
461 |
+
|
462 |
+
b1.click(reset_textbox, [], [inputs])
|
463 |
+
inputs.submit(reset_textbox, [], [inputs])
|
464 |
+
|
465 |
+
#b1.click(set_valid_actions, [], [valid_actions])
|
466 |
+
#inputs.submit(set_valid_actions, [], [valid_actions])
|
467 |
+
|
468 |
+
demo.queue(max_size=99, concurrency_count=20).launch(debug=True)
|
context.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
|
3 |
+
from rich.console import Console
|
4 |
+
from rich.prompt import Prompt
|
5 |
+
|
6 |
+
from setting import Settings
|
7 |
+
from agent import SuspicionAgent
|
8 |
+
|
9 |
+
|
10 |
+
class Context:
    """Shared runtime context: console output, settings, the agent roster,
    and an optional web bridge that mirrors I/O to a browser client."""

    def __init__(self, console: Console, settings: Settings, webcontext=None) -> None:
        self.clock: int = 0
        self.console: Console = console
        self.agents: List[SuspicionAgent] = []
        self.user_agent: SuspicionAgent = None
        self.robot_agents: List[SuspicionAgent] = []
        self.observations = ["Beginning of the day, people are living their lives."]
        self.timewindow_size = 3
        self.observations_size_history = []
        self.settings = settings
        self.webcontext = webcontext

    def print(self, message: str, style: str = None):
        """Write *message* to the rich console (optionally styled) and, when
        a web bridge is attached, forward it to the web client as well."""
        if style is None:
            self.console.print(message)
        else:
            self.console.print(message, style=style)
        if self.webcontext:
            self.webcontext.send_response(message)

    def ask(self, message: str = "", choices: List[str] = None) -> str:
        """Ask the human a question — via the web bridge when present,
        otherwise interactively on the terminal.

        When *choices* is given, the first choice is the default answer.
        """
        if self.webcontext:
            return self.webcontext.ask_human(message, choices)
        if choices:
            return Prompt.ask(message, choices=choices, default=choices[0])
        return Prompt.ask(message)
|
game_config/coup.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "Coup",
|
3 |
+
"game_rule":
|
4 |
+
"In the coup game, you play the role of the head of a family in an Italian city-state, a city run by a weak and corrupt court. You need to manipulate, bluff and bribe your way to power. Your object is to destroy the influence of all the other families, forcing them into exile. Only one family will survive...In Coup, you want to be the last player with influence in the game, with influence being represented by face-down character cards in your playing area. Each player starts the game with two coins and two influence – i.e., two face-down character cards; the fifteen card deck consists of three copies of five different characters, each with a unique set of powers: Duke: tax: Take three coins from the treasury. block_foreignaid: Block someone from taking foreign aid. Assassin: Assassination: Pay three coins and try to assassinate another player's character Contessa: block_ass: Block an assassination attempt against yourself. Captain: steal: Take two coins from another player, or block_steal: block someone from stealing coins from you. Ambassador: exchange: Draw two character cards from the Court (the deck), choose which (if any) to exchange with your face-down characters, then return two, block_steal: Block someone from stealing coins from you. Note that, the total number of each role is only 3. On your turn, you can take any of the actions listed above, regardless of which characters you actually have in front of you, or you can take one of three other actions: income: Take one coin from the treasury. foreign aid: Take two coins from the treasury. coup: Pay 7 coins and launch a coup against an opponent, forcing that player to lose an influence. (If you have 10 coins or more, you must take this action.) \n Single Win/Loss Rule: When you take one of the character actions – whether actively on your turn, or defensively in response to someone else's action – that character's action automatically succeeds unless an opponent challenges you. 
In this case, if you can't (or don't) reveal the appropriate character, you lose an influence, turning one of your characters face-up. Face-up characters cannot be used, and if both of your characters are face-up, you're out of the game. If you do have the character in question and choose to reveal it, the opponent loses an influence, then you shuffle that character into the deck and draw a new one, perhaps getting the same character again and perhaps not. The last player to still have influence – that is, a face-down character – wins the game! Win/Loss Payoff Rule: If you let one character of the opponent die or face-up, you will win one chip. If one of your character is face-up or die, you will lose one chip.",
|
5 |
+
"observation_rule": " You will receive an observation with 21 elements in the list, where the first 4 elements are your role cards, where none denotes the role is hidden but alive or there is no card. The elements in index 9-12 denote whether corresponding cards index 1-4 of yours are dead or not, 0 is alive but hidden, 1 denotes the corresponding card is dead, and none is no this card. Elements index with 5-8 are the role cards of the other player where none denotes the role is alive but hidden or there is no card (depends on following 0/1 numbers), if they have name, that is the name of a dead card. The elements in index 13-16 denote whether corresponding cards in index 5-8 of the other player are dead or not, 0 is alive but hidden, 1 denotes the corresponding card is dead, and none is no this card. The 17th element denotes the coins you have. The 18th element denotes the coins of the other player. The 19th element denotes the last action you did. The 20th element denotes the last action the other player did. Please directly tell me what cards my opponent and I have available to use (Please specify which role card is dead and cannot be used anymore). \n"
|
6 |
+
|
7 |
+
}
|
game_config/leduc_limit.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "Leduc Hold'em Poker Limit",
|
3 |
+
"game_rule":
|
4 |
+
"Leduc Hold'em is first introduced in Bayes' Bluff: Opponent Modeling in Poker. Note that the deck consists only two cards of King, Queen and Jack, six cards in total. Each game is fixed with two players, only two rounds. Note that only one public hand is available, two-bet maximum. Raise action: In the first round, you will put chips with amounts of 4 more than chips of your opponent, in the following round, you will put chips with amounts of 8 more than chips of your opponent. Call action: you will put chips with same amounts as your opponent (if his chips in pot are higher than yours, normally 4 ). In the first round, one player is randomly choosed to put 1 unit in pot as small blind while the other puts 2 unit as big blind, and each player is dealt one card, then starts betting. The player with small blind acts first. In the second round, one public card is revealed first, then the players bet again. (Only one public card is available in the whole game.) \n Single Game Win/Draw/Lose Rule: the player whose hand has the same rank as the public card is the winner of one round game. If neither, then the one with higher rank wins this round game, if the rank of cards of two players are the same, it is draw. you can also 'fold' in one round game. \n Whole Game Win/Draw/Lose Rule: you are requested to attend 100 games with your opponent, you both are given 100 chips originally, and the guy who have more chips will win the game after 100 games (i.e. You aim to have more chips than your original chips). \n Winning Payoff Rule: The half of the total pot. \n Lose Payoff Rule: The half of the total pot. ",
|
5 |
+
"observation_rule": "The observation is a dictionary. The main observation space: `'raw_legal_actions'` which holds the legal moves, described in the Legal Actions Mask section. 'hand' is the hole hand, where SJ denotes the J of Spades, HK denotes King of Hearts. game_num is the idx of game now, the total number is 100. all_chips is list which contains the number of chips you and the opponent has been put in the pot, respectively, where the first element of 'all_chips' is the number of chips you have put in the pot, the second element of 'all_chips' is the number of chips the opponent have put in the pot. rest_chips is how much chips you have now, opponent_rest_chips how much chips your opponent have now. Based on your hole hand and public hand (only one), please analysis what is your best combination now and potential combinations in future with probability (number) reasonably and accurately step by step."
|
6 |
+
}
|
game_config/limit_holdem.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "Texas Hold'em Poker Limit",
|
3 |
+
"game_rule":
|
4 |
+
"General Rule: There are the basic rules for Texas Hold'em Poker limit with a limit betting structure:\n\nIn this game, you are requested to attend 40 games with your opponent, you both are given 20 chips originally, and the guy who have more chips will win the game after 40 games, note that if your chips become 0 or negatives, you will loss the whole game. The goal of you is to obtain more chips than your opponent. The Deck: Texas Hold'em is played with a standard deck of 52 cards, where there are 13 number cards (A,1 to K) with 4 colors.\n\nThe Blinds: The game begins with two players to the left of the dealer posting a small blind and a big blind respectively. The small blind is usually half of the big blind.\n\nDealing the Cards: Each player is then dealt two private cards known as 'hole cards' that belong to them alone.\n\nFirst Round of Betting: Starting with the player to the left of the big blind, each player can call the big blind, raise, or fold. In limit hold'em, the amount that a player can raise in the first two rounds of betting (pre-flop and flop) is set at the lower limit of the stakes structure. For instance, in a $4/$8 limit hold'em game, players can bet and raise in increments of $8 during the pre-flop and flop rounds.\n\nThe Flop: After the first round of betting, the dealer discards the top card of the deck (a move called 'burning' the card) and then deals three face-up community cards known as the flop.\n\nSecond Round of Betting: Another round of betting ensues, starting with the player to the left of the dealer. The same betting rules apply as the first round.\n\nThe Turn: The dealer burns another card and then adds a fourth face-up card to the community cards. This fourth card is known as the turn.\n\nThird Round of Betting: Another round of betting starts. In limit hold'em, the betting limit now doubles. 
So in our $4/$8 game example, players can now bet and raise in increments of $8.\n\nThe River: The dealer burns another card before placing a fifth and final community card face-up on the table. This card is known as the river.\n\nFinal Round of Betting: The final round of betting starts with the same rules as the third round.\n\nThe Showdown: If two or more players remain after the final round of betting, a showdown occurs. This is where players reveal their hands, and the player with the best hand wins the pot. \n Single Game Win/Draw/Lose Rule: Texas Hold'em hands are ranked from highest to lowest as follows: Royal Flush, Straight Flush, Four of a Kind, Full House, Flush, Straight, Three of a Kind, Two Pair, One Pair, High Card.\n\nNote: If at any point a player makes a bet or a raise that no other player calls, they win the pot without revealing their hand.\n\nRotating the Dealer: After the pot is awarded, the dealer button moves one seat to the left, and the process begins anew.\nThe main difference between Limit Hold'em and other betting structures like No Limit or Pot Limit is the amount a player can bet or raise. In Limit, the amount is capped at a set amount per round. \n Whole Game Win/Draw/Lose Rule: you are requested to attend 10 games with your opponent, you both are given 50 chips originally, and the guy who have more chips will win the game after 10 games (i.e. You aim to have more chips than your original chips). \n Winning Payoff Rule: The half of the pot. \n Lose Payoff Rule: The half of the pot. ",
|
5 |
+
"observation_rule": "The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an `'raw_legal_actions'` which holds the legal moves, described in the Legal Actions Mask section. The main observation space is similar to Texas Hold'em. 'hand' is the hole hand, where H3 denotes the 3 of diamonds, ST denotes 10 of Spades. raise_nums is a list with 4 numbers which represents the how many times you have raised in 4 rounds. Specifically, you have no more than 4 chances to raise in each round. action_record denotes the action history of the current game. The first dimension is the agent number, the second dimension is the action type. game_num is the idx of game now, the total number is 40. rest_chips is how much chips you have now, opponent_rest_chips how much chips your opponent have now. Based on your hole hand and public hands, please analysis what is your best combination now and potential combinations in future with probability (number 100% total) reasonably step by step."
|
6 |
+
}
|
model.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Type
|
2 |
+
|
3 |
+
from langchain import chat_models, embeddings, llms
|
4 |
+
from langchain.embeddings.base import Embeddings
|
5 |
+
from langchain.llms.base import BaseLanguageModel
|
6 |
+
|
7 |
+
from setting import EmbeddingSettings, LLMSettings
|
8 |
+
from context import Context
|
9 |
+
from setting import Settings
|
10 |
+
from rich.console import Console
|
11 |
+
from agent import SuspicionAgent
|
12 |
+
|
13 |
+
def agi_init(
    agent_configs: List[dict],
    game_config: dict,
    console: Console,
    settings: Settings,
    user_idx: int = 0,
    webcontext=None,
) -> Context:
    """Build the shared Context and instantiate one SuspicionAgent per config.

    Each agent is created with the game's rule text and observation rule,
    seeded with its configured memories, and registered on the context.

    NOTE(review): ``user_idx`` is currently unused — every agent is appended
    to both ``robot_agents`` and ``agents``; confirm whether a human-controlled
    agent slot was intended here.
    """
    ctx = Context(console, settings, webcontext)
    ctx.print("Creating all agents one by one...", style="yellow")
    for cfg in agent_configs:
        name = cfg["name"]
        with ctx.console.status(f"[yellow]Creating agent {name}..."):
            new_agent = SuspicionAgent(
                name=cfg["name"],
                age=cfg["age"],
                rule=game_config["game_rule"],
                game_name=game_config["name"],
                observation_rule=game_config["observation_rule"],
                status="N/A",
                llm=load_llm_from_config(ctx.settings.model.llm),

                reflection_threshold=8,
            )
            for memory in cfg["memories"]:
                new_agent.add_memory(memory)
            ctx.robot_agents.append(new_agent)
            ctx.agents.append(new_agent)

        ctx.print(f"Agent {name} successfully created", style="green")

    ctx.print("Suspicion Agent started...")

    return ctx
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
# ------------------------- LLM/Chat models registry ------------------------- #
|
51 |
+
# Maps the LLMSettings.type string to the LangChain model class to construct.
llm_type_to_cls_dict: Dict[str, Type[BaseLanguageModel]] = {
    "chatopenai": chat_models.ChatOpenAI,
    "openai": llms.OpenAI,
}

# ------------------------- Embedding models registry ------------------------ #
# Maps the EmbeddingSettings.type string to the embedding class to construct.
embedding_type_to_cls_dict: Dict[str, Type[Embeddings]] = {
    "openaiembeddings": embeddings.OpenAIEmbeddings
}
|
60 |
+
|
61 |
+
|
62 |
+
# ---------------------------------------------------------------------------- #
|
63 |
+
# LLM/Chat models #
|
64 |
+
# ---------------------------------------------------------------------------- #
|
65 |
+
def load_llm_from_config(config: LLMSettings) -> BaseLanguageModel:
    """Instantiate the LLM/chat model described by *config*.

    All settings fields except ``type`` are forwarded verbatim to the
    model class constructor.

    Raises:
        ValueError: if ``config.type`` names an unregistered model class.
    """
    params = config.dict()
    model_type = params.pop("type")

    try:
        model_cls = llm_type_to_cls_dict[model_type]
    except KeyError:
        raise ValueError(f"Loading {model_type} LLM not supported") from None

    return model_cls(**params)
|
75 |
+
|
76 |
+
|
77 |
+
def get_all_llms() -> List[str]:
    """Return the registry key of every supported LLM/chat model."""
    return [*llm_type_to_cls_dict]
|
80 |
+
|
81 |
+
|
82 |
+
# ---------------------------------------------------------------------------- #
|
83 |
+
# Embeddings models #
|
84 |
+
# ---------------------------------------------------------------------------- #
|
85 |
+
def load_embedding_from_config(config: EmbeddingSettings) -> Embeddings:
    """Instantiate the embedding model described by *config*.

    All settings fields except ``type`` are forwarded verbatim to the
    embedding class constructor.

    Raises:
        ValueError: if ``config.type`` is not a registered embedding class.
    """
    config_dict = config.dict()
    config_type = config_dict.pop("type")
    # FIX: removed the stray debug ``print(config)`` — the settings object may
    # carry credentials (e.g. an OpenAI API key) that must not reach stdout/logs.
    if config_type not in embedding_type_to_cls_dict:
        raise ValueError(f"Loading {config_type} Embedding not supported")

    cls = embedding_type_to_cls_dict[config_type]
    return cls(**config_dict)
|
95 |
+
|
96 |
+
|
97 |
+
def get_all_embeddings() -> List[str]:
    """Return the registry key of every supported embedding model."""
    return [*embedding_type_to_cls_dict]
|
person_config/GoodGuy.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "GoodGuy",
|
3 |
+
"age": 27,
|
4 |
+
"personality": "flexible",
|
5 |
+
"memories":[]
|
6 |
+
}
|
person_config/Persuader.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "board_game_expert",
|
3 |
+
"age": 27,
|
4 |
+
"personality": "flexible",
|
5 |
+
"memories":[]
|
6 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
rich
|
2 |
+
textual
|
3 |
+
typer
|
4 |
+
termcolor
|
5 |
+
tiktoken
|
6 |
+
faiss-cpu
|
7 |
+
openai
|
8 |
+
pinecone-client
|
9 |
+
langchain
|
10 |
+
discord.py
|
11 |
+
inquirer
|
12 |
+
rlcard[torch]
|
13 |
+
python-json-logger
|
14 |
+
gitpython
|
15 |
+
gym
|
16 |
+
git+https://github.com/CR-Gjx/gym-coup.git
|
retriever.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
from langchain import FAISS
|
4 |
+
from langchain.retrievers import TimeWeightedVectorStoreRetriever
|
5 |
+
from langchain.embeddings.base import Embeddings
|
6 |
+
import faiss
|
7 |
+
from context import Context
|
8 |
+
from model import load_embedding_from_config
|
9 |
+
import math
|
10 |
+
from langchain.docstore import InMemoryDocstore
|
11 |
+
|
12 |
+
# reference:
|
13 |
+
# https://python.langchain.com/en/latest/use_cases/agent_simulations/characters.html#create-a-generative-character
|
14 |
+
def relevance_score_fn(score: float) -> float:
    """Map a FAISS L2 distance between unit-norm embeddings to a [0, 1] similarity.

    For normalized vectors (OpenAI embeddings are unit norm) the Euclidean
    distance ranges from 0 (identical) to sqrt(2) (most dissimilar), so we
    rescale linearly and invert: 0 -> 1.0, sqrt(2) -> 0.0.
    """
    max_distance = math.sqrt(2)
    return 1.0 - score / max_distance
|
23 |
+
|
24 |
+
|
25 |
+
# reference:
|
26 |
+
# https://python.langchain.com/en/latest/use_cases/agent_simulations/characters.html#create-a-generative-character
|
27 |
+
def create_new_memory_retriever(ctx: Context):
    """Create a fresh, empty vector-store retriever unique to one agent."""
    # Embedding model chosen by settings; OpenAI embeddings are 1536-dim.
    embeddings_model = load_embedding_from_config(ctx.settings.model.embedding)
    embedding_size = 1536
    # Empty FAISS store over an L2 index; raw distances are mapped to [0, 1]
    # relevance scores by relevance_score_fn.
    vectorstore = FAISS(
        embeddings_model.embed_query,
        faiss.IndexFlatL2(embedding_size),
        InMemoryDocstore({}),
        {},
        relevance_score_fn=relevance_score_fn,
    )
    return TimeWeightedVectorStoreRetriever(
        vectorstore=vectorstore, other_score_keys=["importance"], k=15
    )
|
44 |
+
|
45 |
+
|
46 |
+
class Retriever(TimeWeightedVectorStoreRetriever):
    """Time-weighted retriever that can persist/restore its FAISS index and
    memory stream under a directory."""

    # Embedding model needed when reloading the FAISS index from disk.
    embedding_model: Embeddings

    def faiss_path(self, path) -> str:
        """Location of the serialized FAISS index under *path*."""
        return path + "/faiss"

    def mem_path(self, path) -> str:
        """Location of the pickled memory stream under *path*."""
        return path + "/memory.pickle"

    def try_load_memory(self, path: str) -> bool:
        """Load a previously dumped memory from *path*; return False if absent.

        SECURITY NOTE: ``pickle.load`` is only safe on files this process
        wrote itself — never point this at untrusted data.
        """
        if not os.path.isdir(path):
            return False

        # FIX: renamed the local from ``faiss`` — it shadowed the imported
        # ``faiss`` module within this method.
        vectorstore: FAISS = self.vectorstore
        # NOTE(review): FAISS.load_local returns a new store and its result is
        # discarded here, so this call may not actually restore the index into
        # ``self.vectorstore`` — confirm against the langchain FAISS API.
        vectorstore.load_local(self.faiss_path(path), self.embedding_model)

        with open(self.mem_path(path), "rb") as mem_file:
            self.memory_stream = pickle.load(mem_file)

        return True

    def dump_memory(self, path: str) -> bool:
        """Persist the FAISS index and the pickled memory stream under *path*."""
        vectorstore: FAISS = self.vectorstore
        vectorstore.save_local(self.faiss_path(path))
        with open(self.mem_path(path), "wb") as mem_file:
            pickle.dump(self.memory_stream, mem_file)
|
setting.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from typing import Any, Dict, List, Type
|
3 |
+
|
4 |
+
from pydantic import BaseModel, BaseSettings, Extra
|
5 |
+
import os
|
6 |
+
|
7 |
+
def json_config_settings_source(settings: BaseSettings) -> Dict[str, Any]:
    """Pydantic settings source reading ``.suspicionagent/config.json`` from CWD.

    Exits the process with an error message when the config file is missing.
    """
    from util import load_json

    # Load settings from JSON config file; a missing file also covers the
    # missing-directory case, so one existence check suffices.
    config_file = Path(os.getcwd(), ".suspicionagent", "config.json")
    if not config_file.exists():
        print("[Error] Please config suspicionagent")
        import sys

        sys.exit(-1)
    return load_json(config_file)
|
19 |
+
|
20 |
+
|
21 |
+
class LLMSettings(BaseModel):
    """
    LLM/ChatModel related settings
    """

    # Registry key into model.llm_type_to_cls_dict ("chatopenai" or "openai").
    type: str = "chatopenai"

    class Config:
        # Extra fields (model, max_tokens, temperature, ...) are kept and
        # forwarded verbatim to the LangChain model constructor.
        extra = Extra.allow
|
30 |
+
|
31 |
+
|
32 |
+
class EmbeddingSettings(BaseModel):
    """
    Embedding related settings
    """

    # Registry key into model.embedding_type_to_cls_dict.
    type: str = "openaiembeddings"

    class Config:
        # Extra fields are kept and forwarded to the embedding constructor.
        extra = Extra.allow
|
41 |
+
|
42 |
+
|
43 |
+
class ModelSettings(BaseModel):
    """
    Model related settings
    """

    # Preset name, e.g. "openai-gpt-4-0613" (see model_setting_type_to_cls_dict).
    type: str = ""
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()

    class Config:
        extra = Extra.allow
|
54 |
+
|
55 |
+
|
56 |
+
class Settings(BaseSettings):
    """
    Root settings
    """

    name: str = "default"
    model: ModelSettings = ModelSettings()

    class Config:
        # Environment variables prefixed with "skyagi_" override fields.
        env_prefix = "skyagi_"
        env_file_encoding = "utf-8"
        extra = Extra.allow

    @classmethod
    def customise_sources(
        cls,
        init_settings,
        env_settings,
        file_secret_settings,
    ):
        # Settings precedence: init kwargs > environment > secret files.
        # The JSON config-file source is deliberately disabled here.
        return (
            init_settings,
            #json_config_settings_source,
            env_settings,
            file_secret_settings,
        )
|
82 |
+
|
83 |
+
|
84 |
+
# ---------------------------------------------------------------------------- #
|
85 |
+
# Preset configurations #
|
86 |
+
# ---------------------------------------------------------------------------- #
|
87 |
+
class OpenAIGPT4Settings(ModelSettings):
    """Preset: GPT-4 (0613) chat model with OpenAI embeddings."""

    # NOTE: GPT4 is in waitlist
    type = "openai-gpt-4-0613"
    llm = LLMSettings(type="chatopenai", model="gpt-4-0613", max_tokens=3000,temperature=0.1, request_timeout=120)
    embedding = EmbeddingSettings(type="openaiembeddings")
|
92 |
+
|
93 |
+
class OpenAIGPT432kSettings(ModelSettings):
    """Preset: GPT-4 32k-context (0613) chat model with OpenAI embeddings."""

    # NOTE: GPT4 is in waitlist
    type = "openai-gpt-4-32k-0613"
    llm = LLMSettings(type="chatopenai", model="gpt-4-32k-0613", max_tokens=2500)
    embedding = EmbeddingSettings(type="openaiembeddings")
|
98 |
+
|
99 |
+
|
100 |
+
class OpenAIGPT3_5TurboSettings(ModelSettings):
    """Preset: GPT-3.5-turbo 16k (0613) chat model with OpenAI embeddings."""

    type = "openai-gpt-3.5-turbo"
    llm = LLMSettings(type="chatopenai", model="gpt-3.5-turbo-16k-0613", max_tokens=2500)
    embedding = EmbeddingSettings(type="openaiembeddings")
|
104 |
+
|
105 |
+
|
106 |
+
class OpenAIGPT3_5TextDavinci003Settings(ModelSettings):
    """Preset: text-davinci-003 completion model with OpenAI embeddings."""

    type = "openai-gpt-3.5-text-davinci-003"
    llm = LLMSettings(type="openai", model_name="text-davinci-003", max_tokens=2500)
    embedding = EmbeddingSettings(type="openaiembeddings")
|
110 |
+
|
111 |
+
# class Llama2_70b_Settings(ModelSettings):
|
112 |
+
# from transformers import LlamaForCausalLM, LlamaTokenizer
|
113 |
+
# type = "llama2-70b"
|
114 |
+
# tokenizer = LlamaTokenizer.from_pretrained("/groups/gcb50389/pretrained/llama2-HF/Llama-2-70b-hf")
|
115 |
+
# llm = LlamaForCausalLM.from_pretrained("/groups/gcb50389/pretrained/llama2-HF/Llama-2-70b-hf")
|
116 |
+
# embedding = EmbeddingSettings(type="openaiembeddings")
|
117 |
+
|
118 |
+
|
119 |
+
# ------------------------- Model settings registry ------------------------ #
|
120 |
+
# Maps a preset name (Settings-facing string) to its ModelSettings subclass.
model_setting_type_to_cls_dict: Dict[str, Type[ModelSettings]] = {
    "openai-gpt-4-0613": OpenAIGPT4Settings,
    "openai-gpt-4-32k-0613": OpenAIGPT432kSettings,
    "openai-gpt-3.5-turbo": OpenAIGPT3_5TurboSettings,
    "openai-gpt-3.5-text-davinci-003": OpenAIGPT3_5TextDavinci003Settings,
    # "llama2-70b":Llama2_70b_Settings
}
|
127 |
+
|
128 |
+
|
129 |
+
def load_model_setting(type: str) -> ModelSettings:
    """Instantiate the preset ModelSettings registered under *type*.

    (The parameter name shadows the builtin ``type``; it is kept for
    backward compatibility with keyword callers.)

    Raises:
        ValueError: for an unknown preset name.
    """
    try:
        cls = model_setting_type_to_cls_dict[type]
    except KeyError:
        raise ValueError(f"Loading {type} setting not supported") from None

    return cls()
|
135 |
+
|
136 |
+
|
137 |
+
def get_all_model_settings() -> List[str]:
    """Return the names of all registered model-setting presets."""
    return [*model_setting_type_to_cls_dict]
|
util.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Any, Dict
|
5 |
+
|
6 |
+
from model import load_embedding_from_config, load_llm_from_config
|
7 |
+
from setting import Settings
|
8 |
+
import logging
|
9 |
+
from pythonjsonlogger import jsonlogger
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
def verify_openai_token(token: str) -> str:
    """Validate an OpenAI API key by issuing a tiny test completion.

    Returns "OK" when the request succeeds, otherwise the stringified
    exception raised by the API client.
    """
    import openai

    openai.api_key = token
    try:
        openai.Completion.create(
            model="text-ada-001",
            prompt="Hello",
            temperature=0,
            max_tokens=10,
            top_p=1,
            frequency_penalty=0.5,
            presence_penalty=0,
        )
    except Exception as e:
        return str(e)
    return "OK"
|
30 |
+
|
31 |
+
def get_logging(logger_name, content=''):
    """Append `content` as a JSON record to ./memory_data/<logger_name>.json.

    The JSON file handler is attached only the first time a given logger
    name is used, so repeated calls reuse the same handler instead of
    duplicating output.
    """
    logger = logging.getLogger(logger_name)
    if not logger.handlers:
        logger.setLevel(logging.DEBUG)
        json_handler = logging.FileHandler('./memory_data/' + logger_name + '.json')
        json_handler.setFormatter(jsonlogger.JsonFormatter())
        logger.addHandler(json_handler)
    logger.info(content)
|
43 |
+
|
44 |
+
|
45 |
+
def verify_model_initialization(settings: Settings) -> str:
    """Smoke-test that both the LLM and the embedding model can be built.

    Returns "OK" when both constructions succeed, otherwise a message
    naming which stage failed and why.
    """
    checks = (
        ("LLM", load_llm_from_config, settings.model.llm),
        ("Embedding", load_embedding_from_config, settings.model.embedding),
    )
    for label, loader, config in checks:
        try:
            loader(config)
        except Exception as e:
            # Same message text as before: "<label> initialization check failed: ..."
            return f"{label} initialization check failed: {e}"
    return "OK"
|
57 |
+
|
58 |
+
|
59 |
+
def verify_pinecone_token(token: str) -> str:
    """Placeholder check for a Pinecone token; always reports success."""
    return "OK"
|
61 |
+
|
62 |
+
|
63 |
+
def verify_discord_token(token: str) -> str:
    """Placeholder check for a Discord token; always reports success."""
    return "OK"
|
65 |
+
|
66 |
+
|
67 |
+
def load_json_value(filepath: Path, key: str, default_value: Any) -> Any:
    """Read `key` from the JSON file at `filepath`.

    Falls back to `default_value` when the file does not exist or the
    key is absent from the parsed object.
    """
    if not Path(filepath).exists():
        return default_value
    return load_json(filepath).get(key, default_value)
|
74 |
+
|
75 |
+
|
76 |
+
def set_json_value(filepath: Path, key: str, value: Any) -> None:
    """Persist `value` under `key` in the JSON file at `filepath`.

    Existing keys are preserved; the file is created when missing.
    Keys should follow Python naming conventions (e.g. trial_id).
    """
    data = load_json(filepath)
    data[key] = value
    with open(filepath, "w+") as out_file:
        json.dump(data, out_file, sort_keys=True)
        out_file.flush()
|
83 |
+
|
84 |
+
|
85 |
+
def load_json(filepath: Path) -> Dict:
    """Parse the JSON file at `filepath` into a dict.

    A missing or empty file yields an empty dict; any other parse
    failure propagates as json.JSONDecodeError.
    """
    if not Path(filepath).exists():
        return {}
    with open(filepath, "r") as file:
        try:
            return json.load(file)
        except json.JSONDecodeError as e:
            # An empty file is treated the same as a missing one.
            if os.stat(filepath).st_size == 0:
                return {}
            raise e
|
98 |
+
|
99 |
+
def load_log(file_name, key_name):
    """Load a JSON-lines log file from ./memory_data/<file_name>.

    Each line is parsed as a JSON object; the second value of the record
    is indexed with `key_name` and collected, alongside the second key of
    the record.  (The payload is assumed to live in the second entry —
    matches how the json logger writes records; confirm against writer.)

    Returns:
        (content_list, key_list): extracted payload fields and their keys.
    """
    content_list = []
    key_list = []
    with open('./memory_data/' + file_name) as f:
        for line in f.readlines():
            # Fixed: dropped leftover debug print(i) and stopped reusing the
            # `contents` name for both the line list and each parsed record.
            record = json.loads(line)
            content_list.append(list(record.values())[1][key_name])
            key_list.append(list(record.keys())[1])
    return content_list, key_list
|
110 |
+
|
111 |
+
def load_log_full(file_name, key_name):
    """Load a JSON-lines log file from an explicit path.

    Like load_log, but takes a full path and, when `key_name` is None,
    collects the entire second value of each record instead of a single
    field of it.

    Returns:
        (content_list, key_list): extracted payloads and their keys.
    """
    content_list = []
    key_list = []
    with open(file_name) as f:
        lines = f.readlines()
    for line in lines:
        record = json.loads(line)
        payload = list(record.values())[1]
        content_list.append(payload if key_name is None else payload[key_name])
        key_list.append(list(record.keys())[1])
    return content_list, key_list
|
125 |
+
|
126 |
+
def get_checkpoint_dir(agent_file: str) -> str:
    """Return the checkpoint path derived from an agent file's base name."""
    base_name = os.path.basename(agent_file)
    return f"./{base_name}.cpt"
|