Jiaxian Guo committed on
Commit 28a47b6
Parent: 581bd24

Add application file

.DS_Store ADDED
Binary file (10.2 kB).
 
agent.py ADDED
@@ -0,0 +1,467 @@
# Reference: https://python.langchain.com/en/latest/use_cases/agent_simulations

from datetime import datetime
from typing import List, Optional, Tuple

from langchain import LLMChain
from langchain.base_language import BaseLanguageModel
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field

import util
import time


class SuspicionAgent(BaseModel):
    """A character with memory and innate characteristics."""

    name: str
    game_name: str
    age: int
    observation_rule: str
    """The traits of the character you wish not to change."""
    status: str
    """Current activities of the character."""
    llm: BaseLanguageModel

    """The retriever to fetch related memories."""
    verbose: bool = False

    reflection_threshold: Optional[float] = None
    """When the total 'importance' of memories exceeds this threshold, stop to reflect."""

    current_plan: List[str] = []
    belief: str = ""
    pattern: str = ""
    long_belief: str = ""
    counter_belief: str = ""
    plan: str = ""
    high_plan: str = ""
    """The current plan of the agent."""

    memory: List = ['']
    summary: str = ""  #: :meta private:
    summary_refresh_seconds: int = 3600  #: :meta private:
    last_refreshed: datetime = Field(default_factory=datetime.now)  #: :meta private:

    memory_importance: float = 0.0  #: :meta private:
    max_tokens_limit: int = 1200  #: :meta private:
    read_observation: str = ""  #: :meta private:
    rule: str = ""  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True
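
    # A minimal construction sketch (hypothetical values; in this repo the persona
    # comes from person_config/*.json, the rules from game_config/*.json, and `llm`
    # is any LangChain-compatible chat model):
    #
    #   agent = SuspicionAgent(
    #       name="board_game_expert", game_name="leduc-holdem", age=27, status="playing",
    #       rule=game_config["game_rule"],
    #       observation_rule=game_config["observation_rule"],
    #       llm=llm,
    #   )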

    def add_long_memory(self, memory_content: str) -> List[str]:
        """Add an observation or memory to the agent's long-term memory."""
        self.memory.append(memory_content)
        return self.memory

    def planning_module(self, observation: str, recipient_name: str, previous_conversation: List[str] = None,
                        belief: str = None, valid_action_list: List[str] = None, short_memory_summary: str = "",
                        pattern: str = "", last_plan: str = "", mode: str = "second_tom") -> str:
        """Make plans and evaluate them (both modules are combined in one prompt to save costs)."""
        if mode == 'second_tom':
            prompt = PromptTemplate.from_template(
                "You are the objective player behind an NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + "{pattern}\n"
                + " Your observation about the game status now is: {observation}\n"
                + "{belief}\n"
                + " Understanding all given information, can you do the following things:"
                + " Make Reasonable Plans: Please plan several strategies according to the actions {valid_action_list} you can play now to win the whole {game_name} game in the end, step by step. Note that you can say something or keep silent to confuse your opponent. "
                + " Potential {recipient_name}'s actions and Estimated Winning/Lose/Draw Rate for Each Plan: From the perspective of {recipient_name}, please infer, with probability (normalize to 100% in total), what action {recipient_name} would take when {recipient_name} holds different cards, and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. At last, please calculate the overall winning/lose/draw rates for each plan step by step, considering {recipient_name}'s behaviour pattern. Output in a tree structure: "
                + "Output: Plan 1: If I execute plan 1. "
                  "The winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, in the xx round, because {recipient_name} holds card1 (probability) and its combination with the current public card (if released) (based on my belief on {recipient_name}), and if he sees my action, {recipient_name} will do action1 (probability) (I actually hold card and the public card (if revealed) is , he holds card1 and the public card (if revealed); considering the Single Game Win/Draw/Lose Rule, please infer whether I will win/draw/lose step by step), action2 (probability) (considering the Single Game Win/Draw/Lose Rule, please infer whether I will win/draw/lose step by step), .. (normalize to 100% in total); \n Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card1) is (probability = his card probability * lose action probability), (draw rate for his card1) is (probability = his card probability * draw action probability) "
                  "The winning/lose/draw rates when {recipient_name} holds card2: Based on {recipient_name}'s behaviour pattern, in the xx round, because {recipient_name} holds card2 (probability) and its combination with the current public card (if released) (based on my belief on {recipient_name}), and if he sees my action, he will do action1 (probability) (I actually hold card and the public card (if revealed) is , he holds card2 and the public card (if revealed); considering the Single Game Win/Draw/Lose Rule, please infer whether I will win/draw/lose step by step), action2 (probability) (normalize to 100% in total) (considering the Single Game Win/Draw/Lose Rule, please infer whether I will win/draw/lose step by step), .. ; ..... continue .... \n Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability = his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                  "...\n"
                  "Plan 1 overall {initiator_name}'s winning/lose/draw rates: the winning rate (probability) for plan 1 is (winning rate for his card1) + (winning rate for his card2) + ..; the lose rate (probability) for plan 1 is (lose rate for his card1) + (lose rate for his card2) + ..; the draw rate (probability) for plan 1 is (draw rate for his card1) + (draw rate for his card2) + ...; (normalize to 100% in total) for plan 1 \n"
                  "Plan 2: If I execute plan 2, the winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, in the xx round, if {recipient_name} holds card1 (probability) and its combination with the current public card (if released), .. (the format is similar to before) ... continue .."
                  "Plan 3: .. Continue ... "
                + " The number of payoffs for each plan: Understanding your current observation and each new plan, please infer the number of winning/lose payoffs for each plan step by step. Output: Plan 1: After the action, all chips in the pot: If I win, the winning payoff would be (calculated by the Winning Payoff Rules step by step); after the action, all chips in the pot: If I lose, the lose payoff would be (calculated by the Lose Payoff Rules step by step). Plan 2: After the action, all chips in the pot: If I win, the winning chips would be (calculated by the Winning Payoff Rules step by step); after the action, all chips in the pot: If I lose, the lose chips would be (calculated by the Lose Payoff Rules step by step). If the number of my chips in the pot does not change, please directly output it. \n"
                + " Estimate Expected Chips Gain for Each Plan: Understanding all the information and the estimated winning/lose/draw rates for each plan, please estimate the overall average expected chips gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule) step by step."
                + " Plan Selection: Please output the rank of the estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain, taking the strategy improvement into account. \n "
            )
        elif mode == 'first_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + " {pattern} \n"
                + " Your observation about the game status now is: {observation}\n"
                + " {belief}\n"
                + " Understanding all given information, can you do the following things:"
                + " Make Reasonable Plans: Please plan several strategies according to the actions {valid_action_list} you can play now to win the whole {game_name} game in the end, step by step. Note that you can say something or keep silent to confuse your opponent."
                + " Potential {recipient_name}'s actions and Estimated Winning/Lose/Draw Rate: From the perspective of {recipient_name}, please infer, with probability (normalize to 100% in total), what action {recipient_name} would take when {recipient_name} holds different cards, and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. Output in a tree structure: "
                + "Output: Based on {recipient_name}'s behaviour pattern and the analysis of {recipient_name}'s cards, "
                  "the winning/lose/draw rates when {recipient_name} holds card1 in the xx round: if {recipient_name} holds card1 (probability) (based on my belief on {recipient_name}) with the public card (if released), {recipient_name} will do action1 (probability) (infer whether I will win/draw/lose step by step, considering the Single Game Win/Draw/Lose Rule and my factual card analysis with the public card (if released) and his card analysis with the public card (if released)), action2 (probability) (infer whether I will win/draw/lose step by step), .. (normalize to 100% in total); Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card1) is (probability = his card probability * lose action probability), (draw rate for his card1) is (probability = his card probability * draw action probability) "
                  "The winning/lose/draw rates when {recipient_name} holds card2 in the xx round: if {recipient_name} holds card2 (probability) (based on my belief on {recipient_name}) with the public card (if released), he will do action1 (probability) (infer whether I will win/draw/lose step by step, considering the Single Game Win/Draw/Lose Rule and my factual card analysis with the current public card (if released) and his card analysis with the current public card (if released)), action2 (probability) (normalize to 100% in total) (infer whether I will win/draw/lose step by step), .. based on {recipient_name}'s behaviour pattern; ..... continue .... Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability = his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
                  "..."
                  "Overall {initiator_name}'s winning/lose/draw rates: Based on the above analysis, the winning rate (probability) is (winning rate for his card1) + (winning rate for his card2) + ..; the lose rate (probability) is (lose rate for his card1) + (lose rate for his card2) + ..; the draw rate (probability) is (draw rate for his card1) + (draw rate for his card2) + ...; (normalize to 100% in total). \n"
                + " Potential beliefs about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, the summarization of previous actions, each new plan, the Winning Payoff Rule and the Lose Payoff Rule, please infer several beliefs about the number of chips in the pot for each plan step by step. Output: Plan 1: Chips in the pot: If I win, the winning payoff would be (calculated by the Winning Payoff Rules in the game rule); after the action, if I lose, the lose payoff would be: . Plan 2: Chips in the pot: If I win, the winning chips would be (calculated by the Winning Payoff Rules in the game rule); after the action, if I lose, the lose chips would be: . If the number of my chips in the pot does not change, please directly output it. "
                + " Estimate Expected Chips Gain for Each Plan: Understanding the game rule, the plans and your knowledge about the {game_name}, please estimate the overall average expected chips gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule), explain what the result would be if you did not select the plan, and explain step by step why this final expected chips gain is reasonable. "
                + " Plan Selection: Please output the rank of the estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain, taking the strategy improvement into account. \n\n "
            )
        else:
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
                + " The game rule is: {rule} \n"
                + " {pattern} \n"
                + " Your observation about the game status now is: {observation}\n"
                + " Understanding all given information, can you do the following things:"
                + " Make Reasonable Plans: Please plan several strategies according to the actions {valid_action_list} you can play now to win the whole {game_name} game in the end, step by step. Note that you can say something or keep silent to confuse your opponent."
                + " Estimate Winning/Lose/Draw Rate for Each Plan: Understanding the given information and your knowledge about the {game_name}, please estimate the success rate of each step of each plan step by step, and the overall average winning/lose/draw rate (normalize to 100% in total) of each plan/strategy for the current game step by step, following the template: If I do plan 1, because I hold card, the public information (if released) and the Single Game Win/Draw/Lose Rule, I will win or lose or draw (probability); ... continue .... Overall win/draw/lose rate: Based on the analysis, I can take the weighted average step by step to get that the overall weighted average winning rate is (probability), the average lose rate is (probability), and the draw rate is (probability) (normalize to 100% in total)\n "
                + " Potential beliefs about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, the summarization of previous actions, each new plan, the Winning Payoff Rule and the Lose Payoff Rule, please infer several beliefs about the number of chips in the pot for each plan step by step. Output: Plan 1: Chips in the pot: If I win, the winning payoff would be (calculated by the Winning Payoff Rules in the game rule); after the action, chips in the pot: if I lose, the lose payoff would be: . Plan 2: Chips in the pot: If I win, the winning chips would be (calculated by the Winning Payoff Rules in the game rule); after the action, chips in the pot: if I lose, the lose chips would be: . If the number of my chips in the pot does not change, please directly output it. "
                + " Estimate Expected Chips Gain for Each Plan: Understanding the game rule, the plans and your knowledge about the {game_name}, please estimate the overall average expected chips gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule), explain what the result would be if you did not select the plan, and explain step by step why this final expected chips gain is reasonable. "
                + " Plan Selection: Please output the rank of the estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain, taking the strategy improvement into account. \n\n "
            )

        agent_summary_description = short_memory_summary
        belief = self.belief if belief is None else belief

        kwargs = dict(
            recent_observations=agent_summary_description,
            last_plan=last_plan,
            belief=belief,
            initiator_name=self.name,
            pattern=pattern,
            recipient_name=recipient_name,
            observation=observation,
            rule=self.rule,
            game_name=self.game_name,
            valid_action_list=valid_action_list,
        )

        plan_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
        self.plan = plan_prediction_chain.run(**kwargs)
        self.plan = self.plan.strip()
        return self.plan
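
    # A hypothetical call sketch (variable names illustrative, not from the repo):
    # "second_tom" prompts the model to also reason about what the opponent believes
    # about my cards, "first_tom" only models the opponent's own behaviour, and any
    # other mode falls back to plain self-reasoning about win rates.
    #
    #   plan = agent.planning_module(readable_obs, "GoodGuy", belief=belief,
    #                                valid_action_list=['call', 'raise', 'fold'],
    #                                short_memory_summary=summary,
    #                                pattern=agent.pattern, mode="second_tom")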

    def get_belief(self, observation: str, recipient_name: str, short_memory_summary: str,
                   pattern: str = "", mode: str = "second_tom") -> str:
        """Infer beliefs about both players' cards from the current observation."""
        if mode == 'second_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your estimated judgement about the behaviour pattern of {recipient_name} and the improved strategy is: {pattern} \n"
                + " Your observation now is: {observation}\n"
                + " Your current game progress summarization, including actions and conversations with {recipient_name}, is: {recent_observations}\n"
                + " Understanding the game rule, the cards you have, your observation, the progress summarization in the current game, the estimated behaviour pattern of {recipient_name}, the potential guess pattern of {recipient_name} on you, and your knowledge about the {game_name}, can you do the following things? "
                + " Analysis on my Cards: Understanding all given information and your knowledge about the {game_name}, please analyze your best combination and the advantages of your cards in the current round step by step."
                + " Belief on {recipient_name}'s cards: Understanding all given information, please infer the probabilities of the cards of {recipient_name} (normalize to 100% in total) objectively step by step. "
                  "Output: {recipient_name} saw my history actions (or not) and then did action1 (probability) in the 1st round, ... continue..... Before this round, {recipient_name} saw my history actions (or not) and did action1 (probability); because of {recipient_name}'s behaviour pattern and the match with the public card (if released), {recipient_name} tends to have card1 (probability), card2 (probability) ..continue.. (normalize to 100% in total)."
                + " Analysis on {recipient_name}'s Cards: Understanding all given information and your knowledge about the {game_name}, please analyze {recipient_name}'s best combination and the advantages of {recipient_name}'s cards in the current round step by step."
                + " Potential {recipient_name}'s current beliefs about your cards: Understanding all given information and your knowledge about the {game_name}, if you were {recipient_name} (he can only observe my actions but cannot see my cards), please infer {recipient_name}'s beliefs about your cards with probability (normalize to 100% in total) step by step. Output: {agent_name} did action1 (probability) (after I did action or not) in the 1st round, ... continue... {agent_name} did action1 (probability) (after I did action or not) in the current round; from the perspective of {recipient_name}, {agent_name} tends to have card1 (probability), card2 (probability) ... (normalize to 100% in total)."
            )
        elif mode == 'first_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your estimated judgement about the behaviour pattern of {recipient_name} and the improved strategy is: {pattern} \n"
                + " Your observation now is: {observation}\n"
                + " Your current game progress summarization, including actions and conversations with {recipient_name}, is: {recent_observations}\n"
                + " Understanding the game rule, the cards you have, your observation, the progress summarization in the current game, the estimated behaviour pattern of {recipient_name} on you, and your knowledge about the {game_name}, can you do the following things? "
                + " Analysis on my Cards: Understanding all given information, please analyze your best combination and the advantages of your cards in the current round step by step."
                + " Belief on {recipient_name}'s cards: Understanding all given information, please infer the probabilities of the cards of {recipient_name} (normalize to 100% in total) step by step. Template: In the 1st round, {recipient_name} did action1 (probability), ... continue... In the current round, {recipient_name} did action1 (probability); because of {recipient_name}'s behaviour pattern and the match with the current public card (if released), he tends to have card1 (probability), card2 (probability) (normalize to 100% in total). "
                + " Analysis on {recipient_name}'s Cards: Understanding all given information, please analyze {recipient_name}'s best combination and the advantages of {recipient_name}'s cards in the current round step by step."
            )
        agent_summary_description = short_memory_summary

        kwargs = dict(
            agent_summary_description=agent_summary_description,
            recent_observations=agent_summary_description,
            agent_name=self.name,
            pattern=pattern,
            recipient_name=recipient_name,
            observation=observation,
            game_name=self.game_name,
            rule=self.rule,
        )

        belief_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
        self.belief = belief_prediction_chain.run(**kwargs)
        self.belief = self.belief.strip()
        return self.belief

    def get_pattern(self, recipient_name: str, game_pattern: str = '', last_k: int = 20,
                    short_summarization: str = '', mode: str = 'second_tom') -> str:
        """Estimate the opponent's behaviour pattern (and reflect on strategy) from long-term memory."""
        if mode == 'second_tom':
            prompt = PromptTemplate.from_template(
                "You are the objective player behind an NPC character called {agent_name}, and you are playing {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory, including observations, actions and conversations with {recipient_name}, is: {long_memory}\n"
                + " {recipient_name}'s game pattern: Understanding all given information and your understanding about the {game_name}, please infer and estimate as many reasonable {recipient_name}'s game behaviour patterns/preferences as possible for each card he holds and each round, with probability (normalize to 100% in total for each pattern item); please also infer the advantages of his cards, and analyze how {recipient_name}'s behaviour patterns/preferences are influenced by my actions when he holds different cards, step by step. Output as a tree structure: "
                + "Output: When {recipient_name} holds card1 and its combination with the public card (if released): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1/action2/action3 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue ... (normalize to 100% in total); if {recipient_name} sees the action2 of the opponent or not, ... continue ..(more patterns with different actions).. in the 1st round; if {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue... (normalize to 100% in total), ... continue ..(more patterns).. in the 2nd round;"
                  "When {recipient_name} holds card2 and its combination with the public card (if released): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) .. continue ... (normalize to 100% in total)... in the 1st round; .. continue ..(more patterns with different actions). in the 2nd round .. "
                  " (more patterns with different cards).. continue.."
                + " {recipient_name}'s guess on my game pattern: Understanding all given information, please infer several reasonable beliefs about my game pattern/preference when I hold different cards, from the perspective of {recipient_name} (please consider the advantages of the cards, the actions and the match with the public card (if released)), for every round of the game, in detail, as a tree-structure output, step by step. "
                + "Output: In the 1st round, when {agent_name} holds card1 with the public card (if released), he would like to do action1 (probabilities), action2 (probabilities) (normalize to 100% in total) ... continue .. and then do action ...;"
                  "When {agent_name} holds card2 with the public card (if released), ... "
                  " .. continue.."
                + " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name} and {recipient_name}'s guess on my game pattern, in order to win against {recipient_name} in the whole game, step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions.) Output as a tree structure: "
                  "When I hold card and the public card (if released), and see the action of the opponent, I would like to do action1; ... "
            )
        elif mode == 'first_tom':
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory, including observations, actions and conversations with {recipient_name}, is: {long_memory}\n"
                + " Understanding the game rule, all the previous game history and your knowledge about the {game_name}, can you do the following things for future games? "
                + " {recipient_name}'s game pattern: Understanding all given information, please infer all possible reasonable {recipient_name}'s game patterns/preferences for each card he holds and each round, with probability (normalize to 100% in total for each pattern item), for every round of the game, as a tree-structure output, step by step: "
                + "Output: In the 1st round, when {recipient_name} holds card1 and the public card (if released), he would like to do action (probabilities); when {recipient_name} holds card2 and the public card (if released), he would like to do action (probabilities), ... continue.. In the 2nd round, when {recipient_name} holds card1 and the public card (if released), ..(similar to before).. continue. "
                + " Number of chips reason: Think about why you have these chips in all previous games, step by step. "
                + " Reflection: Reflect on whether your actions in previous games were right or wrong, to win or lose concrete chips, step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions.) "
                + " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name}, in order to win against {recipient_name} in the whole game, step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions.) Output as a tree structure:"
            )
        else:
            prompt = PromptTemplate.from_template(
                "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
                + " The game rule is: {rule} \n"
                + " Your previous game memory, including observations, actions and conversations with {recipient_name}, is: {long_memory}\n"
                + " Understanding the game rule, all the previous game history and your knowledge about the {game_name}, can you do the following things for future games? "
                + " Number of chips reason: Think about why you have these chips in all previous games, step by step. "
                + " Reflection: Reflect on whether your actions in previous games were right or wrong, to win or lose concrete chips, step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions.) "
                + " Strategy Improvement: Understanding the above information, think about what strategies I need to adopt to win against {recipient_name} over the whole game, step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions.) Output as a tree structure:"
            )
        reflection_chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
        long_memory = self.memory[-last_k:]
        long_memory_str = "\n\n".join(long_memory)

        kwargs = dict(
            long_memory=long_memory_str,
            game_pattern=game_pattern,
            agent_name=self.name,
            recipient_name=recipient_name,
            game_name=self.game_name,
            rule=self.rule,
        )

        self.long_belief = reflection_chain.run(**kwargs)
        self.long_belief = self.long_belief.strip()
        return self.long_belief

    def get_summarization(self, recipient_name: str, game_memory: str, opponent_name: str) -> str:
        """Get a summarization of one game's long memory to save costs."""
        prompt = PromptTemplate.from_template(
            "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
            + " The game rule is: {rule} \n"
            + " The observation conversion rules are: {observation_rule}\n"
            + " One game memory, including observations, actions and conversations with {recipient_name}, is: {long_memory}\n"
            + " Understanding the game rule, the observation conversion rules, the game history and your knowledge about the {game_name}, can you do the following things:"
            + " History summarization: Summarize the game history with the action, observation and result information, using the template, and respond briefly: In the first round of the first game, name holds card1 and does action .... continue ..."
            + " {opponent_name}'s card reasoning: If the card of {opponent_name} is not available: because {agent_name}'s card is xx, the public card (if released) is xxx, {opponent_name}'s behaviours are xx and the current game result is xx, please infer {opponent_name}'s card with probability (100% in total), confidently and step by step, based on your understanding of all the above information. \n"
        )
        reflection_chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
        kwargs = dict(
            observation_rule=self.observation_rule,
            long_memory=game_memory,
            agent_name=self.name,
            recipient_name=recipient_name,
            opponent_name=opponent_name,
            game_name=self.game_name,
            rule=self.rule,
        )

        self.long_belief = reflection_chain.run(**kwargs)
        self.long_belief = self.long_belief.strip()
        return self.long_belief

    def get_short_memory_summary(self, observation: str, recipient_name: str, short_memory_summary: str) -> str:
        """Summarize the current game history into a short memory."""
        prompt = PromptTemplate.from_template(
            "You are the player behind an NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
            + " The game rule is: {rule} \n"
            + " Your current observation is: {observation}\n"
            + " The current game history, including previous actions, observations and conversations, is: {agent_summary_description}\n"
            + " Based on the game rule, your observation and your knowledge about the {game_name}, please summarize the current history. Output as a tree structure, and respond briefly: "
            + " In the first round, name does action, and says xxx .... continue ..."
        )

        agent_summary_description = short_memory_summary

        kwargs = dict(
            agent_summary_description=agent_summary_description,
            recent_observations=agent_summary_description,
            agent_name=self.name,
            recipient_name=recipient_name,
            observation=observation,
            game_name=self.game_name,
            rule=self.rule,
        )

        belief_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
        self.belief = belief_prediction_chain.run(**kwargs)
        self.belief = self.belief.strip()
        return self.belief

    def convert_obs(self, observation: str, recipient_name: str, user_index: str, valid_action_list: str) -> str:
        """Convert the raw observation and valid action list into readable text."""
        prompt = PromptTemplate.from_template(
            "You are the player behind an NPC character called {agent_name} with player index {user_index}, and you are playing the board game {game_name} with {recipient_name}. \n"
            + " The game rule is: {rule} \n"
            + " Your observation now is: {observation}\n"
            + " You will receive a valid action list you can perform in this turn. \n"
            + " Your valid action list is: {valid_action_list}\n"
            + " The observation conversion rules are: {observation_rule}\n"
            + " Please convert {observation} and {valid_action_list} to readable text based on the observation conversion rules and your knowledge about the {game_name} (respond briefly).\n\n"
        )
        kwargs = dict(
            user_index=user_index,
            agent_name=self.name,
            rule=self.rule,
            recipient_name=recipient_name,
            observation=observation,
            valid_action_list=valid_action_list,
            game_name=self.game_name,
            observation_rule=self.observation_rule,
        )
        obs_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
        self.read_observation = obs_prediction_chain.run(**kwargs)
        self.read_observation = self.read_observation.strip()
        return self.read_observation

    def action_decision(self, observation: str, valid_action_list: List[str], promp_head: str,
                        act: str = None, short_memory_summary: str = "") -> Tuple[str, str]:
        """Decide on the next action (and table talk) given the current plan."""
        prompt = PromptTemplate.from_template(
            promp_head
            + "\nYour plan is: {plan}"
            + "\n Based on the plan, please select the next action from the available action list: {valid_action_list} (just one word), and say something to the opponent player to bluff or confuse him, or keep silent, to finally win the whole game and reduce the risk of your action (respond with the sentence only). Please respond with both, separated by | "
            + "\n\n"
        )

        agent_summary_description = short_memory_summary

        kwargs = dict(
            agent_summary_description=agent_summary_description,
            agent_name=self.name,
            game_name=self.game_name,
            observation=observation,
            agent_status=self.status,
            valid_action_list=valid_action_list,
            plan=self.plan,
            belief=self.belief,
            act=act,
        )
        action_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)

        result = action_prediction_chain.run(**kwargs)
        if "|" in result:
            result, result_comm = result.split("|", 1)
        else:
            result_comm = ""
        return result.strip(), result_comm.strip()
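
    # The raw completion is expected to look like "<action>|<table talk>". For example,
    # a hypothetical output "raise|I have a great hand." is split on the first "|"
    # into ("raise", "I have a great hand."); with no "|", the talk defaults to "".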

    def make_act(self, observation: str, opponent_name: str, player_index: int, valid_action_list: List,
                 verbose_print: bool, game_idx: int, round: int, bot_short_memory: List, bot_long_memory: List,
                 console, log_file_name='', mode='second_tom') -> Tuple[str, str, List, List]:
        """Run one full decision step: convert the observation, summarize, infer beliefs, plan and act."""
        readable_text_amy_obs = self.convert_obs(observation, opponent_name, player_index, valid_action_list)
        if verbose_print:
            util.get_logging(logger_name=log_file_name + '_obs',
                             content={str(game_idx + 1) + "_" + str(round): {"raw_obs": observation,
                                                                             "readable_text_obs": readable_text_amy_obs}})
            console.print('readable_text_obs: ', style="red")
            print(readable_text_amy_obs)
        time.sleep(0)
        if len(bot_short_memory[player_index]) == 1:
            short_memory_summary = f'{game_idx+1}th Game Start \n' + readable_text_amy_obs
        else:
            short_memory_summary = self.get_short_memory_summary(observation=readable_text_amy_obs, recipient_name=opponent_name,
                                                                 short_memory_summary='\n'.join(bot_short_memory[player_index]))

        if log_file_name is not None:
            util.get_logging(logger_name=log_file_name + '_short_memory',
                             content={str(game_idx + 1) + "_" + str(round): {
                                 "raw_short_memory": '\n'.join(bot_short_memory[player_index]),
                                 "short_memory_summary": short_memory_summary}})
        if verbose_print:
            console.print('short_memory_summary: ', style="yellow")
            print(short_memory_summary)

        time.sleep(0)
        if round <= 1:
            self.pattern = self.get_pattern(opponent_name, '', short_summarization=short_memory_summary, mode=mode)
            if log_file_name is not None:
                util.get_logging(logger_name=log_file_name + '_pattern_model',
                                 content={str(game_idx + 1) + "_" + str(round): self.pattern})
            console.print('pattern: ', style="blue")
            print(self.pattern)

        time.sleep(0)

        if mode == 'second_tom' or mode == 'first_tom':
            belief = self.get_belief(readable_text_amy_obs, opponent_name, short_memory_summary=short_memory_summary,
                                     pattern=self.pattern, mode=mode)
            if verbose_print:
                console.print(self.name + " belief: ", style="deep_pink3")
                print(self.name + " belief: " + str(belief))
            if log_file_name is not None:
                util.get_logging(logger_name=log_file_name + '_belief',
                                 content={str(game_idx + 1) + "_" + str(round): {
                                     "belief": str(belief)}})
        else:
            belief = ''

        time.sleep(0)
        plan = self.planning_module(readable_text_amy_obs, opponent_name, belief=belief, valid_action_list=valid_action_list,
                                    short_memory_summary=short_memory_summary, pattern=self.pattern, last_plan='', mode=mode)
        if verbose_print:
            console.print(self.name + " plan: ", style="orchid")
            print(self.name + " plan: " + str(plan))
        if log_file_name is not None:
            util.get_logging(logger_name=log_file_name + '_plan',
                             content={str(game_idx + 1) + "_" + str(round): {
                                 "plan": str(plan)}})
        time.sleep(0)
        promp_head = ''
        act, comm = self.action_decision(readable_text_amy_obs, valid_action_list, promp_head,
                                         short_memory_summary=short_memory_summary)
        if log_file_name is not None:
            util.get_logging(logger_name=log_file_name + '_act',
                             content={str(game_idx + 1) + "_" + str(round): {
                                 "act": str(act), "talk_sentence": str(comm)}})

        # Keep re-prompting until the model returns an action from the valid list.
        while act not in valid_action_list:
            print('Action', str(act), 'is not a valid action in valid_action_list, please try again.\n')
            promp_head += f'Action {act} is not a valid action in {valid_action_list}, please try again.\n'
            act, comm = self.action_decision(readable_text_amy_obs, valid_action_list, promp_head, act)
        print(self.name + " act: " + str(act))
        print(comm)

        bot_short_memory[player_index].append(
            f"{self.name} has the observation {readable_text_amy_obs}, tries to take action: {act} and says {comm} to {opponent_name}")
        bot_short_memory[(player_index + 1) % 2].append(
            f"{self.name} tries to take action: {act} and says {comm} to {opponent_name}")

        bot_long_memory[player_index].append(
            f"{self.name} has the observation {observation}, tries to take action: {act} and says {comm} to {opponent_name}")
        return act, comm, bot_short_memory, bot_long_memory
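

# A minimal round-trip sketch (hypothetical driver code; assumes an rlcard-style
# environment and an already-constructed `agent`, mirroring how app.py calls make_act):
#
#   obs = env.get_state(env.get_player_id())['raw_obs']
#   valid = env.get_state(env.get_player_id())['raw_legal_actions']
#   act, comm, bot_short_memory, bot_long_memory = agent.make_act(
#       obs, 'GoodGuy', env.get_player_id(), valid, verbose_print=False,
#       game_idx=0, round=0, bot_short_memory=bot_short_memory,
#       bot_long_memory=bot_long_memory, console=Console(),
#       log_file_name=None, mode='second_tom')
#   env.step(act, raw_action=True)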
app.py ADDED
@@ -0,0 +1,468 @@
import gradio as gr
import requests

import json
import os
from pathlib import Path

import inquirer
import typer
from rich.console import Console
from rich.prompt import IntPrompt, Prompt, Confirm
import argparse
import logging

import util
from model import get_all_embeddings, get_all_llms
from setting import Settings, get_all_model_settings, load_model_setting
from model import agi_init
import gym
from retriever import (
    create_new_memory_retriever,
)
import gym_coup
import random
from rlcard.utils import set_seed
import rlcard
from rlcard import models
from rlcard.models import leducholdem_rule_models


# Inference function
def predict(openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter,
            dialogue_chatbot=[], system_chatbot=[], history=[]):

    verified, settings, env, ctx = history['verified'], history['settings'], history['env'], history['ctx']
    bot_long_memory, bot_short_memory = history['bot_long_memory'], history['bot_short_memory']
    agents_num, chips, user_index, game_idx, mode = history['agents_num'], history['chips'], history['user_index'], history['game_idx'], history['mode']

    status_message = ''
    valid_actions = gr.Dropdown.update(choices=[], value=None)

    # First call: verify the key, initialize the agents and the game environment.
    if env is None:
        if not verified:
            res = util.verify_openai_token(openai_gpt4_key)
            if res != "OK":
                return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, res
            history['verified'] = True

        if game_selection == '' or game_selection is None:
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, "Please select a game."

        settings = Settings()
        settings.model = load_model_setting("openai-gpt-4-0613")

        settings.model.llm.openai_api_key = openai_gpt4_key
        settings.model.embedding.openai_api_key = openai_gpt4_key
        res = util.verify_model_initialization(settings)

        if res != "OK":
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, res
        else:
            # read the agent configs
            agent1_file = 'person_config/Persuader.json'
            agent1_config = util.load_json(Path(agent1_file))
            agent1_config["path"] = agent1_file

            agent2_file = 'person_config/GoodGuy.json'
            agent2_config = util.load_json(Path(agent2_file))
            agent2_config["path"] = agent2_file

            agent_configs = [agent1_config, agent2_config]
            agent_names = [agent1_config["name"], agent2_config["name"]]
            if game_selection == 'coup':
                game_config_file = 'game_config/coup.json'
            elif game_selection == 'leduc-holdem':
                game_config_file = 'game_config/leduc_limit.json'
            elif game_selection == 'limit-holdem':
                game_config_file = 'game_config/limit_holdem.json'

            game_config = util.load_json(Path(game_config_file))
            game_config["path"] = game_config_file

            user_index = 1
            console = Console()
            ctx = agi_init(agent_configs, game_config, console, settings, user_index)

            os.environ["OPENAI_API_KEY"] = openai_gpt4_key
            print(game_selection)
            if game_selection != 'coup':
                env = rlcard.make(game_selection)
            else:
                env = gym.make('coup-v0')
            env.reset()

            history['env'] = env
            history['ctx'] = ctx

            for i in range(agents_num):
                bot_short_memory.append([f'{game_idx+1}th Game Start'])
                bot_long_memory.append([f'{game_idx+1}th Game Start'])

            status_message = 'Verified.'
            if game_selection != 'coup':
                valid_actions = env.get_state(env.get_player_id())['raw_legal_actions']
            else:
                valid_actions = env.get_valid_actions(text=True)

            # check if the opponent moves first
            if game_selection != 'coup':
                idx = env.get_player_id()
            else:
                idx = env.game.whose_action
            if idx != user_index:
                amy = ctx.robot_agents[idx]
                if game_selection != 'coup':
                    amy_obs = env.get_state(env.get_player_id())['raw_obs']
                    amy_index = env.get_player_id()
                    amy_obs['game_num'] = game_idx + 1
                    amy_obs['rest_chips'] = chips[idx]
                    amy_obs['opponent_rest_chips'] = chips[(idx+1) % agents_num]
                    valid_action_list = env.get_state(env.get_player_id())['raw_legal_actions']
                else:
                    amy_obs = env.get_obs(text=True, p2_view=(idx == 1))
                    amy_index = env.game.whose_action
                    valid_action_list = env.get_valid_actions(text=True)

                opponent_name = ctx.robot_agents[(idx+1) % agents_num].name

                act, comm, bot_short_memory, bot_long_memory = amy.make_act(amy_obs, opponent_name, amy_index, valid_action_list, verbose_print=False,
                                                                            game_idx=game_idx, round=0, bot_short_memory=bot_short_memory, bot_long_memory=bot_long_memory, console=Console(),
                                                                            log_file_name=None, mode=mode)
                if game_selection != 'coup':
                    env.step(act, raw_action=True)
                else:
                    env.step(act)
                    win_message = env.game.call_system_info()
                    if win_message is not None:
                        print(win_message)
                        win_message = win_message.replace('Player 0', ctx.robot_agents[0].name)
                        win_message = win_message.replace('Player 1', ctx.robot_agents[1].name)
                        win_message = win_message.replace('I', ctx.robot_agents[idx].name)
                        win_message = win_message.replace('the opponent', ctx.robot_agents[(idx + 1) % agents_num].name)

                        bot_short_memory.append(win_message)
                        bot_long_memory.append(win_message)

                dialogue_chatbot.append((None, comm))
                system_chatbot.append((None, f'Suspicion-Agent action: {act}'))

            # get the user observation
            idx = user_index
            if game_selection != 'coup':
                amy_obs = env.get_state(idx)['raw_obs']
                amy_obs['rest_chips'] = chips[idx]
                amy_obs['opponent_rest_chips'] = chips[(idx+1) % agents_num]

                valid_actions = env.get_state(idx)['raw_legal_actions']
            else:
                amy_obs = env.get_obs(text=True, p2_view=(idx == 1))
                valid_actions = env.get_valid_actions(text=True)

            if game_selection != 'coup':
                game_state_string = ""
                for key, value in amy_obs.items():
                    if key != 'legal_actions':
                        game_state_string += f"{key}: {value}\n"

                system_chatbot.append((f'Game state:\n{game_state_string}', None))
            else:
                system_chatbot.append((f'Game state:\n{amy_obs}', None))

            valid_actions = gr.Dropdown.update(choices=valid_actions, value=None)
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message

    # check if the game is over
    if game_selection != 'coup':
        game_over = env.is_over()
    else:
        game_over = env.game.game_over
    if game_over:
        status_message = "Game ended."
        return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message

    if action is None:
        status_message = "No action received."
        return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message
    if game_selection != 'coup':
        if action not in env.get_state(env.get_player_id())['raw_legal_actions']:
            status_message = "Not a valid action. Please enter a valid action."
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message
    else:
        if action not in env.get_valid_actions(text=True):
            status_message = "Not a valid action. Please enter a valid action."
            return system_chatbot, dialogue_chatbot, valid_actions, history, chat_counter, status_message

    # the user takes the chosen action (the chat message may be empty)
    if game_selection != 'coup':
        env_state = env.get_state(env.get_player_id())['raw_obs']
    else:
        env_state = env.get_obs(text=True, p2_view=(env.game.whose_action == 1))

    act = action
    if game_selection != 'coup':
        bot_short_memory[(user_index + 1) % agents_num].append(
            f"The valid action list of {ctx.robot_agents[user_index].name} is {env.get_state(env.get_player_id())['raw_legal_actions']}, and he tries to take action: {act}. He said, {inputs}")
        bot_long_memory[user_index % agents_num].append(
            f"{ctx.robot_agents[user_index].name} has the observation: {env.get_state(env.get_player_id())['raw_obs']}, and tries to take action: {act}.")
    else:
        bot_short_memory[(user_index + 1) % agents_num].append(
            f"The valid action list of {ctx.robot_agents[user_index].name} is {env.get_valid_actions(text=True)}, and he tries to take action: {act}. He said, {inputs}")
        bot_long_memory[user_index % agents_num].append(
            f"{ctx.robot_agents[user_index].name} has the observation: {env.get_obs(text=True, p2_view=(env.game.whose_action == 1))}, and tries to take action: {act}.")

    if game_selection != 'coup':
        env.step(act, raw_action=True)
    else:
        env.step(act)

    comm = None
    win_message = ''
    if game_selection != 'coup':
        game_over = env.is_over()
    else:
        game_over = env.game.game_over
    if not game_over:
        # the bot reacts to the user's move
        if game_selection != 'coup':
            idx = env.get_player_id()
            amy = ctx.robot_agents[idx]
            amy_index = env.get_player_id()
            amy_obs = env.get_state(env.get_player_id())['raw_obs']
            amy_obs['game_num'] = game_idx + 1
            amy_obs['rest_chips'] = chips[idx]
            amy_obs['opponent_rest_chips'] = chips[(idx+1) % agents_num]
            valid_action_list = env.get_state(env.get_player_id())['raw_legal_actions']
        else:
            idx = env.game.whose_action
            amy = ctx.robot_agents[idx]
            amy_index = env.game.whose_action
            amy_obs = env.get_obs(text=True, p2_view=(idx == 1))
            valid_action_list = env.get_valid_actions(text=True)
        opponent_name = ctx.robot_agents[(idx+1) % agents_num].name

        act, comm, bot_short_memory, bot_long_memory = amy.make_act(amy_obs, opponent_name, amy_index, valid_action_list, verbose_print=False,
                                                                    game_idx=game_idx, round=0, bot_short_memory=bot_short_memory, bot_long_memory=bot_long_memory, console=Console(),
                                                                    log_file_name=None, mode=mode)
        if game_selection != 'coup':
            env.step(act, raw_action=True)
        else:
            env.step(act)

    idx = user_index
    if game_selection != 'coup':
        amy_obs = env.get_state(idx)['raw_obs']
    else:
        amy_obs = env.get_obs(text=True, p2_view=(idx == 1))
    if game_selection != 'coup':
        amy_obs['rest_chips'] = chips[idx]
        amy_obs['opponent_rest_chips'] = chips[(idx+1) % agents_num]

        valid_actions = env.get_state(idx)['raw_legal_actions']
    else:
        valid_actions = env.get_valid_actions(text=True)

    game_state_string = ""
    for key, value in amy_obs.items():
        if key != 'legal_actions':
            game_state_string += f"{key}: {value}\n"

    dialogue_chatbot.append((inputs if inputs != "" else None, comm))
    system_chatbot.append((f'My action: {action}', f'Suspicion-Agent action: {act}'))
    system_chatbot.append((f'Game state:\n{game_state_string}', None))

    if game_selection != 'coup':
        if env.is_over():
            pay_offs = env.get_payoffs()
            for idx in range(len(pay_offs)):
                pay_offs[idx] = pay_offs[idx] * 2
                chips[idx] += pay_offs[idx]
            if pay_offs[user_index] > 0:
                win_message = f'You win {pay_offs[user_index]} chips, Suspicion-Agent loses {pay_offs[user_index]} chips'
            else:
                win_message = f'Suspicion-Agent wins {pay_offs[(user_index+1) % agents_num]} chips, you lose {pay_offs[(user_index+1) % agents_num]} chips'

            idx = (user_index + 1) % agents_num
            amy_obs = env.get_state(idx)['raw_obs']
            bot_hand = amy_obs['hand']
            system_chatbot.append((None, f'Suspicion-Agent hand: {bot_hand}'))
            system_chatbot.append((f'Game over.\n {win_message}', None))

            valid_actions = []
    else:
        if env.game.game_over:
            idx = (user_index + 1) % agents_num
            amy_obs = env.get_obs(text=True, p2_view=(idx == 1))
            bot_hand = amy_obs
            system_chatbot.append((None, f'Suspicion-Agent hand: {bot_hand}'))
            system_chatbot.append((f'Game over.\n {win_message}', None))
            valid_actions = []
    status_message += " Message received."
    valid_actions = gr.Dropdown.update(choices=valid_actions, value=None)
    return system_chatbot, dialogue_chatbot, valid_actions, history, 1, status_message
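
# `predict` is the single Gradio callback: it returns (system_chatbot, dialogue_chatbot,
# a valid-actions dropdown update, the state dict, chat_counter, status message).
# A hypothetical wiring sketch (component names illustrative, not from this file):
#
#   send_btn.click(predict,
#                  [openai_gpt4_key, game_selection, action_dropdown, user_msg,
#                   top_p, temperature, chat_counter, dialogue_chatbot,
#                   system_chatbot, state],
#                  [system_chatbot, dialogue_chatbot, action_dropdown, state,
#                   chat_counter, status_box])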
336
+ #Resetting to blank
337
+ def reset_textbox():
338
+ return gr.update(value='')
339
+
340
+ #to set a component as visible=False
341
+ def set_visible_false():
342
+ return gr.update(visible=False)
343
+
344
+ #to set a component as visible=True
345
+ def set_visible_true():
346
+ return gr.update(visible=True)
347
+
348
+ def update_instruction(game_selection):
349
+ if game_selection is not None and game_selection != '':
350
+ if game_selection == 'coup':
351
+ with open('./game_config/coup.json') as file:
352
+ contents = json.load(file)
353
+ elif game_selection == 'leduc-holdem':
354
+ with open('./game_config/leduc_limit.json') as file:
355
+ contents = json.load(file)
356
+ elif game_selection == 'limit-holdem':
357
+ with open('./game_config/limit_holdem.json') as file:
358
+ contents = json.load(file)
359
+ return f"Game rule: {contents['game_rule']}\n\n\nObservation Rule: {contents['observation_rule']}"
360
+
361
+ # update valid actions list
362
+ def set_valid_actions():
363
+ if game_selection != 'coup':
364
+ print(env.get_state(env.get_player_id())['raw_legal_actions'])
365
+ else:
366
+ print(env.get_valid_actions(text=True))
367
+ if env is None:
368
+ return gr.update(value='')
369
+ else:
370
+ if game_selection != 'coup':
371
+ valid_actions_list = env.get_state(env.get_player_id())['raw_legal_actions']
372
+ else:
373
+ valid_actions_list = env.get_valid_actions(text=True)
374
+ return gr.update(value=f'{valid_actions_list}')
375
+
376
+ title = """<h1 align="center">Suspicion-Agent Demo</h1>"""
+ # display message linking to the paper
+ theme_addon_msg = """<center>This is an official demo for <b>Suspicion-Agent: Playing Imperfect Information Games with Theory of Mind Aware GPT4</b>. Check out our paper for more details <a href="https://arxiv.org/abs/2309.17277" target="_blank">here</a>!</center>
+ """
+
+ # Additional information about the system message in GPT4
+ system_msg_info = """A conversation could begin with a system message to gently instruct the assistant.
+ The system message helps set the behavior of the AI assistant. For example, the assistant could be instructed with 'You are a helpful assistant.'"""
+
+ # Modifying the existing Gradio theme
+ theme = gr.themes.Soft(primary_hue="zinc", secondary_hue="blue", neutral_hue="blue",
+                        text_size=gr.themes.sizes.text_lg)
+
+ with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""",
+                theme=theme) as demo:
+     gr.HTML(title)
+     gr.HTML("""<h3 align="center">🔥This Huggingface Gradio demo provides a variety of game matches against a GPT4 agent. Please note that you will need an OpenAI API key for GPT4 access.</h3>""")
+     gr.HTML(theme_addon_msg)
+     gr.HTML('''<center><a href="https://huggingface.co/spaces/paulyoo/suspicion_agent_demo?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API key</center>''')
+
+     initial_state = {
+         'verified': False,
+         'settings': None,
+         'env': None,
+         'ctx': None,
+
+         'bot_long_memory': [],
+         'bot_short_memory': [],
+
+         'agents_num': 2,
+         'chips': [50, 50],
+         'user_index': 1,
+         'game_idx': 0,
+         'mode': 'first_tom',
+     }
+
+     with gr.Column(elem_id="col_container"):
+         # Users need to provide their own GPT4 API key; it is no longer provided by Huggingface
+         with gr.Row():
+             with gr.Column():
+                 openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", value="", type="password", placeholder="sk..", info="You have to provide your own GPT4 keys for this app to function properly",)
+             with gr.Column():
+                 game_selection = gr.Dropdown(
+                     ["leduc-holdem", "limit-holdem", "coup"], label="Game Selections", info="Select the game to play from the dropdown"
+                 )
+         # with gr.Accordion(label="System message:", open=False):
+         #     system_msg = gr.Textbox(label="Instruct the AI Assistant to set its behaviour", info=system_msg_info, value="", placeholder="Type here..")
+         #     accordion_msg = gr.HTML(value="🚧 To set the System message you will have to refresh the app", visible=False)
+
+         with gr.Row():
+             instruction_panel = gr.Textbox(label='Game Instructions')
+
+         with gr.Row():
+             with gr.Column():
+                 system_chatbot = gr.Chatbot(label='Game Status', elem_id="system_chatbot")
+             with gr.Column():
+                 dialogue_chatbot = gr.Chatbot(label='Dialogue with GPT4', elem_id="dialogue_chatbot")
+
+         # chatbot = gr.Chatbot(label='GPT4', elem_id="chatbot")
+         # action = gr.Radio(['call', 'raise', 'fold'], label="Actions", info="Select the action to play")
+         action = gr.Dropdown(placeholder="", label="Select an action.", info="")
+         inputs = gr.Textbox(placeholder="", label="Type a message for the opponent. Messages are optional.")
+         state = gr.State(initial_state)
+         with gr.Row():
+             with gr.Column(scale=7):
+                 b1 = gr.Button().style(full_width=True)
+             with gr.Column(scale=3):
+                 server_status_code = gr.Textbox(label="Status code from OpenAI server",)
+
+         # top_p, temperature
+         with gr.Accordion("Parameters", open=False):
+             top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
+             temperature = gr.Slider(minimum=0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
+         chat_counter = gr.Number(value=0, visible=False, precision=0)
+
+     # Event handling
+     inputs.submit(predict, [openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter, dialogue_chatbot, system_chatbot, state], [system_chatbot, dialogue_chatbot, action, state, chat_counter, server_status_code],)  # openai_api_key
+     b1.click(predict, [openai_gpt4_key, game_selection, action, inputs, top_p, temperature, chat_counter, dialogue_chatbot, system_chatbot, state], [system_chatbot, dialogue_chatbot, action, state, chat_counter, server_status_code],)  # openai_api_key
+
+     # inputs.submit(set_visible_false, [], [system_msg])
+     # b1.click(set_visible_false, [], [system_msg])
+     # inputs.submit(set_visible_true, [], [accordion_msg])
+     # b1.click(set_visible_true, [], [accordion_msg])
+
+     game_selection.select(update_instruction, [game_selection], [instruction_panel])
+
+     b1.click(reset_textbox, [], [inputs])
+     inputs.submit(reset_textbox, [], [inputs])
+
+     # b1.click(set_valid_actions, [], [valid_actions])
+     # inputs.submit(set_valid_actions, [], [valid_actions])
+
+ demo.queue(max_size=99, concurrency_count=20).launch(debug=True)
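
One pattern worth noting in the wiring above: every handler receives the values of its input components plus the per-session `gr.State`, and returns updated values for each output component in order. A minimal, self-contained sketch of that round-trip (component names here are illustrative, not the app's):

```python
import gradio as gr

def step(user_input, state):
    # gr.State round-trips an arbitrary Python object per browser session:
    # read it, update a copy, and return it alongside the visible outputs.
    state = dict(state)
    state["turns"] = state.get("turns", 0) + 1
    return f"turn {state['turns']}: {user_input}", state

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    st = gr.State({"turns": 0})
    box.submit(step, [box, st], [out, st])

sketch.launch()
```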
context.py ADDED
@@ -0,0 +1,39 @@
+ from typing import List, Optional
+
+ from rich.console import Console
+ from rich.prompt import Prompt
+
+ from setting import Settings
+ from agent import SuspicionAgent
+
+
+ class Context:
+     def __init__(self, console: Console, settings: Settings, webcontext=None) -> None:
+         self.clock: int = 0
+         self.console: Console = console
+         self.agents: List[SuspicionAgent] = []
+         self.user_agent: Optional[SuspicionAgent] = None
+         self.robot_agents: List[SuspicionAgent] = []
+         self.observations = ["Beginning of the day, people are living their lives."]
+         self.timewindow_size = 3
+         self.observations_size_history = []
+         self.settings = settings
+         self.webcontext = webcontext
+
+     def print(self, message: str, style: Optional[str] = None):
+         if style:
+             self.console.print(message, style=style)
+         else:
+             self.console.print(message)
+
+         if self.webcontext:
+             self.webcontext.send_response(message)
+
+     def ask(self, message: str = "", choices: Optional[List[str]] = None) -> str:
+         if self.webcontext:
+             return self.webcontext.ask_human(message, choices)
+         else:
+             if choices:
+                 return Prompt.ask(message, choices=choices, default=choices[0])
+             else:
+                 return Prompt.ask(message)
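
`Context` is the shared console/web bridge: `print` mirrors output to the web client when one is attached, and `ask` falls back to a rich terminal prompt otherwise. A minimal usage sketch (running headless, so `webcontext` stays `None`; the default `Settings()` is just a placeholder here):

```python
from rich.console import Console

from context import Context
from setting import Settings

ctx = Context(Console(), Settings())
ctx.print("Dealing cards...", style="yellow")
# blocks on stdin; with choices given, the first one is the default
choice = ctx.ask("Your action?", choices=["call", "raise", "fold"])
ctx.print(f"You chose {choice}")
```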
game_config/coup.json ADDED
@@ -0,0 +1,7 @@
+ {
+     "name": "Coup",
+     "game_rule":
+     "In the Coup game, you play the role of the head of a family in an Italian city-state, a city run by a weak and corrupt court. You need to manipulate, bluff and bribe your way to power. Your objective is to destroy the influence of all the other families, forcing them into exile. Only one family will survive... In Coup, you want to be the last player with influence in the game, with influence being represented by face-down character cards in your playing area. Each player starts the game with two coins and two influence – i.e., two face-down character cards; the fifteen-card deck consists of three copies of five different characters, each with a unique set of powers: Duke: tax: Take three coins from the treasury. block_foreignaid: Block someone from taking foreign aid. Assassin: Assassination: Pay three coins and try to assassinate another player's character. Contessa: block_ass: Block an assassination attempt against yourself. Captain: steal: Take two coins from another player, or block_steal: block someone from stealing coins from you. Ambassador: exchange: Draw two character cards from the Court (the deck), choose which (if any) to exchange with your face-down characters, then return two; block_steal: Block someone from stealing coins from you. Note that the total number of each role is only 3. On your turn, you can take any of the actions listed above, regardless of which characters you actually have in front of you, or you can take one of three other actions: income: Take one coin from the treasury. foreign aid: Take two coins from the treasury. coup: Pay 7 coins and launch a coup against an opponent, forcing that player to lose an influence. (If you have 10 coins or more, you must take this action.) \n Single Win/Loss Rule: When you take one of the character actions – whether actively on your turn, or defensively in response to someone else's action – that character's action automatically succeeds unless an opponent challenges you. In this case, if you can't (or don't) reveal the appropriate character, you lose an influence, turning one of your characters face-up. Face-up characters cannot be used, and if both of your characters are face-up, you're out of the game. If you do have the character in question and choose to reveal it, the opponent loses an influence, then you shuffle that character into the deck and draw a new one, perhaps getting the same character again and perhaps not. The last player to still have influence – that is, a face-down character – wins the game! Win/Loss Payoff Rule: If one of the opponent's characters dies or is turned face-up, you win one chip. If one of your characters dies or is turned face-up, you lose one chip.",
+     "observation_rule": "You will receive an observation with 21 elements in the list, where the first 4 elements are your role cards, with none denoting that the role is hidden but alive or that there is no card. The elements at indices 9-12 denote whether the corresponding cards at indices 1-4 of yours are dead or not: 0 is alive but hidden, 1 denotes the corresponding card is dead, and none means there is no such card. The elements at indices 5-8 are the role cards of the other player, where none denotes that the role is alive but hidden or that there is no card (depending on the following 0/1 numbers); if they have a name, that is the name of a dead card. The elements at indices 13-16 denote whether the corresponding cards at indices 5-8 of the other player are dead or not: 0 is alive but hidden, 1 denotes the corresponding card is dead, and none means there is no such card. The 17th element denotes the coins you have. The 18th element denotes the coins of the other player. The 19th element denotes the last action you took. The 20th element denotes the last action the other player took. Please directly tell me which cards my opponent and I have and which are available to use (please specify which role cards are dead and cannot be used anymore). \n"
+
+ }
game_config/leduc_limit.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "name": "Leduc Hold'em Poker Limit",
+     "game_rule":
+     "Leduc Hold'em was first introduced in Bayes' Bluff: Opponent Modeling in Poker. Note that the deck consists of only two cards each of King, Queen and Jack, six cards in total. Each game is fixed at two players and only two rounds. Note that only one public card is available, with a two-bet maximum. Raise action: in the first round, you put in 4 more chips than your opponent; in the following round, you put in 8 more chips than your opponent. Call action: you put in the same amount of chips as your opponent (if their chips in the pot are higher than yours, normally 4). In the first round, one player is randomly chosen to put 1 unit in the pot as the small blind while the other puts in 2 units as the big blind, and each player is dealt one card; then betting starts. The player with the small blind acts first. In the second round, one public card is revealed first, then the players bet again. (Only one public card is available in the whole game.) \n Single Game Win/Draw/Lose Rule: the player whose hand has the same rank as the public card wins the single game. If neither matches, the player with the higher rank wins the single game; if the ranks of the two players' cards are the same, it is a draw. You can also 'fold' in a single game. \n Whole Game Win/Draw/Lose Rule: you are requested to play 100 games against your opponent; you are both given 100 chips originally, and the player who has more chips after 100 games wins the whole game (i.e. you aim to have more chips than your original chips). \n Winning Payoff Rule: half of the total pot. \n Losing Payoff Rule: half of the total pot. ",
+     "observation_rule": "The observation is a dictionary. The main observation space: `'raw_legal_actions'` holds the legal moves, described in the Legal Actions Mask section. 'hand' is the hole hand, where SJ denotes the J of Spades and HK denotes the King of Hearts. game_num is the index of the current game; the total number is 100. all_chips is a list containing the number of chips you and the opponent have put in the pot, respectively: the first element of 'all_chips' is the number of chips you have put in the pot, and the second element is the number of chips the opponent has put in the pot. rest_chips is how many chips you have now, and opponent_rest_chips is how many chips your opponent has now. Based on your hole hand and the public hand (only one), please analyze your best combination now and potential future combinations with probabilities (as numbers), reasonably and accurately, step by step."
+ }
game_config/limit_holdem.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "name": "Texas Hold'em Poker Limit",
+     "game_rule":
+     "General Rule: These are the basic rules for Texas Hold'em Poker with a limit betting structure:\n\nIn this game, you are requested to play 40 games against your opponent; you are both given 20 chips originally, and the player who has more chips after 40 games wins the whole game. Note that if your chips drop to 0 or below, you lose the whole game. Your goal is to obtain more chips than your opponent. The Deck: Texas Hold'em is played with a standard deck of 52 cards, with 13 ranks (A, 2 to K) in 4 suits.\n\nThe Blinds: The game begins with the two players to the left of the dealer posting a small blind and a big blind respectively. The small blind is usually half of the big blind.\n\nDealing the Cards: Each player is then dealt two private cards known as 'hole cards' that belong to them alone.\n\nFirst Round of Betting: Starting with the player to the left of the big blind, each player can call the big blind, raise, or fold. In limit hold'em, the amount that a player can raise in the first two rounds of betting (pre-flop and flop) is set at the lower limit of the stakes structure. For instance, in a $4/$8 limit hold'em game, players can bet and raise in increments of $4 during the pre-flop and flop rounds.\n\nThe Flop: After the first round of betting, the dealer discards the top card of the deck (a move called 'burning' the card) and then deals three face-up community cards known as the flop.\n\nSecond Round of Betting: Another round of betting ensues, starting with the player to the left of the dealer. The same betting rules apply as in the first round.\n\nThe Turn: The dealer burns another card and then adds a fourth face-up card to the community cards. This fourth card is known as the turn.\n\nThird Round of Betting: Another round of betting starts. In limit hold'em, the betting limit now doubles. So in our $4/$8 game example, players can now bet and raise in increments of $8.\n\nThe River: The dealer burns another card before placing a fifth and final community card face-up on the table. This card is known as the river.\n\nFinal Round of Betting: The final round of betting follows the same rules as the third round.\n\nThe Showdown: If two or more players remain after the final round of betting, a showdown occurs. This is where players reveal their hands, and the player with the best hand wins the pot. \n Single Game Win/Draw/Lose Rule: Texas Hold'em hands are ranked from highest to lowest as follows: Royal Flush, Straight Flush, Four of a Kind, Full House, Flush, Straight, Three of a Kind, Two Pair, One Pair, High Card.\n\nNote: If at any point a player makes a bet or a raise that no other player calls, they win the pot without revealing their hand.\n\nRotating the Dealer: After the pot is awarded, the dealer button moves one seat to the left, and the process begins anew.\nThe main difference between Limit Hold'em and other betting structures like No Limit or Pot Limit is the amount a player can bet or raise. In Limit, the amount is capped at a set amount per round. \n Whole Game Win/Draw/Lose Rule: you are requested to play 10 games against your opponent; you are both given 50 chips originally, and the player who has more chips after 10 games wins the whole game (i.e. you aim to have more chips than your original chips). \n Winning Payoff Rule: half of the pot. \n Losing Payoff Rule: half of the pot. ",
+     "observation_rule": "The observation is a dictionary which contains an `'observation'` element, which is the usual RL observation described below, and a `'raw_legal_actions'` element which holds the legal moves, described in the Legal Actions Mask section. The main observation space is similar to Texas Hold'em. 'hand' is the hole hand, where H3 denotes the 3 of Hearts and ST denotes the 10 of Spades. raise_nums is a list of 4 numbers representing how many times you have raised in each of the 4 rounds. Specifically, you have no more than 4 chances to raise in each round. action_record denotes the action history of the current game: the first dimension is the agent number, the second dimension is the action type. game_num is the index of the current game; the total number is 40. rest_chips is how many chips you have now, and opponent_rest_chips is how many chips your opponent has now. Based on your hole hand and the public hands, please analyze your best combination now and potential future combinations with probabilities (numbers totaling 100%), reasonably, step by step."
+ }
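
All three game configs share the same shape: a `name`, a `game_rule` prompt, and an `observation_rule` prompt, which is exactly what `agi_init` and `update_instruction` read. A quick sanity-check sketch:

```python
import json

# every game config is expected to expose the same three keys
for path in ("./game_config/coup.json",
             "./game_config/leduc_limit.json",
             "./game_config/limit_holdem.json"):
    with open(path) as f:
        cfg = json.load(f)
    assert {"name", "game_rule", "observation_rule"} <= cfg.keys()
    print(cfg["name"], "->", len(cfg["game_rule"]), "chars of rules")
```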
model.py ADDED
@@ -0,0 +1,99 @@
+ from typing import Dict, List, Type
+
+ from langchain import chat_models, embeddings, llms
+ from langchain.embeddings.base import Embeddings
+ from langchain.llms.base import BaseLanguageModel
+
+ from setting import EmbeddingSettings, LLMSettings
+ from context import Context
+ from setting import Settings
+ from rich.console import Console
+ from agent import SuspicionAgent
+
+
+ def agi_init(
+     agent_configs: List[dict],
+     game_config: dict,
+     console: Console,
+     settings: Settings,
+     user_idx: int = 0,
+     webcontext=None,
+ ) -> Context:
+     ctx = Context(console, settings, webcontext)
+     ctx.print("Creating all agents one by one...", style="yellow")
+     for idx, agent_config in enumerate(agent_configs):
+         agent_name = agent_config["name"]
+         with ctx.console.status(f"[yellow]Creating agent {agent_name}..."):
+             agent = SuspicionAgent(
+                 name=agent_config["name"],
+                 age=agent_config["age"],
+                 rule=game_config["game_rule"],
+                 game_name=game_config["name"],
+                 observation_rule=game_config["observation_rule"],
+                 status="N/A",
+                 llm=load_llm_from_config(ctx.settings.model.llm),
+                 reflection_threshold=8,
+             )
+             for memory in agent_config["memories"]:
+                 agent.add_memory(memory)
+             ctx.robot_agents.append(agent)
+             ctx.agents.append(agent)
+
+         ctx.print(f"Agent {agent_name} successfully created", style="green")
+
+     ctx.print("Suspicion Agent started...")
+
+     return ctx
+
+
+ # ------------------------- LLM/Chat models registry ------------------------- #
+ llm_type_to_cls_dict: Dict[str, Type[BaseLanguageModel]] = {
+     "chatopenai": chat_models.ChatOpenAI,
+     "openai": llms.OpenAI,
+ }
+
+ # ------------------------- Embedding models registry ------------------------ #
+ embedding_type_to_cls_dict: Dict[str, Type[Embeddings]] = {
+     "openaiembeddings": embeddings.OpenAIEmbeddings
+ }
+
+
+ # ---------------------------------------------------------------------------- #
+ #                                LLM/Chat models                                #
+ # ---------------------------------------------------------------------------- #
+ def load_llm_from_config(config: LLMSettings) -> BaseLanguageModel:
+     """Load LLM from Config."""
+     config_dict = config.dict()
+     config_type = config_dict.pop("type")
+
+     if config_type not in llm_type_to_cls_dict:
+         raise ValueError(f"Loading {config_type} LLM not supported")
+
+     cls = llm_type_to_cls_dict[config_type]
+     return cls(**config_dict)
+
+
+ def get_all_llms() -> List[str]:
+     """Get all supported LLMs"""
+     return list(llm_type_to_cls_dict.keys())
+
+
+ # ---------------------------------------------------------------------------- #
+ #                               Embeddings models                               #
+ # ---------------------------------------------------------------------------- #
+ def load_embedding_from_config(config: EmbeddingSettings) -> Embeddings:
+     """Load Embedding from Config."""
+     config_dict = config.dict()
+     config_type = config_dict.pop("type")
+     if config_type not in embedding_type_to_cls_dict:
+         raise ValueError(f"Loading {config_type} Embedding not supported")
+
+     cls = embedding_type_to_cls_dict[config_type]
+     return cls(**config_dict)
+
+
+ def get_all_embeddings() -> List[str]:
+     """Get all supported Embeddings"""
+     return list(embedding_type_to_cls_dict.keys())
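
The registries above make model selection data-driven: the `type` key picks the class and every remaining key is forwarded as a constructor kwarg (both settings classes allow extra fields). A hedged usage sketch, assuming `OPENAI_API_KEY` is set in the environment; the extra kwargs are illustrative:

```python
from model import get_all_llms, load_llm_from_config
from setting import LLMSettings

print(get_all_llms())  # ['chatopenai', 'openai']

# "type" selects chat_models.ChatOpenAI; the rest become its kwargs
cfg = LLMSettings(type="chatopenai", model="gpt-4-0613", temperature=0.1)
llm = load_llm_from_config(cfg)
```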
person_config/GoodGuy.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "name": "GoodGuy",
+     "age": 27,
+     "personality": "flexible",
+     "memories": []
+ }
person_config/Persuader.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "name": "board_game_expert",
+     "age": 27,
+     "personality": "flexible",
+     "memories": []
+ }
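
The person configs above are the `agent_configs` that `agi_init` expects: a display name, an age, and a (possibly empty) list of seed memories. A hedged wiring sketch, which assumes a valid OpenAI key is configured since creating an agent constructs its LLM:

```python
import json

from rich.console import Console

from model import agi_init
from setting import Settings

def load(path):
    with open(path) as f:
        return json.load(f)

agent_configs = [load("./person_config/Persuader.json"),
                 load("./person_config/GoodGuy.json")]
game_config = load("./game_config/leduc_limit.json")

# user_idx=1 mirrors the demo's initial_state 'user_index': 1
ctx = agi_init(agent_configs, game_config, Console(), Settings(), user_idx=1)
print([agent.name for agent in ctx.robot_agents])
```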
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ rich
+ textual
+ typer
+ termcolor
+ tiktoken
+ faiss-cpu
+ openai
+ pinecone-client
+ langchain
+ discord.py
+ inquirer
+ rlcard[torch]
+ python-json-logger
+ gitpython
+ gym
+ git+https://github.com/CR-Gjx/gym-coup.git
retriever.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import pickle
+ from langchain import FAISS
+ from langchain.retrievers import TimeWeightedVectorStoreRetriever
+ from langchain.embeddings.base import Embeddings
+ import faiss
+ from context import Context
+ from model import load_embedding_from_config
+ import math
+ from langchain.docstore import InMemoryDocstore
+
+ # reference:
+ # https://python.langchain.com/en/latest/use_cases/agent_simulations/characters.html#create-a-generative-character
+ def relevance_score_fn(score: float) -> float:
+     """Return a similarity score on a scale [0, 1]."""
+     # This will differ depending on a few things:
+     # - the distance / similarity metric used by the VectorStore
+     # - the scale of your embeddings (OpenAI's are unit norm. Many others are not!)
+     # This function converts the euclidean norm of normalized embeddings
+     # (0 is most similar, sqrt(2) most dissimilar)
+     # to a similarity function (0 to 1)
+     return 1.0 - score / math.sqrt(2)
+
+
+ # reference:
+ # https://python.langchain.com/en/latest/use_cases/agent_simulations/characters.html#create-a-generative-character
+ def create_new_memory_retriever(ctx: Context):
+     """Create a new vector store retriever unique to the agent."""
+     # Define your embedding model
+     embeddings_model = load_embedding_from_config(ctx.settings.model.embedding)
+     # Initialize the vectorstore as empty
+     embedding_size = 1536
+     index = faiss.IndexFlatL2(embedding_size)
+     vectorstore = FAISS(
+         embeddings_model.embed_query,
+         index,
+         InMemoryDocstore({}),
+         {},
+         relevance_score_fn=relevance_score_fn,
+     )
+     return TimeWeightedVectorStoreRetriever(
+         vectorstore=vectorstore, other_score_keys=["importance"], k=15
+     )
+
+
+ class Retriever(TimeWeightedVectorStoreRetriever):
+     embedding_model: Embeddings
+
+     def faiss_path(self, path) -> str:
+         return path + "/faiss"
+
+     def mem_path(self, path) -> str:
+         return path + "/memory.pickle"
+
+     def try_load_memory(self, path: str) -> bool:
+         if not os.path.isdir(path):
+             return False
+
+         faiss_path = self.faiss_path(path)
+         # FAISS.load_local is a classmethod that returns a new store,
+         # so assign the result back instead of discarding it
+         self.vectorstore = FAISS.load_local(faiss_path, self.embedding_model)
+
+         mem_path = self.mem_path(path)
+         with open(mem_path, "rb") as mem_file:
+             self.memory_stream = pickle.load(mem_file)
+
+         return True
+
+     def dump_memory(self, path: str) -> None:
+         store: FAISS = self.vectorstore
+         store.save_local(self.faiss_path(path))
+         with open(self.mem_path(path), "wb") as mem_file:
+             pickle.dump(self.memory_stream, mem_file)
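
`Retriever` adds checkpointing on top of the time-weighted retriever: `dump_memory` writes the FAISS index next to a pickle of the memory stream, and `try_load_memory` restores both. A sketch of the round-trip, built from the same pieces as `create_new_memory_retriever`; it assumes `OPENAI_API_KEY` is set, and the checkpoint path is illustrative:

```python
import faiss
from langchain import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings

from retriever import Retriever, relevance_score_fn

embeddings_model = OpenAIEmbeddings()
store = FAISS(embeddings_model.embed_query, faiss.IndexFlatL2(1536),
              InMemoryDocstore({}), {}, relevance_score_fn=relevance_score_fn)

retriever = Retriever(vectorstore=store, embedding_model=embeddings_model,
                      other_score_keys=["importance"], k=15)

path = "./memory_data/agent_ckpt"   # hypothetical checkpoint directory
retriever.dump_memory(path)         # writes <path>/faiss plus <path>/memory.pickle
retriever.try_load_memory(path)     # restores both on a later run
```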
setting.py ADDED
@@ -0,0 +1,139 @@
+ from pathlib import Path
+ from typing import Any, Dict, List, Type
+
+ from pydantic import BaseModel, BaseSettings, Extra
+ import os
+
+
+ def json_config_settings_source(settings: BaseSettings) -> Dict[str, Any]:
+     from util import load_json
+
+     # Load settings from a JSON config file
+     config_dir = Path(os.getcwd(), ".suspicionagent")
+     config_file = Path(config_dir, "config.json")
+     if not config_dir.exists() or not config_file.exists():
+         print("[Error] Please configure suspicionagent")
+         import sys
+
+         sys.exit(-1)
+     return load_json(config_file)
+
+
+ class LLMSettings(BaseModel):
+     """
+     LLM/ChatModel related settings
+     """
+
+     type: str = "chatopenai"
+
+     class Config:
+         extra = Extra.allow
+
+
+ class EmbeddingSettings(BaseModel):
+     """
+     Embedding related settings
+     """
+
+     type: str = "openaiembeddings"
+
+     class Config:
+         extra = Extra.allow
+
+
+ class ModelSettings(BaseModel):
+     """
+     Model related settings
+     """
+
+     type: str = ""
+     llm: LLMSettings = LLMSettings()
+     embedding: EmbeddingSettings = EmbeddingSettings()
+
+     class Config:
+         extra = Extra.allow
+
+
+ class Settings(BaseSettings):
+     """
+     Root settings
+     """
+
+     name: str = "default"
+     model: ModelSettings = ModelSettings()
+
+     class Config:
+         env_prefix = "skyagi_"
+         env_file_encoding = "utf-8"
+         extra = Extra.allow
+
+         @classmethod
+         def customise_sources(
+             cls,
+             init_settings,
+             env_settings,
+             file_secret_settings,
+         ):
+             return (
+                 init_settings,
+                 # json_config_settings_source,
+                 env_settings,
+                 file_secret_settings,
+             )
+
+
+ # ---------------------------------------------------------------------------- #
+ #                             Preset configurations                             #
+ # ---------------------------------------------------------------------------- #
+ class OpenAIGPT4Settings(ModelSettings):
+     # NOTE: GPT4 is in waitlist
+     type = "openai-gpt-4-0613"
+     llm = LLMSettings(type="chatopenai", model="gpt-4-0613", max_tokens=3000, temperature=0.1, request_timeout=120)
+     embedding = EmbeddingSettings(type="openaiembeddings")
+
+
+ class OpenAIGPT432kSettings(ModelSettings):
+     # NOTE: GPT4 is in waitlist
+     type = "openai-gpt-4-32k-0613"
+     llm = LLMSettings(type="chatopenai", model="gpt-4-32k-0613", max_tokens=2500)
+     embedding = EmbeddingSettings(type="openaiembeddings")
+
+
+ class OpenAIGPT3_5TurboSettings(ModelSettings):
+     type = "openai-gpt-3.5-turbo"
+     llm = LLMSettings(type="chatopenai", model="gpt-3.5-turbo-16k-0613", max_tokens=2500)
+     embedding = EmbeddingSettings(type="openaiembeddings")
+
+
+ class OpenAIGPT3_5TextDavinci003Settings(ModelSettings):
+     type = "openai-gpt-3.5-text-davinci-003"
+     llm = LLMSettings(type="openai", model_name="text-davinci-003", max_tokens=2500)
+     embedding = EmbeddingSettings(type="openaiembeddings")
+
+
+ # class Llama2_70b_Settings(ModelSettings):
+ #     from transformers import LlamaForCausalLM, LlamaTokenizer
+ #     type = "llama2-70b"
+ #     tokenizer = LlamaTokenizer.from_pretrained("/groups/gcb50389/pretrained/llama2-HF/Llama-2-70b-hf")
+ #     llm = LlamaForCausalLM.from_pretrained("/groups/gcb50389/pretrained/llama2-HF/Llama-2-70b-hf")
+ #     embedding = EmbeddingSettings(type="openaiembeddings")
+
+
+ # ------------------------- Model settings registry ------------------------ #
+ model_setting_type_to_cls_dict: Dict[str, Type[ModelSettings]] = {
+     "openai-gpt-4-0613": OpenAIGPT4Settings,
+     "openai-gpt-4-32k-0613": OpenAIGPT432kSettings,
+     "openai-gpt-3.5-turbo": OpenAIGPT3_5TurboSettings,
+     "openai-gpt-3.5-text-davinci-003": OpenAIGPT3_5TextDavinci003Settings,
+     # "llama2-70b": Llama2_70b_Settings
+ }
+
+
+ def load_model_setting(type: str) -> ModelSettings:
+     if type not in model_setting_type_to_cls_dict:
+         raise ValueError(f"Loading {type} setting not supported")
+
+     cls = model_setting_type_to_cls_dict[type]
+     return cls()
+
+
+ def get_all_model_settings() -> List[str]:
+     """Get all supported model settings"""
+     return list(model_setting_type_to_cls_dict.keys())
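
The preset registry gives a one-line way to swap models: look up a `ModelSettings` subclass by name and instantiate it. A short sketch (no network access is needed just to build the settings):

```python
from setting import Settings, get_all_model_settings, load_model_setting

print(get_all_model_settings())  # the four registered OpenAI presets

settings = Settings()
settings.model = load_model_setting("openai-gpt-4-0613")
# downstream code only reads settings.model.llm and settings.model.embedding
print(settings.model.llm.type, settings.model.embedding.type)
```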
util.py ADDED
@@ -0,0 +1,127 @@
+ import json
+ import os
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from model import load_embedding_from_config, load_llm_from_config
+ from setting import Settings
+ import logging
+ from pythonjsonlogger import jsonlogger
+
+
+ def verify_openai_token(token: str) -> str:
+     import openai
+
+     openai.api_key = token
+     try:
+         openai.Completion.create(
+             model="text-ada-001",
+             prompt="Hello",
+             temperature=0,
+             max_tokens=10,
+             top_p=1,
+             frequency_penalty=0.5,
+             presence_penalty=0,
+         )
+         return "OK"
+     except Exception as e:
+         return str(e)
+
+
+ def get_logging(logger_name, content=''):
+     logger = logging.getLogger(logger_name)
+     if not logger.handlers:
+         logger.setLevel(logging.DEBUG)
+         # the log directory must exist before the FileHandler is created
+         os.makedirs('./memory_data', exist_ok=True)
+         logHandlerJson = logging.FileHandler('./memory_data/' + logger_name + '.json')
+         formatter = jsonlogger.JsonFormatter()
+         logHandlerJson.setFormatter(formatter)
+
+         # handler = logging.FileHandler('./memory_data/' + logger_name + '.txt')
+         # handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+         logger.addHandler(logHandlerJson)
+     # log on every call, not only when the handler is first attached
+     logger.info(content)
+
+
+ def verify_model_initialization(settings: Settings) -> str:
+     try:
+         load_llm_from_config(settings.model.llm)
+     except Exception as e:
+         return f"LLM initialization check failed: {e}"
+
+     try:
+         load_embedding_from_config(settings.model.embedding)
+     except Exception as e:
+         return f"Embedding initialization check failed: {e}"
+
+     return "OK"
+
+
+ def verify_pinecone_token(token: str) -> str:
+     return "OK"
+
+
+ def verify_discord_token(token: str) -> str:
+     return "OK"
+
+
+ def load_json_value(filepath: Path, key: str, default_value: Any) -> Any:
+     if not Path(filepath).exists():
+         return default_value
+     json_obj = load_json(filepath)
+     if key not in json_obj:
+         return default_value
+     return json_obj[key]
+
+
+ def set_json_value(filepath: Path, key: str, value: Any) -> None:
+     # key needs to follow python naming convention, such as trial_id
+     json_obj = load_json(filepath)
+     json_obj[key] = value
+     with open(filepath, "w+") as json_file:
+         json.dump(json_obj, json_file, sort_keys=True)
+         json_file.flush()
+
+
+ def load_json(filepath: Path) -> Dict:
+     if not Path(filepath).exists():
+         return {}
+     with open(filepath, "r") as file:
+         try:
+             json_obj = json.load(file)
+             return json_obj
+         except json.JSONDecodeError as e:
+             if os.stat(filepath).st_size == 0:
+                 # Empty file
+                 return {}
+             else:
+                 raise e
+
+
+ def load_log(file_name, key_name):
+     content_list = []
+     key_list = []
+     with open('./memory_data/' + file_name) as f:
+         contents = f.readlines()
+         for line in contents:
+             print(line)
+             record = json.loads(line)  # avoid shadowing the contents list
+             content_list.append(list(record.values())[1][key_name])
+             key_list.append(list(record.keys())[1])
+     return content_list, key_list
+
+
+ def load_log_full(file_name, key_name):
+     content_list = []
+     key_list = []
+     with open(file_name) as f:
+         contents = f.readlines()
+         for line in contents:
+             # print(line)
+             record = json.loads(line)
+             if key_name is None:
+                 content_list.append(list(record.values())[1])
+             else:
+                 content_list.append(list(record.values())[1][key_name])
+             key_list.append(list(record.keys())[1])
+     return content_list, key_list
+
+
+ def get_checkpoint_dir(agent_file: str) -> str:
+     return "./{}.cpt".format(os.path.basename(agent_file))
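
The JSON helpers are written to be safe on first use: a missing or empty file reads back as `{}` or the supplied default. A small round-trip sketch (the file name is a hypothetical scratch file):

```python
from pathlib import Path

from util import load_json_value, set_json_value

cfg = Path("./demo_state.json")             # hypothetical scratch file
print(load_json_value(cfg, "trial_id", 0))  # -> 0 before the file exists
set_json_value(cfg, "trial_id", 1)
print(load_json_value(cfg, "trial_id", 0))  # -> 1 afterwards
```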