Commit 32d7589 (1 parent: 13ed679), committed by ewanlee
translated MsPacman and MontezumaRevenge in Gym Atari
Files changed:
- .gitignore +2 -1
- envs/__init__.py +44 -1
- envs/atari/montezumarevenge_policies.py +142 -0
- envs/atari/montezumarevenge_translator.py +136 -0
- envs/atari/mspacman_policies.py +88 -0
- envs/atari/mspacman_translator.py +80 -0
- record_reflexion.csv +2 -1
- test_atari.sh +5 -1
.gitignore
CHANGED
@@ -185,4 +185,5 @@ main_test*.sh
 main_jarvis.sh
 test*.py
 *.zip
-test_
+test_
+*.ipynb
envs/__init__.py
CHANGED
@@ -12,8 +12,10 @@ from .toy_text import taxi_translator, taxi_policies
 from .toy_text import cliffwalking_translator, cliffwalking_policies
 from .toy_text import frozenlake_translator, frozenlake_policies
 
-from .atari import register_environments
+from .atari import montezumarevenge_policies, register_environments
 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
+from .atari import mspacman_policies, mspacman_translator
+from .atari import montezumarevenge_translator
 register_environments()
 
 from .mujoco import ant_translator, ant_policies
@@ -96,6 +98,47 @@ REGISTRY["RepresentedPong_basic_policies"] = [
     Pong_policies.dedicated_6_policy,
 ]
 
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
 REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
 REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
 REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
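The REGISTRY entries above are keyed by strings of the form <env>_<role>, which is presumably how the driver script resolves the --init_summarizer and --curr_summarizer flags at runtime. A minimal sketch of that lookup, assuming only that REGISTRY is the dict populated in envs/__init__.py (the driver code itself is not shown in this diff):

    # a sketch of how the new registry keys might be consumed
    from envs import REGISTRY

    translator_cls = REGISTRY["RepresentedMsPacman_basic_translator"]
    policies = REGISTRY["RepresentedMsPacman_basic_policies"]

    translator = translator_cls()      # a BasicStateSequenceTranslator instance
    action = policies[0](state=None)   # real_random_policy; returns an int in 1..9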
envs/atari/montezumarevenge_policies.py
ADDED
@@ -0,0 +1,142 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 18 cyclically"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 18 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 18)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+    dedicated_9_policy.description = get_description()
+    return 9
+
+
+def dedicated_10_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 10 which moves the agent down and to the left"
+    dedicated_10_policy.description = get_description()
+    return 10
+
+
+def dedicated_11_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 11 which moves the agent up while hitting the enemy"
+    dedicated_11_policy.description = get_description()
+    return 11
+
+
+def dedicated_12_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 12 which moves the agent right while hitting the enemy"
+    dedicated_12_policy.description = get_description()
+    return 12
+
+
+def dedicated_13_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 13 which moves the agent left while hitting the enemy"
+    dedicated_13_policy.description = get_description()
+    return 13
+
+
+def dedicated_14_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 14 which moves the agent down while hitting the enemy"
+    dedicated_14_policy.description = get_description()
+    return 14
+
+
+def dedicated_15_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 15 which moves the agent up and to the right while hitting the enemy"
+    dedicated_15_policy.description = get_description()
+    return 15
+
+
+def dedicated_16_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 16 which moves the agent up and to the left while hitting the enemy"
+    dedicated_16_policy.description = get_description()
+    return 16
+
+
+def dedicated_17_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 17 which moves the agent down and to the right while hitting the enemy"
+    dedicated_17_policy.description = get_description()
+    return 17
+
+
+def dedicated_18_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 18 which moves the agent down and to the left while hitting the enemy"
+    dedicated_18_policy.description = get_description()
+    return 18
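Note the pattern shared by every policy above: the human-readable description is attached as an attribute on the function object, but only when the policy body first runs, so code that reads .description before calling the policy at least once raises AttributeError. A short sketch of the observable behavior:

    from envs.atari import montezumarevenge_policies as mz

    policy = mz.dedicated_3_policy
    action = policy(state=None)   # returns 3 and attaches the attribute
    print(policy.description)     # "Always select action 3 which moves the agent up"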
envs/atari/montezumarevenge_translator.py
ADDED
@@ -0,0 +1,136 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        self.player_direction_map = {
+            72: "facing left",
+            40: "facing left, climbing down ladder/rope",
+            24: "facing left, climbing up ladder/rope",
+            128: "facing right",
+            32: "facing right, climbing down ladder/rope",
+            16: "facing right, climbing up ladder/rope",
+        }
+
+    def translate(self, state):
+        (
+            room_number, player_x, player_y, player_direction, enemy_skull_x, enemy_skull_y,
+            key_monster_x, key_monster_y, level, num_lives, items_in_inventory_count,
+            room_state, score_0, score_1, score_2
+        ) = state
+
+        player_dir = self.player_direction_map.get(player_direction, "unknown direction")
+        picked_up_items = "None"
+
+        if items_in_inventory_count > 0:
+            # each inventory item occupies one bit of items_in_inventory_count
+            items = [
+                ("Key", "Opens locked doors.", 1),
+                ("Ankh", "Freezes enemies.", 2),
+                ("Gem", "Extra bonus points.", 4),
+                ("Torch", "Lights up dark rooms.", 8),
+                ("Sword", "Vanquishes certain enemies.", 16),
+            ]
+
+            picked_up_items = ""
+            for name, desc, val in items:
+                if items_in_inventory_count & val == val:
+                    picked_up_items += f"{name} ({desc}), "
+            picked_up_items = picked_up_items[:-2]  # drop the trailing ", "
+
+        res = f"""Room Number: {room_number}
+Player Position: ({player_x}, {player_y})
+Player Direction: {player_dir}
+Enemy Skull Position: ({enemy_skull_x}, {enemy_skull_y})
+Key Monster Position: ({key_monster_x}, {key_monster_y})
+Level: {level}
+Remaining Lives: {num_lives}
+Items in Inventory: {picked_up_items if picked_up_items else "None"}
+Room State (Mapped Based on Room Number): {room_state}
+Current Score: {score_0}{score_1}{score_2}\n"""
+        return res
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {}
+        self.reward_desc_dict = {}
+
+    def describe_goal(self):
+        return ("The goal is to guide PANAMA JOE safely to Montezuma's fantastic treasure. "
+                "Avoid danger, collect special tools and rewards, and navigate through the chambers of the emperor's fortress.")
+
+    def describe_game(self):
+        return ("""In Montezuma's Revenge, you control a fearless adventurer named PANAMA JOE who aims to navigate through a maze
+of death-dealing chambers within Emperor Montezuma's fortress. PANAMA JOE can walk, climb, and jump in the game. In each room of the
+maze, there are several dangers, including various creatures such as skulls, snakes, spiders, and bats, as well as several deadly room
+fixtures like fire pits, conveyor belts, disappearing floors, laser gates, floor spikes, and laser walls.
+
+PANAMA JOE can act on several elements within the game environment. Some items in the game are:
+1. Keys: Essential to open locked doors, allowing access to other rooms and deeper exploration.
+2. Ankhs: Freeze all Killer Creatures in the room for 6.5 seconds, during which they can't move or kill.
+3. Gems: Extra bonus points when collected.
+4. Torches: Light up dark rooms, making it easier to navigate through threats.
+5. Swords: Used to defeat certain enemies by contact with the tip of the sword.
+
+The game's ultimate goal is to reach the fabulous Treasure Room containing Montezuma's treasure while amassing as many points as
+possible and keeping PANAMA JOE alive through the challenges. The game ends when you lose all of your PANAMA JOEs, with a maximum
+of 6 lives.""")
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        actions = {
+            1: "No Operation",
+            2: "Move Right",
+            3: "Move Left",
+            4: "Move Down",
+            5: "Move Up",
+            6: "Move Right + Climb Down",
+            7: "Move Left + Climb Down",
+            8: "Move Right + Climb Up",
+            9: "Move Left + Climb Up",
+            10: "Jump",
+            11: "Jump Right",
+            12: "Jump Left",
+            13: "Jump Down",
+            14: "Jump Up",
+            15: "Jump Right + Climb Down",
+            16: "Jump Left + Climb Down",
+            17: "Jump Right + Climb Up",
+            18: "Jump Left + Climb Up",
+        }
+
+        description = "Your Next Move:\n"
+        for action_number, action_name in actions.items():
+            description += f"{action_number}: {action_name}\n"
+
+        description += "Please choose an action from the list above."
+        return description
+
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        action_names = [
+            "No Operation", "Move Right", "Move Left", "Move Down", "Move Up",
+            "Move Right + Climb Down", "Move Left + Climb Down",
+            "Move Right + Climb Up", "Move Left + Climb Up",
+            "Jump", "Jump Right", "Jump Left", "Jump Down", "Jump Up",
+            "Jump Right + Climb Down", "Jump Left + Climb Down",
+            "Jump Right + Climb Up", "Jump Left + Climb Up",
+        ]
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            # actions are numbered 1-18, so shift by one when indexing the name list
+            action_desc = f'Take Action: {action_names[info["action"] - 1]} ({info["action"]}).'
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n"
+                                f"{action_desc}\n"
+                                f"{reward_desc}\n"
+                                f"Transit to {next_state_desc}\n")
+
+        return descriptions
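For reference, BasicLevelTranslator.translate expects a 15-element state in the order given by the tuple unpacking above. A sketch with invented values (only the field order, the direction codes, and the inventory bitmask come from the code; the numbers are illustrative):

    from envs.atari.montezumarevenge_translator import BasicLevelTranslator

    state = (
        1,        # room_number
        77, 235,  # player_x, player_y
        72,       # player_direction -> "facing left"
        59, 200,  # enemy_skull_x, enemy_skull_y
        0, 0,     # key_monster_x, key_monster_y
        0,        # level
        5,        # num_lives
        1,        # items_in_inventory_count -> Key bit set
        0,        # room_state
        0, 0, 0,  # score digits score_0, score_1, score_2
    )
    print(BasicLevelTranslator().translate(state))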
envs/atari/mspacman_policies.py
ADDED
@@ -0,0 +1,88 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 9 cyclically"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 9 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 9)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+
+    dedicated_9_policy.description = get_description()
+    return 9
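As in the Montezuma's Revenge file, pseudo_random_policy cycles deterministically rather than sampling: feeding each returned action back in as pre_action walks through 2, 3, ..., 9, 1, 2, and so on. A short sketch:

    from envs.atari.mspacman_policies import pseudo_random_policy

    action = 1
    for _ in range(10):
        action = pseudo_random_policy(state=None, pre_action=action)
        print(action, end=" ")   # prints: 2 3 4 5 6 7 8 9 1 2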
envs/atari/mspacman_translator.py
ADDED
@@ -0,0 +1,80 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+
+    def translate(self, state):
+        x, y = state[8], state[9]
+        ghosts = [(state[0], state[4]), (state[1], state[5]), (state[2], state[6]), (state[3], state[7])]
+        ghost_directions = ["UP", "RIGHT", "LEFT", "DOWN"]
+
+        direction = ghost_directions[int(state[13])]
+        eaten_dots = state[14]
+        score = state[15]
+        lives = state[16]
+        ghosts_count = state[12]
+
+        fruit_x, fruit_y = state[10], state[11]
+        fruit_present = fruit_x != 0 or fruit_y != 0
+
+        player_state = f"Ms. Pac-Man is at position ({x}, {y}), facing {direction} with {lives} lives left. {eaten_dots} dots have been eaten so far and the current score is {score}. The game has {ghosts_count} ghosts."
+
+        ghost_states = []
+        for i, (gx, gy) in enumerate(ghosts):
+            ghost_name = ["Sue", "Inky", "Pinky", "Blinky"][i]
+            ghost_states.append(f"{ghost_name} the ghost is at position ({gx}, {gy})")
+        ghost_state_str = " ".join(ghost_states)
+
+        fruit_state = f"A fruit is present at position ({fruit_x}, {fruit_y})" if fruit_present else "No fruit is currently present on the screen."
+
+        result = f"{player_state} {fruit_state} {ghost_state_str}"
+        return result
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {}
+        self.reward_desc_dict = {}
+
+    def describe_goal(self):
+        return "The goal of Ms. Pac-Man is to score as many points as possible while avoiding the ghosts."
+
+    def describe_game(self):
+        return "In the Ms. Pac-Man game, you control Ms. Pac-Man, moving around a maze and eating dots to score points. "\
+               "There are also special bonus items, such as fruit and pretzels, that appear for a limited time and award "\
+               "extra points. Ghosts chase Ms. Pac-Man around the maze, but she can eat an energy pill to temporarily "\
+               "turn the ghosts vulnerable and eat them for extra points. The game ends when you lose all your lives. "\
+               "Scoring 10,000 points earns a bonus life."
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        return "Your Next Move: \n Please choose an action. Each value corresponds to a directional input as follows: "\
+               "1 - NOOP, 2 - UP, 3 - RIGHT, 4 - LEFT, 5 - DOWN, 6 - UPRIGHT, 7 - UPLEFT, 8 - DOWNRIGHT, 9 - DOWNLEFT. "\
+               "Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9]."
+
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        action_names = ['NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT']
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            # actions are numbered 1-9, so shift by one when indexing the name list
+            action_desc = f"Take Action: {action_names[info['action'] - 1]} ({info['action']})."
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to {next_state_desc}")
+        return descriptions
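BasicLevelTranslator.translate indexes a flat 17-element state: ghost x positions in state[0:4], ghost y positions in state[4:8], the player at state[8:10], the fruit at state[10:12], then ghost count, direction index, dots eaten, score, and lives. A sketch with invented values (the layout mirrors the indexing above; the numbers are illustrative):

    from envs.atari.mspacman_translator import BasicLevelTranslator

    state = [
        50, 60, 70, 80,  # ghost x positions (Sue, Inky, Pinky, Blinky)
        40, 40, 40, 40,  # ghost y positions
        88, 98,          # Ms. Pac-Man x, y
        0, 0,            # fruit x, y ((0, 0) means no fruit on screen)
        4,               # number of ghosts
        1,               # direction index -> "RIGHT"
        30,              # dots eaten
        310,             # score
        3,               # lives
    ]
    print(BasicLevelTranslator().translate(state))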
record_reflexion.csv
CHANGED
@@ -10,4 +10,5 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
-
+RepresentedMsPacman-v0,1,expert,10000.0
+RepresentedMontezumaRevenge-v0,1,expert,10000.0
test_atari.sh
CHANGED
@@ -1,2 +1,6 @@
 python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
-python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMsPacman-v0 --init_summarizer RepresentedMsPacman_init_translator --curr_summarizer RepresentedMsPacman_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMontezumaRevenge-v0 --init_summarizer RepresentedMontezumaRevenge_init_translator --curr_summarizer RepresentedMontezumaRevenge_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
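The commands above exercise the full reflexion loop. For a quicker check that the two new environments actually register, something like the following should work, assuming gym is installed and that importing envs triggers register_environments() as in envs/__init__.py:

    import gym
    import envs  # side effect: register_environments() registers the Represented* ids

    env = gym.make("RepresentedMontezumaRevenge-v0")
    state = env.reset()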