ewanlee commited on
Commit
32d7589
1 Parent(s): 13ed679

translated MsPacman and MontezumaRevenge in Gym Atari

Browse files
.gitignore CHANGED
@@ -185,4 +185,5 @@ main_test*.sh
185
  main_jarvis.sh
186
  test*.py
187
  *.zip
188
- test_
 
 
185
  main_jarvis.sh
186
  test*.py
187
  *.zip
188
+ test_
189
+ *.ipynb
envs/__init__.py CHANGED
@@ -12,8 +12,10 @@ from .toy_text import taxi_translator, taxi_policies
12
  from .toy_text import cliffwalking_translator, cliffwalking_policies
13
  from .toy_text import frozenlake_translator, frozenlake_policies
14
 
15
- from .atari import register_environments
16
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
 
 
17
  register_environments()
18
 
19
  from .mujoco import ant_translator, ant_policies
@@ -96,6 +98,47 @@ REGISTRY["RepresentedPong_basic_policies"] = [
96
  Pong_policies.dedicated_6_policy,
97
  ]
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
100
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
101
  REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
 
12
  from .toy_text import cliffwalking_translator, cliffwalking_policies
13
  from .toy_text import frozenlake_translator, frozenlake_policies
14
 
15
+ from .atari import montezumarevenge_policies, register_environments
16
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
17
+ from .atari import mspacman_policies, mspacman_translator
18
+ from .atari import montezumarevenge_translator
19
  register_environments()
20
 
21
  from .mujoco import ant_translator, ant_policies
 
98
  Pong_policies.dedicated_6_policy,
99
  ]
100
 
101
# Translators and policy sets for the RepresentedMsPacman environment.
REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
# MsPacman exposes 9 actions: two random baselines plus one dedicated
# constant policy per action (1..9).
REGISTRY["RepresentedMsPacman_basic_policies"] = [
    mspacman_policies.real_random_policy,
    mspacman_policies.pseudo_random_policy,
    mspacman_policies.dedicated_1_policy,
    mspacman_policies.dedicated_2_policy,
    mspacman_policies.dedicated_3_policy,
    mspacman_policies.dedicated_4_policy,
    mspacman_policies.dedicated_5_policy,
    mspacman_policies.dedicated_6_policy,
    mspacman_policies.dedicated_7_policy,
    mspacman_policies.dedicated_8_policy,
    mspacman_policies.dedicated_9_policy,
]

# Translators and policy sets for the RepresentedMontezumaRevenge environment.
REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
# MontezumaRevenge uses the full 18-action set: two random baselines plus
# one dedicated constant policy per action (1..18).
REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
    montezumarevenge_policies.real_random_policy,
    montezumarevenge_policies.pseudo_random_policy,
    montezumarevenge_policies.dedicated_1_policy,
    montezumarevenge_policies.dedicated_2_policy,
    montezumarevenge_policies.dedicated_3_policy,
    montezumarevenge_policies.dedicated_4_policy,
    montezumarevenge_policies.dedicated_5_policy,
    montezumarevenge_policies.dedicated_6_policy,
    montezumarevenge_policies.dedicated_7_policy,
    montezumarevenge_policies.dedicated_8_policy,
    montezumarevenge_policies.dedicated_9_policy,
    montezumarevenge_policies.dedicated_10_policy,
    montezumarevenge_policies.dedicated_11_policy,
    montezumarevenge_policies.dedicated_12_policy,
    montezumarevenge_policies.dedicated_13_policy,
    montezumarevenge_policies.dedicated_14_policy,
    montezumarevenge_policies.dedicated_15_policy,
    montezumarevenge_policies.dedicated_16_policy,
    montezumarevenge_policies.dedicated_17_policy,
    montezumarevenge_policies.dedicated_18_policy,
]
141
+
142
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
143
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
144
  REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
envs/atari/montezumarevenge_policies.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def dedicated_1_policy(state, pre_action=1):
    """Constant policy: always play action 1 (NOOP)."""
    dedicated_1_policy.description = (
        "Always select action 1 which does NOOP (no operation)"
    )
    return 1
9
+
10
+
11
def dedicated_2_policy(state, pre_action=1):
    """Constant policy: always play action 2.

    Bug fix: the description was previously assigned to
    ``dedicated_1_policy`` (copy-paste error), clobbering that policy's
    description and leaving this one without any; it now annotates this
    function.
    """
    dedicated_2_policy.description = "Always select action 2 which hits the enemy"
    return 2
16
+
17
+
18
def dedicated_3_policy(state, pre_action=1):
    """Constant policy: always play action 3 (move up)."""
    dedicated_3_policy.description = (
        "Always select action 3 which moves the agent up"
    )
    return 3


def dedicated_4_policy(state, pre_action=1):
    """Constant policy: always play action 4 (move right)."""
    dedicated_4_policy.description = (
        "Always select action 4 which moves the agent right"
    )
    return 4


def dedicated_5_policy(state, pre_action=1):
    """Constant policy: always play action 5 (move left)."""
    dedicated_5_policy.description = (
        "Always select action 5 which moves the agent left"
    )
    return 5
37
+
38
+
39
def pseudo_random_policy(state, pre_action):
    """Deterministically cycle through actions 1..18, one step per call."""
    pseudo_random_policy.description = (
        "Select an action among 1 to 18 alternatively"
    )
    # pre_action in 1..18 maps to pre_action + 1, wrapping 18 -> 1.
    return pre_action % 18 + 1
44
+
45
+
46
def real_random_policy(state, pre_action=1):
    """Uniformly sample a random action in 1..18."""
    real_random_policy.description = "Select action with a random policy"
    # Kept as np.random.choice so the global RNG stream is unchanged.
    return np.random.choice(range(0, 18)) + 1
51
+
52
+
53
# Complete set of dedicated action policies
def dedicated_6_policy(state, pre_action=1):
    """Constant policy: always play action 6 (move down)."""
    dedicated_6_policy.description = (
        "Always select action 6 which moves the agent down"
    )
    return 6


def dedicated_7_policy(state, pre_action=1):
    """Constant policy: always play action 7 (move up-right)."""
    dedicated_7_policy.description = (
        "Always select action 7 which moves the agent up and to the right"
    )
    return 7


def dedicated_8_policy(state, pre_action=1):
    """Constant policy: always play action 8 (move up-left)."""
    dedicated_8_policy.description = (
        "Always select action 8 which moves the agent up and to the left"
    )
    return 8


def dedicated_9_policy(state, pre_action=1):
    """Constant policy: always play action 9 (move down-right)."""
    dedicated_9_policy.description = (
        "Always select action 9 which moves the agent down and to the right"
    )
    return 9
80
+
81
+
82
def dedicated_10_policy(state, pre_action=1):
    """Constant policy: always play action 10 (move down-left)."""
    dedicated_10_policy.description = (
        "Always select action 10 which moves the agent down and to the left"
    )
    return 10


def dedicated_11_policy(state, pre_action=1):
    """Constant policy: always play action 11 (up + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_11_policy.description = (
        "Always select action 11 which moves the agent up while hitting the enemy"
    )
    return 11


def dedicated_12_policy(state, pre_action=1):
    """Constant policy: always play action 12 (right + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_12_policy.description = (
        "Always select action 12 which moves the agent right while hitting the enemy"
    )
    return 12


def dedicated_13_policy(state, pre_action=1):
    """Constant policy: always play action 13 (left + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_13_policy.description = (
        "Always select action 13 which moves the agent left while hitting the enemy"
    )
    return 13
108
+
109
+
110
def dedicated_14_policy(state, pre_action=1):
    """Constant policy: always play action 14 (down + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_14_policy.description = (
        "Always select action 14 which moves the agent down while hitting the enemy"
    )
    return 14


def dedicated_15_policy(state, pre_action=1):
    """Constant policy: always play action 15 (up-right + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_15_policy.description = (
        "Always select action 15 which moves the agent up and to the right while hitting the enemy"
    )
    return 15


def dedicated_16_policy(state, pre_action=1):
    """Constant policy: always play action 16 (up-left + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_16_policy.description = (
        "Always select action 16 which moves the agent up and to the left while hitting the enemy"
    )
    return 16


def dedicated_17_policy(state, pre_action=1):
    """Constant policy: always play action 17 (down-right + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_17_policy.description = (
        "Always select action 17 which moves the agent down and to the right while hitting the enemy"
    )
    return 17


def dedicated_18_policy(state, pre_action=1):
    """Constant policy: always play action 18 (down-left + fire).

    Fix: corrected "hiting" -> "hitting" in the user-facing description.
    """
    dedicated_18_policy.description = (
        "Always select action 18 which moves the agent down and to the left while hitting the enemy"
    )
    return 18
envs/atari/montezumarevenge_translator.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BasicLevelTranslator:
    """Renders a structured MontezumaRevenge state tuple as readable text.

    The 15-element state tuple appears to be derived from annotated Atari
    RAM fields (room, positions, inventory, score digits).
    """

    def __init__(self):
        # Raw value of the player-direction byte -> textual description.
        # NOTE(review): these magic values (72, 40, 24, 128, 32, 16)
        # presumably come from the MontezumaRevenge RAM annotations --
        # confirm against the environment's state extractor.
        self.player_direction_map = {
            72: "facing left",
            40: "facing left, climbing down ladder/rope",
            24: "facing left, climbing up ladder/rope",
            128: "facing right",
            32: "facing right, climbing down ladder/rope",
            16: "facing right, climbing up ladder/rope",
        }

    def translate(self, state):
        """Return a multi-line description of ``state``.

        ``state`` must be a 15-element sequence matching the unpacking
        below; a different length raises ``ValueError``.
        """
        (
            room_number, player_x, player_y, player_direction, enemy_skull_x, enemy_skull_y,
            key_monster_x, key_monster_y, level, num_lives, items_in_inventory_count,
            room_state, score_0, score_1, score_2
        ) = state

        # Unmapped direction bytes fall back to a readable placeholder.
        player_dir = self.player_direction_map.get(player_direction, "unknown direction")
        picked_up_items = "None"

        if items_in_inventory_count > 0:
            # NOTE(review): despite the name, this field is treated as a
            # bitmask (1/2/4/8/16 per item), not a count -- confirm the
            # RAM field's actual semantics.
            items = [
                ("Key", "Opens locked doors.", 1),
                ("Ankh", "Freeze enemies.", 2),
                ("Gem", "Extra bonus points.", 4),
                ("Torch", "Lights up dark rooms.", 8),
                ("Sword", "Vanquishes certain enemies.", 16),
            ]

            picked_up_items = ""
            for name, desc, val in items:
                if items_in_inventory_count & val == val:
                    picked_up_items += f"{name} ({desc}), "
            # Drop the trailing ", " separator.
            picked_up_items = picked_up_items[:-2]

        # The score arrives as three digit-group fields that are simply
        # concatenated for display.
        res = f"""Room Number: {room_number}
Player Position: ({player_x}, {player_y})
Player Direction: {player_dir}
Enemy Skull Position: ({enemy_skull_x}, {enemy_skull_y})
Key Monster Position: ({key_monster_x}, {key_monster_y})
Level: {level}
Remaining Lives: {num_lives}
Items in Inventory: {picked_up_items if picked_up_items else "None"}
Room State (Mapped Based on Room Number): {room_state}
Current Score: {score_0}{score_1}{score_2}\n"""
        return res
48
+
49
+
50
class GameDescriber:
    """Produces natural-language descriptions of MontezumaRevenge (goal,
    rules, action space) for language-model-based deciders."""

    def __init__(self, args):
        # args.is_only_local_obs is an int flag (1 = local observations only).
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        # Per-action / per-reward description overrides; intentionally
        # left empty for this game.
        self.action_desc_dict = {
        }
        self.reward_desc_dict = {
        }

    def describe_goal(self):
        """One-sentence objective shown to the agent."""
        return ("The goal is to guide PANAMA JOE safely to reach Montezuma's fantastic treasure. "
                "Avoid danger, collect special tools and rewards, and navigate through the chambers of the emperor's fortress.")

    def describe_game(self):
        """Long-form rules/manual summary shown to the agent."""
        return ("""In Montezuma's Revenge, you control a fearless adventurer named PANAMA JOE who aims to navigate through a maze
of death-dealing chambers within Emperor Montezuma's fortress. PANAMA JOE can walk, climb, and jump in the game. In each room of the
maze, there are several dangers, including various creatures such as skulls, snakes, spiders, and bats, as well as several deadly room
fixtures like fire pits, conveyor belts, disappearing floors, laser gates, floor spikes, and laser walls.

PANAMA JOE can act on several elements within the game environment. Some items in the game are:
1. Keys: Essential to open locked doors, allowing access to other rooms and deeper exploration.
2. Ankhs: Freeze all Killer Creatures in the room for 6.5 seconds, during which they can't move or kill.
3. Gems: Extra bonus points when collected.
4. Torches: Light up dark rooms, making it easier to navigate through threats.
5. Swords: Used to defeat certain enemies, by contact with the tip of the sword.

The game's ultimate goal is to reach the fabulous Treasure Room containing Montezuma's treasure while amassing as many points as
possible and keeping PANAMA JOE alive through the challenges. The game ends when you lose all of your PANAMA JOEs, with a maximum
of 6 lives.""")

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        # No terminal-state narration for this game.
        return ""

    def translate_potential_next_state(self, state, action):
        # No lookahead narration for this game.
        return ""

    def describe_action(self):
        """List the 18 numbered actions for the agent to choose from."""
        # NOTE(review): this number->name mapping disagrees with the
        # descriptions in montezumarevenge_policies (which follow the ALE
        # full action set, e.g. 3 = move up, 2 = fire); confirm which
        # ordering the wrapped environment actually uses.
        actions = {
            1: "No Operation",
            2: "Move Right",
            3: "Move Left",
            4: "Move Down",
            5: "Move Up",
            6: "Move Right + Climb Down",
            7: "Move Left + Climb Down",
            8: "Move Right + Climb Up",
            9: "Move Left + Climb Up",
            10: "Jump",
            11: "Jump Right",
            12: "Jump Left",
            13: "Jump Down",
            14: "Jump Up",
            15: "Jump Right + Climb Down",
            16: "Jump Left + Climb Down",
            17: "Jump Right + Climb Up",
            18: "Jump Left + Climb Up",
        }

        description = "Your Next Move:\n"
        for action_number, action_name in actions.items():
            description += f"{action_number}: {action_name}\n"

        description += "Please choose an action from the list above."
        return description
114
+
115
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Translates a sequence of transition ``info`` dicts into text."""

    # Action names indexed by action number - 1 (actions are 1..18).
    # NOTE(review): keep in sync with GameDescriber.describe_action.
    ACTION_NAMES = [
        "No Operation", "Move Right", "Move Left", "Move Down", "Move Up",
        "Move Right + Climb Down", "Move Left + Climb Down",
        "Move Right + Climb Up", "Move Left + Climb Up", "Jump",
        "Jump Right", "Jump Left", "Jump Down", "Jump Up",
        "Jump Right + Climb Down", "Jump Left + Climb Down",
        "Jump Right + Climb Up", "Jump Left + Climb Up",
    ]

    def __init__(self):
        super().__init__()

    def translate(self, infos, is_current=False):
        """Describe transitions in ``infos``.

        With ``is_current=True``, return only the description of the most
        recent state; otherwise return a list of per-transition
        state/action/reward/next-state paragraphs.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])
        descriptions = []
        for i, info in enumerate(infos):
            assert 'state' in info, "info should contain state information"

            state_desc = BasicLevelTranslator().translate(info['state'])
            # Bug fix: actions are numbered 1..18 while the name list is
            # 0-indexed. The previous code indexed with info["action"]
            # directly, mislabelling every action and raising IndexError
            # for action 18; subtract 1 to align.
            action_desc = f'Take Action: {self.ACTION_NAMES[info["action"] - 1]} ({info["action"]}).'
            reward_desc = f"Result: Reward of {info['reward']}"
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            descriptions.append(f"{state_desc}\n"
                                f"{action_desc}\n"
                                f"{reward_desc}\n"
                                f"Transit to {next_state_desc}\n")

        return descriptions
envs/atari/mspacman_policies.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def dedicated_1_policy(state, pre_action=1):
    """Constant policy: always play action 1 (NOOP)."""
    dedicated_1_policy.description = (
        "Always select action 1 which does NOOP (no operation)"
    )
    return 1
10
+
11
+
12
def dedicated_2_policy(state, pre_action=1):
    """Constant policy: always play action 2 (UP per mspacman_translator).

    Bug fixes: the description was previously assigned to
    ``dedicated_1_policy`` (copy-paste error), and it claimed action 2
    "hits the enemy", contradicting mspacman_translator.describe_action
    (2 - UP); both are corrected.
    """
    dedicated_2_policy.description = "Always select action 2 which moves the agent up"
    return 2
18
+
19
+
20
def dedicated_3_policy(state, pre_action=1):
    """Constant policy: always play action 3 (RIGHT).

    Fix: description previously said "up", contradicting
    mspacman_translator.describe_action (3 - RIGHT).
    """
    dedicated_3_policy.description = "Always select action 3 which moves the agent right"
    return 3


def dedicated_4_policy(state, pre_action=1):
    """Constant policy: always play action 4 (LEFT).

    Fix: description previously said "right", contradicting
    mspacman_translator.describe_action (4 - LEFT).
    """
    dedicated_4_policy.description = "Always select action 4 which moves the agent left"
    return 4


def dedicated_5_policy(state, pre_action=1):
    """Constant policy: always play action 5 (DOWN).

    Fix: description previously said "left", contradicting
    mspacman_translator.describe_action (5 - DOWN).
    """
    dedicated_5_policy.description = "Always select action 5 which moves the agent down"
    return 5
42
+
43
+
44
def pseudo_random_policy(state, pre_action):
    """Deterministically cycle through actions 1..9, one step per call."""
    pseudo_random_policy.description = (
        "Select an action among 1 to 9 alternatively"
    )
    # pre_action in 1..9 maps to pre_action + 1, wrapping 9 -> 1.
    return pre_action % 9 + 1
49
+
50
+
51
def real_random_policy(state, pre_action=1):
    """Uniformly sample a random action in 1..9."""
    real_random_policy.description = "Select action with a random policy"
    # Kept as np.random.choice so the global RNG stream is unchanged.
    return np.random.choice(range(0, 9)) + 1
56
+
57
+
58
# Complete set of dedicated action policies
def dedicated_6_policy(state, pre_action=1):
    """Constant policy: always play action 6 (UPRIGHT).

    Fix: description previously said "down", contradicting
    mspacman_translator.describe_action (6 - UPRIGHT).
    """
    dedicated_6_policy.description = (
        "Always select action 6 which moves the agent up and to the right"
    )
    return 6


def dedicated_7_policy(state, pre_action=1):
    """Constant policy: always play action 7 (UPLEFT).

    Fix: description previously said "up and to the right", contradicting
    mspacman_translator.describe_action (7 - UPLEFT).
    """
    dedicated_7_policy.description = (
        "Always select action 7 which moves the agent up and to the left"
    )
    return 7


def dedicated_8_policy(state, pre_action=1):
    """Constant policy: always play action 8 (DOWNRIGHT).

    Fix: description previously said "up and to the left", contradicting
    mspacman_translator.describe_action (8 - DOWNRIGHT).
    """
    dedicated_8_policy.description = (
        "Always select action 8 which moves the agent down and to the right"
    )
    return 8


def dedicated_9_policy(state, pre_action=1):
    """Constant policy: always play action 9 (DOWNLEFT).

    Fix: description previously said "down and to the right", contradicting
    mspacman_translator.describe_action (9 - DOWNLEFT).
    """
    dedicated_9_policy.description = (
        "Always select action 9 which moves the agent down and to the left"
    )
    return 9
envs/atari/mspacman_translator.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BasicLevelTranslator:
    """Renders a structured MsPacman state vector as a one-paragraph text."""

    # Ghost display names, ordered to match the (x, y) pairs in the state.
    GHOST_NAMES = ("Sue", "Inky", "Pinky", "Blinky")
    # Heading names indexed by the direction code in state[13].
    DIRECTIONS = ("UP", "RIGHT", "LEFT", "DOWN")

    def __init__(self):
        pass

    def translate(self, state):
        """Return an English description of ``state``.

        NOTE(review): assumes a 17-element vector: ghost x's [0:4],
        ghost y's [4:8], player (x, y) at [8:10], fruit (x, y) at
        [10:12], ghost count [12], direction code [13], dots eaten [14],
        score [15], lives [16] -- confirm against the state extractor.
        """
        px, py = state[8], state[9]
        ghost_positions = [(state[0], state[4]), (state[1], state[5]),
                           (state[2], state[6]), (state[3], state[7])]
        heading = self.DIRECTIONS[int(state[13])]
        dots_eaten = state[14]
        current_score = state[15]
        lives_left = state[16]
        n_ghosts = state[12]
        fx, fy = state[10], state[11]

        player_part = (
            f"Ms. Pac-Man is at position ({px}, {py}), facing {heading} with "
            f"{lives_left} lives left. {dots_eaten} dots have been eaten so far "
            f"and the current score is {current_score}. The game has {n_ghosts} ghosts."
        )

        ghost_part = " ".join(
            f"{name} the ghost is at position ({gx}, {gy})"
            for name, (gx, gy) in zip(self.GHOST_NAMES, ghost_positions)
        )

        # (0, 0) is used as the "no fruit on screen" sentinel.
        if fx != 0 or fy != 0:
            fruit_part = f"A fruit is present at position ({fx}, {fy})"
        else:
            fruit_part = "No fruit is currently present on the screen."

        return f"{player_part} {fruit_part} {ghost_part}"
31
+
32
+
33
class GameDescriber:
    """Static natural-language descriptions of the MsPacman game (goal,
    rules, action space) for language-model-based deciders."""

    def __init__(self, args):
        # args.is_only_local_obs is an int flag (1 = local observations only).
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        # Per-action / per-reward description overrides; intentionally empty.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def describe_goal(self):
        """One-sentence objective shown to the agent."""
        return "The goal of Ms. Pac-Man is to score as many points as possible while avoiding the ghosts."

    def describe_game(self):
        """Long-form rules summary shown to the agent."""
        return (
            "In the Ms. Pac-Man game, you control Ms. Pac-Man, moving around a maze, eating dots to score points. "
            "There are also special bonus items, such as fruit and pretzels, that appear for a limited time and award "
            "extra points. Ghosts chase Ms. Pac-Man around the maze, but she can eat an energy pill to temporarily "
            "turn the ghosts vulnerable and eat them for extra points. The game ends when you lose all your lives. "
            "Score 10,000 points and earn a bonus life."
        )

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        # No terminal-state narration for this game.
        return ""

    def translate_potential_next_state(self, state, action):
        # No lookahead narration for this game.
        return ""

    def describe_action(self):
        """List the 9 numbered directional actions for the agent."""
        return (
            "Your Next Move: \n Please choose an action. Each value corresponds to a directional input as follows: "
            "1 - NOOP, 2 - UP, 3 - RIGHT, 4 - LEFT, 5 - DOWN, 6 - UPRIGHT, 7 - UPLEFT, 8 - DOWNRIGHT, 9 - DOWNLEFT. "
            "Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9]."
        )
62
+
63
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Translates a sequence of transition ``info`` dicts into text."""

    # Action names indexed by action number - 1 (actions are 1..9).
    # NOTE(review): keep in sync with GameDescriber.describe_action.
    ACTION_NAMES = ['NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN',
                    'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT']

    def __init__(self):
        super().__init__()

    def translate(self, infos, is_current=False):
        """Describe transitions in ``infos``.

        With ``is_current=True``, return only the description of the most
        recent state; otherwise return a list of per-transition
        state/action/reward/next-state descriptions.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])
        descriptions = []
        for i, info in enumerate(infos):
            assert 'state' in info, "info should contain state information"

            state_desc = BasicLevelTranslator().translate(info['state'])
            # Bug fix: actions are numbered 1..9 while the name list is
            # 0-indexed. Indexing with info['action'] directly mislabelled
            # every action (1 -> 'UP') and raised IndexError for action 9;
            # subtract 1 to align.
            action_desc = f"Take Action: {self.ACTION_NAMES[info['action'] - 1]} ({info['action']})."
            reward_desc = f"Result: Reward of {info['reward']}, "
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            # NOTE(review): '\\n' emits a literal backslash-n into the text,
            # matching the original output -- confirm whether real newlines
            # were intended here.
            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
        return descriptions
record_reflexion.csv CHANGED
@@ -10,4 +10,5 @@ FrozenLake-v1,1,expert,200.0
10
  MountainCarContinuous-v0,1,expert,200.0
11
  RepresentedBoxing-v0,1,expert,200.0
12
  RepresentedPong-v0,1,expert,200.0
13
-
 
 
10
  MountainCarContinuous-v0,1,expert,200.0
11
  RepresentedBoxing-v0,1,expert,200.0
12
  RepresentedPong-v0,1,expert,200.0
13
+ RepresentedMsPacman-v0,1,expert,10000.0
14
+ RepresentedMontezumaRevenge-v0,1,expert,10000.0
test_atari.sh CHANGED
@@ -1,2 +1,6 @@
1
  python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
2
- python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
 
 
 
 
 
1
  python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
2
+ python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
3
+
4
+ python main_reflexion.py --env_name RepresentedMsPacman-v0 --init_summarizer RepresentedMsPacman_init_translator --curr_summarizer RepresentedMsPacman_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
5
+
6
+ python main_reflexion.py --env_name RepresentedMontezumaRevenge-v0 --init_summarizer RepresentedMontezumaRevenge_init_translator --curr_summarizer RepresentedMontezumaRevenge_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0