Commit 32d7589 (1 parent: 13ed679), committed by ewanlee
translated MsPacman and MontezumaRevenge in Gym Atari
Files changed:
- .gitignore +2 -1
- envs/__init__.py +44 -1
- envs/atari/montezumarevenge_policies.py +142 -0
- envs/atari/montezumarevenge_translator.py +136 -0
- envs/atari/mspacman_policies.py +88 -0
- envs/atari/mspacman_translator.py +80 -0
- record_reflexion.csv +2 -1
- test_atari.sh +5 -1
.gitignore
CHANGED
@@ -185,4 +185,5 @@ main_test*.sh
 main_jarvis.sh
 test*.py
 *.zip
-test_
+test_
+*.ipynb
envs/__init__.py
CHANGED
@@ -12,8 +12,10 @@ from .toy_text import taxi_translator, taxi_policies
 from .toy_text import cliffwalking_translator, cliffwalking_policies
 from .toy_text import frozenlake_translator, frozenlake_policies
 
-from .atari import register_environments
+from .atari import montezumarevenge_policies, register_environments
 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
+from .atari import mspacman_policies, mspacman_translator
+from .atari import montezumarevenge_translator
 register_environments()
 
 from .mujoco import ant_translator, ant_policies
@@ -96,6 +98,47 @@ REGISTRY["RepresentedPong_basic_policies"] = [
     Pong_policies.dedicated_6_policy,
 ]
 
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
 REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
 REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
 REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
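The REGISTRY entries above are keyed by strings of the form <env>_<role>, which is presumably how the driver script resolves the --init_summarizer and --curr_summarizer flags at runtime. A minimal sketch of that lookup, assuming only that REGISTRY is the dict populated in envs/__init__.py (the driver code itself is not shown in this diff):

    # a sketch of how the new registry keys might be consumed
    from envs import REGISTRY

    translator_cls = REGISTRY["RepresentedMsPacman_basic_translator"]
    policies = REGISTRY["RepresentedMsPacman_basic_policies"]

    translator = translator_cls()      # a BasicStateSequenceTranslator instance
    action = policies[0](state=None)   # real_random_policy; returns an int in 1..9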
envs/atari/montezumarevenge_policies.py
ADDED
@@ -0,0 +1,142 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 18 cyclically"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 18 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 18)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+    dedicated_9_policy.description = get_description()
+    return 9
+
+
+def dedicated_10_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 10 which moves the agent down and to the left"
+    dedicated_10_policy.description = get_description()
+    return 10
+
+
+def dedicated_11_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 11 which moves the agent up while hitting the enemy"
+    dedicated_11_policy.description = get_description()
+    return 11
+
+
+def dedicated_12_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 12 which moves the agent right while hitting the enemy"
+    dedicated_12_policy.description = get_description()
+    return 12
+
+
+def dedicated_13_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 13 which moves the agent left while hitting the enemy"
+    dedicated_13_policy.description = get_description()
+    return 13
+
+
+def dedicated_14_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 14 which moves the agent down while hitting the enemy"
+    dedicated_14_policy.description = get_description()
+    return 14
+
+
+def dedicated_15_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 15 which moves the agent up and to the right while hitting the enemy"
+    dedicated_15_policy.description = get_description()
+    return 15
+
+
+def dedicated_16_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 16 which moves the agent up and to the left while hitting the enemy"
+    dedicated_16_policy.description = get_description()
+    return 16
+
+
+def dedicated_17_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 17 which moves the agent down and to the right while hitting the enemy"
+    dedicated_17_policy.description = get_description()
+    return 17
+
+
+def dedicated_18_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 18 which moves the agent down and to the left while hitting the enemy"
+    dedicated_18_policy.description = get_description()
+    return 18
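Note the pattern shared by every policy above: the human-readable description is attached as an attribute on the function object, but only when the policy body first runs, so code that reads .description before calling the policy at least once raises AttributeError. A short sketch of the observable behavior:

    from envs.atari import montezumarevenge_policies as mz

    policy = mz.dedicated_3_policy
    action = policy(state=None)   # returns 3 and attaches the attribute
    print(policy.description)     # "Always select action 3 which moves the agent up"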
envs/atari/montezumarevenge_translator.py
ADDED
@@ -0,0 +1,136 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        self.player_direction_map = {
+            72: "facing left",
+            40: "facing left, climbing down ladder/rope",
+            24: "facing left, climbing up ladder/rope",
+            128: "facing right",
+            32: "facing right, climbing down ladder/rope",
+            16: "facing right, climbing up ladder/rope",
+        }
+
+    def translate(self, state):
+        (
+            room_number, player_x, player_y, player_direction, enemy_skull_x, enemy_skull_y,
+            key_monster_x, key_monster_y, level, num_lives, items_in_inventory_count,
+            room_state, score_0, score_1, score_2
+        ) = state
+
+        player_dir = self.player_direction_map.get(player_direction, "unknown direction")
+        picked_up_items = "None"
+
+        if items_in_inventory_count > 0:
+            # each inventory item occupies one bit of items_in_inventory_count
+            items = [
+                ("Key", "Opens locked doors.", 1),
+                ("Ankh", "Freezes enemies.", 2),
+                ("Gem", "Extra bonus points.", 4),
+                ("Torch", "Lights up dark rooms.", 8),
+                ("Sword", "Vanquishes certain enemies.", 16),
+            ]
+
+            picked_up_items = ""
+            for name, desc, val in items:
+                if items_in_inventory_count & val == val:
+                    picked_up_items += f"{name} ({desc}), "
+            picked_up_items = picked_up_items[:-2]  # drop the trailing ", "
+
+        res = f"""Room Number: {room_number}
+Player Position: ({player_x}, {player_y})
+Player Direction: {player_dir}
+Enemy Skull Position: ({enemy_skull_x}, {enemy_skull_y})
+Key Monster Position: ({key_monster_x}, {key_monster_y})
+Level: {level}
+Remaining Lives: {num_lives}
+Items in Inventory: {picked_up_items if picked_up_items else "None"}
+Room State (Mapped Based on Room Number): {room_state}
+Current Score: {score_0}{score_1}{score_2}\n"""
+        return res
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {}
+        self.reward_desc_dict = {}
+
+    def describe_goal(self):
+        return ("The goal is to guide PANAMA JOE safely to Montezuma's fantastic treasure. "
+                "Avoid danger, collect special tools and rewards, and navigate through the chambers of the emperor's fortress.")
+
+    def describe_game(self):
+        return ("""In Montezuma's Revenge, you control a fearless adventurer named PANAMA JOE who aims to navigate through a maze
+of death-dealing chambers within Emperor Montezuma's fortress. PANAMA JOE can walk, climb, and jump in the game. In each room of the
+maze, there are several dangers, including various creatures such as skulls, snakes, spiders, and bats, as well as several deadly room
+fixtures like fire pits, conveyor belts, disappearing floors, laser gates, floor spikes, and laser walls.
+
+PANAMA JOE can act on several elements within the game environment. Some items in the game are:
+1. Keys: Essential to open locked doors, allowing access to other rooms and deeper exploration.
+2. Ankhs: Freeze all Killer Creatures in the room for 6.5 seconds, during which they can't move or kill.
+3. Gems: Extra bonus points when collected.
+4. Torches: Light up dark rooms, making it easier to navigate through threats.
+5. Swords: Used to defeat certain enemies by contact with the tip of the sword.
+
+The game's ultimate goal is to reach the fabulous Treasure Room containing Montezuma's treasure while amassing as many points as
+possible and keeping PANAMA JOE alive through the challenges. The game ends when you lose all of your PANAMA JOEs, with a maximum
+of 6 lives.""")
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        actions = {
+            1: "No Operation",
+            2: "Move Right",
+            3: "Move Left",
+            4: "Move Down",
+            5: "Move Up",
+            6: "Move Right + Climb Down",
+            7: "Move Left + Climb Down",
+            8: "Move Right + Climb Up",
+            9: "Move Left + Climb Up",
+            10: "Jump",
+            11: "Jump Right",
+            12: "Jump Left",
+            13: "Jump Down",
+            14: "Jump Up",
+            15: "Jump Right + Climb Down",
+            16: "Jump Left + Climb Down",
+            17: "Jump Right + Climb Up",
+            18: "Jump Left + Climb Up",
+        }
+
+        description = "Your Next Move:\n"
+        for action_number, action_name in actions.items():
+            description += f"{action_number}: {action_name}\n"
+
+        description += "Please choose an action from the list above."
+        return description
+
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        action_names = [
+            "No Operation", "Move Right", "Move Left", "Move Down", "Move Up",
+            "Move Right + Climb Down", "Move Left + Climb Down",
+            "Move Right + Climb Up", "Move Left + Climb Up",
+            "Jump", "Jump Right", "Jump Left", "Jump Down", "Jump Up",
+            "Jump Right + Climb Down", "Jump Left + Climb Down",
+            "Jump Right + Climb Up", "Jump Left + Climb Up",
+        ]
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            # actions are numbered 1-18, so shift by one when indexing the name list
+            action_desc = f'Take Action: {action_names[info["action"] - 1]} ({info["action"]}).'
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n"
+                                f"{action_desc}\n"
+                                f"{reward_desc}\n"
+                                f"Transit to {next_state_desc}\n")
+
+        return descriptions
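For reference, BasicLevelTranslator.translate expects a 15-element state in the order given by the tuple unpacking above. A sketch with invented values (only the field order, the direction codes, and the inventory bitmask come from the code; the numbers are illustrative):

    from envs.atari.montezumarevenge_translator import BasicLevelTranslator

    state = (
        1,        # room_number
        77, 235,  # player_x, player_y
        72,       # player_direction -> "facing left"
        59, 200,  # enemy_skull_x, enemy_skull_y
        0, 0,     # key_monster_x, key_monster_y
        0,        # level
        5,        # num_lives
        1,        # items_in_inventory_count -> Key bit set
        0,        # room_state
        0, 0, 0,  # score digits score_0, score_1, score_2
    )
    print(BasicLevelTranslator().translate(state))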
envs/atari/mspacman_policies.py
ADDED
@@ -0,0 +1,88 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 9 cyclically"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 9 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 9)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+
+    dedicated_9_policy.description = get_description()
+    return 9
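As in the Montezuma's Revenge file, pseudo_random_policy cycles deterministically rather than sampling: feeding each returned action back in as pre_action walks through 2, 3, ..., 9, 1, 2, and so on. A short sketch:

    from envs.atari.mspacman_policies import pseudo_random_policy

    action = 1
    for _ in range(10):
        action = pseudo_random_policy(state=None, pre_action=action)
        print(action, end=" ")   # prints: 2 3 4 5 6 7 8 9 1 2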
envs/atari/mspacman_translator.py
ADDED
@@ -0,0 +1,80 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+
+    def translate(self, state):
+        x, y = state[8], state[9]
+        ghosts = [(state[0], state[4]), (state[1], state[5]), (state[2], state[6]), (state[3], state[7])]
+        ghost_directions = ["UP", "RIGHT", "LEFT", "DOWN"]
+
+        direction = ghost_directions[int(state[13])]
+        eaten_dots = state[14]
+        score = state[15]
+        lives = state[16]
+        ghosts_count = state[12]
+
+        fruit_x, fruit_y = state[10], state[11]
+        fruit_present = fruit_x != 0 or fruit_y != 0
+
+        player_state = f"Ms. Pac-Man is at position ({x}, {y}), facing {direction} with {lives} lives left. {eaten_dots} dots have been eaten so far and the current score is {score}. The game has {ghosts_count} ghosts."
+
+        ghost_states = []
+        for i, (gx, gy) in enumerate(ghosts):
+            ghost_name = ["Sue", "Inky", "Pinky", "Blinky"][i]
+            ghost_states.append(f"{ghost_name} the ghost is at position ({gx}, {gy})")
+        ghost_state_str = " ".join(ghost_states)
+
+        fruit_state = f"A fruit is present at position ({fruit_x}, {fruit_y})" if fruit_present else "No fruit is currently present on the screen."
+
+        result = f"{player_state} {fruit_state} {ghost_state_str}"
+        return result
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {}
+        self.reward_desc_dict = {}
+
+    def describe_goal(self):
+        return "The goal of Ms. Pac-Man is to score as many points as possible while avoiding the ghosts."
+
+    def describe_game(self):
+        return "In the Ms. Pac-Man game, you control Ms. Pac-Man, moving around a maze and eating dots to score points. "\
+               "There are also special bonus items, such as fruit and pretzels, that appear for a limited time and award "\
+               "extra points. Ghosts chase Ms. Pac-Man around the maze, but she can eat an energy pill to temporarily "\
+               "turn the ghosts vulnerable and eat them for extra points. The game ends when you lose all your lives. "\
+               "Scoring 10,000 points earns a bonus life."
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        return "Your Next Move: \n Please choose an action. Each value corresponds to a directional input as follows: "\
+               "1 - NOOP, 2 - UP, 3 - RIGHT, 4 - LEFT, 5 - DOWN, 6 - UPRIGHT, 7 - UPLEFT, 8 - DOWNRIGHT, 9 - DOWNLEFT. "\
+               "Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9]."
+
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        action_names = ['NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT']
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            # actions are numbered 1-9, so shift by one when indexing the name list
+            action_desc = f"Take Action: {action_names[info['action'] - 1]} ({info['action']})."
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to {next_state_desc}")
+        return descriptions
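BasicLevelTranslator.translate indexes a flat 17-element state: ghost x positions in state[0:4], ghost y positions in state[4:8], the player at state[8:10], the fruit at state[10:12], then ghost count, direction index, dots eaten, score, and lives. A sketch with invented values (the layout mirrors the indexing above; the numbers are illustrative):

    from envs.atari.mspacman_translator import BasicLevelTranslator

    state = [
        50, 60, 70, 80,  # ghost x positions (Sue, Inky, Pinky, Blinky)
        40, 40, 40, 40,  # ghost y positions
        88, 98,          # Ms. Pac-Man x, y
        0, 0,            # fruit x, y ((0, 0) means no fruit on screen)
        4,               # number of ghosts
        1,               # direction index -> "RIGHT"
        30,              # dots eaten
        310,             # score
        3,               # lives
    ]
    print(BasicLevelTranslator().translate(state))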
record_reflexion.csv
CHANGED
@@ -10,4 +10,5 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
-
+RepresentedMsPacman-v0,1,expert,10000.0
+RepresentedMontezumaRevenge-v0,1,expert,10000.0
test_atari.sh
CHANGED
@@ -1,2 +1,6 @@
 python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
-python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMsPacman-v0 --init_summarizer RepresentedMsPacman_init_translator --curr_summarizer RepresentedMsPacman_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMontezumaRevenge-v0 --init_summarizer RepresentedMontezumaRevenge_init_translator --curr_summarizer RepresentedMontezumaRevenge_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
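The commands above exercise the full reflexion loop. For a quicker check that the two new environments actually register, something like the following should work, assuming gym is installed and that importing envs triggers register_environments() as in envs/__init__.py:

    import gym
    import envs  # side effect: register_environments() registers the Represented* ids

    env = gym.make("RepresentedMontezumaRevenge-v0")
    state = env.reset()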