Spaces:

MAIL-CS-ECNU
/

Text-Gym-Agents

Runtime error

App Files Files Community

CharlesZhang committed on Jan 7

Commit

2ec5014

•

1 Parent(s): 9f88948

add 5 more envs: halfcheetah, hopper, walker2d, invertedPendulum, invertedDoublePendulum.

Browse files

Files changed (19) hide show

envs/__init__.py +36 -2
envs/mujoco/ant_translator.py +5 -0
envs/mujoco/halfcheetah_policies.py +15 -0
envs/mujoco/halfcheetah_translator.py +95 -0
envs/mujoco/hopper_policies.py +15 -0
envs/mujoco/hopper_translator.py +84 -0
envs/mujoco/invertedDoublePendulum_policies.py +15 -0
envs/mujoco/invertedDoublePendulum_translator.py +68 -0
envs/mujoco/invertedPendulum_policies.py +15 -0
envs/mujoco/invertedPendulum_translator.py +73 -0
envs/mujoco/walker2d_policies.py +15 -0
envs/mujoco/walker2d_translator.py +86 -0
main_reflexion.py +1 -1
shell/test_mujoco_ant.sh +15 -4
shell/test_mujoco_halfcheetah.sh +51 -0
shell/test_mujoco_hopper.sh +16 -0
shell/test_mujoco_invertedDoublePendulum.sh +16 -0
shell/test_mujoco_invertedPendulum.sh +16 -0
shell/test_mujoco_walker2d.sh +16 -0

envs/__init__.py CHANGED Viewed

@@ -16,7 +16,6 @@ from .atari import register_environments
 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
 register_environments()
-from .mujoco import ant_translator, ant_policies
 REGISTRY = {}
 REGISTRY["sampling_wrapper"] = SettableStateEnv
@@ -96,6 +95,41 @@ REGISTRY["RepresentedPong_basic_policies"] = [
     Pong_policies.dedicated_6_policy,
 ]
 REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
 REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
-REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]

 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
 register_environments()
 REGISTRY = {}
 REGISTRY["sampling_wrapper"] = SettableStateEnv
     Pong_policies.dedicated_6_policy,
 ]
+## For mujoco env: every environment <env> below registers three REGISTRY keys --
+## "<env>_init_translator" (its GameDescriber class), "<env>_basic_translator"
+## (its BasicStateSequenceTranslator class) and "<env>_policies" (a list holding
+## its pseudo_random_policy and real_random_policy functions).
+from .mujoco import invertedPendulum_translator, invertedPendulum_policies
+from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
+from .mujoco import hopper_translator, hopper_policies
+from .mujoco import walker2d_translator, walker2d_policies
+from .mujoco import halfcheetah_translator, halfcheetah_policies
+from .mujoco import ant_translator, ant_policies
+REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
+REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
+REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
+REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
+REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
+REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
+REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
+REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
+REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
+REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
+REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
+REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
+REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
+REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
+REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
 REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
 REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
+REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]

envs/mujoco/ant_translator.py CHANGED Viewed

@@ -1,3 +1,8 @@
 class BasicLevelTranslator:
     def __init__(self):
         pass

+'''
+Action Space Box(-1.0, 1.0, (8,), float32)
+Observation Space Box(-inf, inf, (27,), float64)
+'''
 class BasicLevelTranslator:
     def __init__(self):
         pass

envs/mujoco/halfcheetah_policies.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import numpy as np
+import random
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select action randomly"
+    pseudo_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(6)]
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(6)]

envs/mujoco/halfcheetah_translator.py ADDED Viewed

	@@ -0,0 +1,95 @@

+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+    def translate(self, state):
+        (front_tip_z, front_tip_angle, back_thigh_angle_1, back_shin_angle_1,
+         tip_velocity_x, tip_velocity_y, front_tip_angular_velocity,
+         back_thigh_angular_velocity_1, front_tip_x, front_tip_y, front_tip_angle_2,
+         back_thigh_angle_2, back_shin_angle_2, tip_velocity_angular_x,
+         tip_velocity_angular_y, front_tip_angular_velocity_2,
+         back_thigh_angular_velocity_2) = state[:17]
+        res = (
+            f"The front tip is at a z-coordinate of {front_tip_z:.2f} meters. "
+            f"The angle of the front tip is {front_tip_angle:.2f} radians. "
+            f"The angles of the back thigh are {back_thigh_angle_1:.2f} and {back_thigh_angle_2:.2f} radians. "
+            f"The angles of the back shin are {back_shin_angle_1:.2f} and {back_shin_angle_2:.2f} radians. "
+            f"The tip has velocity along the x-axis of {tip_velocity_x:.2f} m/s. "
+            f"The tip has velocity along the y-axis of {tip_velocity_y:.2f} m/s. "
+            f"The angular velocity of the front tip is {front_tip_angular_velocity:.2f} radians/s. "
+            f"The angular velocities of the back thigh are {back_thigh_angular_velocity_1:.2f} and {back_thigh_angular_velocity_2:.2f} radians/s. "
+            f"The x-coordinate of the front tip is {front_tip_x:.2f} meters. "
+            f"The y-coordinate of the front tip is {front_tip_y:.2f} meters. "
+            f"The angle of the front tip is {front_tip_angle_2:.2f} radians. "
+            f"The angular velocity of the tip along the x-axis is {tip_velocity_angular_x:.2f} radians/s. "
+            f"The angular velocity of the tip along the y-axis is {tip_velocity_angular_y:.2f} radians/s. "
+            f"The angular velocity of the back shin is {front_tip_angular_velocity_2:.2f} radians/s."
+        )
+        return res
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+    def translate_potential_next_state(self, state, action):
+        return ""
+    def describe_goal(self):
+        return "The goal is to make the Half-Cheetah run forward (right) as fast as possible."
+    def describe_game(self):
+        return (
+            "In the Half-Cheetah game, you control a 2-dimensional robot with 9 links and 8 joints. "
+            "The goal is to apply torque to the joints to make the cheetah run forward (right) as fast as possible. "
+            "You can control the back thigh, back shin, and back foot rotors for the back legs, and the front thigh, "
+            "front shin, and front foot rotors for the front legs. The episode ends after 1000 timesteps. "
+            "Your reward is based on how much forward progress you make and how much control effort you apply."
+        )
+    def describe_action(self):
+        return (
+            "Your next move: \n"
+            "Please select six numerical values, each one within the range of [-1,1], "
+            "which represents the torque being applied to the back thigh rotor, "
+            "back shin rotor, back foot rotor, front thigh rotor, front shin rotor, "
+            "and front foot rotor respectively."
+        )
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = (
+                "Take Action: "
+                "Apply Back Thigh Torque: {:.2f}, "
+                "Apply Back Shin Torque: {:.2f}, "
+                "Apply Back Foot Torque: {:.2f}, "
+                "Apply Front Thigh Torque: {:.2f}, "
+                "Apply Front Shin Torque: {:.2f}, "
+                "Apply Front Foot Torque: {:.2f}"
+            ).format(
+                info['action'][0], info['action'][1], info['action'][2],
+                info['action'][3], info['action'][4], info['action'][5]
+            )
+            reward_desc = f"Result: Forward Reward of {info['forward_reward']:.2f}, "
+            ctrl_cost_desc = f"Control Cost of {info['ctrl_cost']:.2f}, "
+            total_reward_desc = f"Total Reward of {info['reward']:.2f}, "
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} {ctrl_cost_desc} {total_reward_desc} \\n Transit to {next_state_desc}")
+        return descriptions

envs/mujoco/hopper_policies.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import numpy as np
+import random
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select action randomly"
+    pseudo_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(3)]
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(3)]

envs/mujoco/hopper_translator.py ADDED Viewed

	@@ -0,0 +1,84 @@

+'''
+Action Space Box(-1.0, 1.0, (3,), float32)
+Observation Space Box(-inf, inf, (11,), float64)
+'''
+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+    def translate(self, state):
+        (top_z, top_angle, thigh_angle, leg_angle, foot_angle,
+         top_x_velocity, top_z_velocity, top_angular_velocity,
+         thigh_angular_velocity, leg_angular_velocity, foot_angular_velocity) = state[:11]
+        res = (
+            f"The top is at a z-coordinate of {top_z:.2f} meters. "
+            f"The angle of the top is {top_angle:.2f} radians. "
+            f"The angle of the thigh joint is {thigh_angle:.2f} radians. "
+            f"The angle of the leg joint is {leg_angle:.2f} radians. "
+            f"The angle of the foot joint is {foot_angle:.2f} radians. "
+            f"The x-coordinate velocity of the top is {top_x_velocity:.2f} m/s. "
+            f"The z-coordinate (height) velocity of the top is {top_z_velocity:.2f} m/s. "
+            f"The angular velocity of the top is {top_angular_velocity:.2f} radians/s. "
+            f"The angular velocity of the thigh hinge is {thigh_angular_velocity:.2f} radians/s. "
+            f"The angular velocity of the leg hinge is {leg_angular_velocity:.2f} radians/s. "
+            f"The angular velocity of the foot hinge is {foot_angular_velocity:.2f} radians/s."
+        )
+        return res
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {}
+        self.reward_desc_dict = {}
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+    def translate_potential_next_state(self, state, action):
+        return ""
+    def describe_goal(self):
+        return (
+            "The goal in the Hopper environment is to make the one-legged hopper move forward (right) "
+            "by applying torques to the thigh, leg, and foot joints."
+        )
+    def describe_game(self):
+        return (
+            "In the Hopper environment, you control a one-legged hopper consisting of a torso, thigh, leg, "
+            "and a foot on which it rests. Your objective is to apply torques to the thigh, leg, and foot joints "
+            "to make the hopper perform hops in the positive x-direction. The environment provides observations "
+            "of the hopper's body parts and velocities, including the height, angles of joints, and angular velocities. "
+            "The episode ends when certain termination conditions are met."
+        )
+    def describe_action(self):
+        return (
+            "Your next move: \n Please provide a list of three numerical values, each within the range of [-1,1], "
+            "representing the torques to be applied at the thigh, leg, and foot joints of the hopper."
+        )
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = (
+                f"Take Action: Apply Thigh Torque: {info['action'][0]:.2f}, "
+                f"Leg Torque: {info['action'][1]:.2f}, Foot Torque: {info['action'][2]:.2f}"
+            )
+            reward_desc = f"Result: Reward of {info['reward']:.2f}, "
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
+        return descriptions

envs/mujoco/invertedDoublePendulum_policies.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import numpy as np
+import random
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select action randomly"
+    pseudo_random_policy.description = get_description()
+    return [6 * random.random() - 3 for i in range(1)]
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return [6 * random.random() - 3 for i in range(1)]

envs/mujoco/invertedDoublePendulum_translator.py ADDED Viewed

	@@ -0,0 +1,68 @@

+'''InvertedDoublePendulum-v4
+Action Space Box(-1.0, 1.0, (1,), float32)
+Observation Space Box(-inf, inf, (11,), float64)
+'''
+class BasicLevelTranslator:
+    def translate(self, state):
+        res = (
+            f"Position of the cart: {state[0]:.2f} m\n"
+            f"Vertical angle of the pole: {state[1]:.2f} rad\n"
+            f"Linear velocity of the cart: {state[2]:.2f} m/s\n"
+            f"Angular velocity of the pole: {state[3]:.2f} rad/s"
+        )
+        return res
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+            0: "Apply a force in the range [-1, 1] to the cart to control its motion.",
+        }
+        self.reward_desc_dict = {}
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+    def translate_potential_next_state(self, state, action):
+        return ""
+    def describe_goal(self):
+        return (
+            "The goal in the Inverted Pendulum environment is to balance the pole on top of the cart "\
+            "by applying continuous forces to the cart, keeping it upright."
+        )
+    def describe_game(self):
+        return (
+            "In the Inverted Pendulum environment, you control a cart that can move linearly with a pole "\
+            "attached to it. Your objective is to balance the pole on top of the cart by applying forces "\
+            "to the cart in a way that keeps the pole upright. "\
+            "The environment provides observations of the cart's position, pole angle, velocities, "\
+            "and angular velocities. The goal is to maintain balance as long as possible."
+        )
+    def describe_action(self):
+        return (
+            "Your next move: \n Please provide a numerical value for the force to be applied to the cart. "\
+            "This value should be within the range of [-3, 3], where a positive value indicates applying force "\
+            "in the right direction, and a negative value indicates applying force in the left direction."
+        )
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f"Applied Force on Cart: {info['action'][0]:.2f}"
+            reward_desc = f"Result: Reward of {info['reward']:.2f}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(
+                f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to\n{next_state_desc}"
+            )
+        return descriptions

envs/mujoco/invertedPendulum_policies.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import numpy as np
+import random
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select action randomly"
+    pseudo_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(1)]
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(1)]

envs/mujoco/invertedPendulum_translator.py ADDED Viewed

	@@ -0,0 +1,73 @@

+'''InvertedPendulum-v4
+Action Space Box(-3.0, 3.0, (1,), float32)
+Observation Space Box(-inf, inf, (4,), float64)
+'''
+class BasicLevelTranslator:
+    def translate(self, state):
+        res = (
+            f"Position of the cart: {state[0]:.2f} m\n"
+            f"Sine of the angle between cart and first pole: {state[1]:.2f}\n"
+            f"Sine of the angle between two poles: {state[2]:.2f}\n"
+            f"Cosine of the angle between cart and first pole: {state[3]:.2f}\n"
+            f"Cosine of the angle between two poles: {state[4]:.2f}\n"
+            f"Velocity of the cart: {state[5]:.2f} m/s\n"
+            f"Angular velocity of angle between cart and first pole: {state[6]:.2f} rad/s\n"
+            f"Angular velocity of angle between two poles: {state[7]:.2f} rad/s\n"
+            f"Constraint Force 1: {state[8]:.2f} N\n"
+            f"Constraint Force 2: {state[9]:.2f} N\n"
+            f"Constraint Force 3: {state[10]:.2f} N"
+        )
+        return res
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+            0: "Apply a force in the range [-3, 3] to the cart to control its motion.",
+        }
+        self.reward_desc_dict = {}
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+    def translate_potential_next_state(self, state, action):
+        return ""
+    def describe_goal(self):
+        return (
+            "The goal in the InvertedDoublePendulum environment is to balance the two poles "\
+            "on top of the cart by applying continuous forces on the cart."
+        )
+    def describe_game(self):
+        return (
+            "In the InvertedDoublePendulum environment, you control a system with a cart and two poles. "\
+            "Your objective is to balance the two poles on top of the cart by applying continuous forces "\
+            "to the cart. The environment provides observations of the cart's position, angles of the poles, "\
+            "and their angular velocities. The episode ends when certain termination conditions are met."
+        )
+    def describe_action(self):
+        return (
+            "Your next move: \n Please provide a numerical value within the range of [-3,3], "\
+            "representing the force to be applied to the cart."
+        )
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f"Applied Force on Cart: {info['action'][0]:.2f}"
+            reward_desc = f"Result: Reward of {info['reward']:.2f}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(
+                f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to\n{next_state_desc}"
+            )
+        return descriptions

envs/mujoco/walker2d_policies.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import numpy as np
+import random
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select action randomly"
+    pseudo_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(6)]
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return [2 * random.random() - 1 for i in range(6)]

envs/mujoco/walker2d_translator.py ADDED Viewed

	@@ -0,0 +1,86 @@

+'''Walker2d
+Action Space Box(-1.0, 1.0, (6,), float32)
+Observation Space Box(-inf, inf, (17,), float64)
+'''
+class BasicLevelTranslator:
+    def translate(self, state):
+        res = (
+            f"Z-coordinate of the top (height of walker): {state[0]:.2f} m\n"
+            f"Angle of the top: {state[1]:.2f} rad\n"
+            f"Angle of the thigh joint: {state[2]:.2f} rad\n"
+            f"Angle of the leg joint: {state[3]:.2f} rad\n"
+            f"Angle of the foot joint: {state[4]:.2f} rad\n"
+            f"Angle of the left thigh joint: {state[5]:.2f} rad\n"
+            f"Angle of the left leg joint: {state[6]:.2f} rad\n"
+            f"Angle of the left foot joint: {state[7]:.2f} rad\n"
+            f"Velocity of the x-coordinate of the top: {state[8]:.2f} m/s\n"
+            f"Velocity of the z-coordinate (height) of the top: {state[9]:.2f} m/s\n"
+            f"Angular velocity of the angle of the top: {state[10]:.2f} rad/s\n"
+            f"Angular velocity of the thigh hinge: {state[11]:.2f} rad/s\n"
+            f"Angular velocity of the leg hinge: {state[12]:.2f} rad/s\n"
+            f"Angular velocity of the foot hinge: {state[13]:.2f} rad/s\n"
+            f"Angular velocity of the thigh hinge (left): {state[14]:.2f} rad/s\n"
+            f"Angular velocity of the leg hinge (left): {state[15]:.2f} rad/s\n"
+            f"Angular velocity of the foot hinge (left): {state[16]:.2f} rad/s"
+        )
+        return res
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+    def translate_potential_next_state(self, state, action):
+        return ""
+    def describe_goal(self):
+        return (
+            "The goal in the Walker2D environment is to coordinate both sets of feet, legs, and thighs "
+            "to move in the forward (right) direction by applying torques to the six hinges connecting "
+            "the six body parts. The objective is to make the robot walk forward."
+        )
+    def describe_game(self):
+        return (
+            "In the Walker2D environment, you control a two-dimensional two-legged walker with four main body parts. "
+            "Your objective is to make the walker move forward by coordinating the torques applied to the six hinges "
+            "connecting the body parts. The environment provides observations of the walker's body parts and velocities, "
+            "including the torso, leg, and thigh angles, orientations, and velocities. The goal is to make the walker walk "
+            "forward in the positive x-direction."
+        )
+    def describe_action(self):
+        return (
+            "Your next move: \nPlease provide a list of six numerical values, each within the range of [-1, 1], "
+            "representing the torques to be applied at the six hinge joints of the walker. These torques will help "
+            "coordinate the walker's movements and make it walk in the desired direction."
+        )
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = (
+                "Torques Applied: "
+                f"Thigh: {info['action'][0]:.2f}, Leg: {info['action'][1]:.2f}, Foot: {info['action'][2]:.2f}, "
+                f"Left Thigh: {info['action'][3]:.2f}, Left Leg: {info['action'][4]:.2f}, Left Foot: {info['action'][5]:.2f}"
+            )
+            reward_desc = f"Result: Reward of {info['reward']:.2f}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(
+                f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to\n{next_state_desc}"
+            )
+        return descriptions

main_reflexion.py CHANGED Viewed

@@ -292,7 +292,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--api_type",
         type=str,
-        default="azure",
         choices=["azure", "openai"],
         help="choose api type, now support azure and openai"
     )

     parser.add_argument(
         "--api_type",
         type=str,
+        default="openai",
         choices=["azure", "openai"],
         help="choose api type, now support azure and openai"
     )

shell/test_mujoco_ant.sh CHANGED Viewed

@@ -1,6 +1,17 @@
 # exe
 python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
-python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
-python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
-python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
-python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

+# Ant-v4
+# REFLEXION
+python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
 # exe
 python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

shell/test_mujoco_halfcheetah.sh ADDED Viewed

	@@ -0,0 +1,51 @@

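+# HalfCheetah-v4
+# NOTE(review): the prompt_level 2 and 4 runs in this script pass --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole" and, in the exe section, "envs/toy_text/few_shot_examples/cliffwalking". Both look like copy-paste leftovers from another environment's script; confirm that HalfCheetah few-shot examples exist and point --prompt_path at them.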
+# Naive Actor
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
+# COT
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
+# self consistency
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
+# self-ask
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
+# SPP
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
+# REFLEXION
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 2 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
+# exe: exe_actor decider with the guide_generator distiller
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

shell/test_mujoco_hopper.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# Hopper-v4
+# REFLEXION
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
+# exe: exe_actor decider with the guide_generator distiller
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

shell/test_mujoco_invertedDoublePendulum.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# InvertedDoublePendulum-v4
+# REFLEXION
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
+# exe: exe_actor decider with the guide_generator distiller
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

shell/test_mujoco_invertedPendulum.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# InvertedPendulum-v4
+# REFLEXION
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
+# exe: exe_actor decider with the guide_generator distiller
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

shell/test_mujoco_walker2d.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# Walker2d-v4
+# REFLEXION
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
+# exe: exe_actor decider with the guide_generator distiller
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator