CharlesZhang committed on
Commit
2ec5014
1 Parent(s): 9f88948

add 5 more envs: halfcheetah, hopper, walker2d, invertedPendulum, invertedDoublePendulum.

Browse files
envs/__init__.py CHANGED
@@ -16,7 +16,6 @@ from .atari import register_environments
16
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
17
  register_environments()
18
 
19
- from .mujoco import ant_translator, ant_policies
20
 
21
  REGISTRY = {}
22
  REGISTRY["sampling_wrapper"] = SettableStateEnv
@@ -96,6 +95,41 @@ REGISTRY["RepresentedPong_basic_policies"] = [
96
  Pong_policies.dedicated_6_policy,
97
  ]
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
100
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
101
- REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
 
16
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
17
  register_environments()
18
 
 
19
 
20
  REGISTRY = {}
21
  REGISTRY["sampling_wrapper"] = SettableStateEnv
 
95
  Pong_policies.dedicated_6_policy,
96
  ]
97
 
98
+ ## For mujoco env
99
+
100
+
101
+ from .mujoco import invertedPendulum_translator, invertedPendulum_policies
102
+ from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
103
+ from .mujoco import hopper_translator, hopper_policies
104
+ from .mujoco import walker2d_translator, walker2d_policies
105
+
106
+ from .mujoco import halfcheetah_translator, halfcheetah_policies
107
+ from .mujoco import ant_translator, ant_policies
108
+
109
+ REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
110
+ REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
111
+ REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
112
+
113
+ REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
114
+ REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
115
+ REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
116
+
117
+
118
+ REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
119
+ REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
120
+ REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
121
+
122
+ REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
123
+ REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
124
+ REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
125
+
126
+
127
+
128
+ REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
129
+ REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
130
+ REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
131
+
132
+
133
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
134
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
135
+ REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
envs/mujoco/ant_translator.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  class BasicLevelTranslator:
2
  def __init__(self):
3
  pass
 
1
+ '''
2
+ Action Space Box(-1.0, 1.0, (8,), float32)
3
+ Observation Space Box(-inf, inf, (27,), float64)
4
+ '''
5
+
6
  class BasicLevelTranslator:
7
  def __init__(self):
8
  pass
envs/mujoco/halfcheetah_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random six-dimensional action for the Half-Cheetah.

    Both arguments are accepted for interface compatibility and are ignored.
    Every call also (re)sets ``pseudo_random_policy.description``.

    Returns:
        list[float]: six values, each computed as ``2 * random.random() - 1``.
    """
    pseudo_random_policy.description = "Select action randomly"
    torques = []
    for _ in range(6):
        torques.append(2 * random.random() - 1)
    return torques
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random six-dimensional action for the Half-Cheetah.

    Both arguments are ignored; ``pre_action`` keeps its default of 1.
    Every call also (re)sets ``real_random_policy.description``.

    Returns:
        list[float]: six values, each computed as ``2 * random.random() - 1``.
    """
    real_random_policy.description = "Select action with a random policy"
    torques = []
    for _ in range(6):
        torques.append(2 * random.random() - 1)
    return torques
envs/mujoco/halfcheetah_translator.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BasicLevelTranslator:
    """Render one Half-Cheetah observation vector as English sentences.

    The sentence labels follow the observation layout documented for
    Gymnasium's HalfCheetah (``qpos[1:]`` followed by ``qvel``): the height and
    pitch of the front tip, six joint angles, then nine velocities.
    """

    def __init__(self):
        pass

    def translate(self, state):
        """Describe the first 17 entries of ``state``.

        Args:
            state: sliceable sequence with at least 17 numeric entries.

        Returns:
            str: one sentence per observation component, separated by spaces.

        Raises:
            ValueError: if ``state`` holds fewer than 17 entries (the tuple
                unpacking below fails).
        """
        (front_tip_z, front_tip_angle,
         back_thigh_angle, back_shin_angle, back_foot_angle,
         front_thigh_angle, front_shin_angle, front_foot_angle,
         front_tip_velocity_x, front_tip_velocity_z,
         front_tip_angular_velocity,
         back_thigh_angular_velocity, back_shin_angular_velocity,
         back_foot_angular_velocity, front_thigh_angular_velocity,
         front_shin_angular_velocity,
         front_foot_angular_velocity) = state[:17]

        res = (
            # Positions: indices 0-7.
            f"The front tip is at a z-coordinate of {front_tip_z:.2f} meters. "
            f"The angle of the front tip is {front_tip_angle:.2f} radians. "
            f"The angle of the back thigh is {back_thigh_angle:.2f} radians. "
            f"The angle of the back shin is {back_shin_angle:.2f} radians. "
            f"The angle of the back foot is {back_foot_angle:.2f} radians. "
            f"The angle of the front thigh is {front_thigh_angle:.2f} radians. "
            f"The angle of the front shin is {front_shin_angle:.2f} radians. "
            f"The angle of the front foot is {front_foot_angle:.2f} radians. "
            # Velocities: indices 8-16.
            f"The velocity of the front tip along the x-axis is {front_tip_velocity_x:.2f} m/s. "
            f"The velocity of the front tip along the z-axis is {front_tip_velocity_z:.2f} m/s. "
            f"The angular velocity of the front tip is {front_tip_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the back thigh is {back_thigh_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the back shin is {back_shin_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the back foot is {back_foot_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the front thigh is {front_thigh_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the front shin is {front_shin_angular_velocity:.2f} radians/s. "
            f"The angular velocity of the front foot is {front_foot_angular_velocity:.2f} radians/s."
        )
        return res
30
+
31
class GameDescriber:
    """Fixed natural-language descriptions of the Half-Cheetah task."""

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        ``args`` must expose ``is_only_local_obs`` and ``max_episode_len``.
        """
        self.is_only_local_obs = (args.is_only_local_obs == 1)
        self.max_episode_len = args.max_episode_len
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no look-ahead text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        goal = "The goal is to make the Half-Cheetah run forward (right) as fast as possible."
        return goal

    def describe_game(self):
        """Return the multi-sentence description of the game."""
        sentences = (
            "In the Half-Cheetah game, you control a 2-dimensional robot with 9 links and 8 joints.",
            "The goal is to apply torque to the joints to make the cheetah run forward (right) as fast as possible.",
            "You can control the back thigh, back shin, and back foot rotors for the back legs, "
            "and the front thigh, front shin, and front foot rotors for the front legs.",
            "The episode ends after 1000 timesteps.",
            "Your reward is based on how much forward progress you make and how much control effort you apply.",
        )
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for six torque values."""
        header = "Your next move: \n"
        request = (
            "Please select six numerical values, each one within the range of [-1,1], "
            "which represents the torque being applied to the back thigh rotor, "
            "back shin rotor, back foot rotor, front thigh rotor, front shin rotor, "
            "and front foot rotor respectively."
        )
        return header + request
66
+
67
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Half-Cheetah transitions: state, action, rewards, next state."""

    def translate(self, infos, is_current=False):
        """Translate recorded steps into text.

        Args:
            infos: sequence of dicts. Each needs 'state'; when ``is_current``
                is false each also needs 'action' (at least six entries),
                'forward_reward', 'ctrl_cost', 'reward' and 'next_state'.
            is_current: when true, only the state of the last entry is
                described.

        Returns:
            A single string when ``is_current`` is true, otherwise a list with
            one description per entry of ``infos``.

        Raises:
            AssertionError: if an entry has no 'state' key.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"

            state_desc = BasicLevelTranslator().translate(info['state'])
            action = info['action']
            action_desc = (
                "Take Action: "
                f"Apply Back Thigh Torque: {action[0]:.2f}, "
                f"Apply Back Shin Torque: {action[1]:.2f}, "
                f"Apply Back Foot Torque: {action[2]:.2f}, "
                f"Apply Front Thigh Torque: {action[3]:.2f}, "
                f"Apply Front Shin Torque: {action[4]:.2f}, "
                f"Apply Front Foot Torque: {action[5]:.2f}"
            )

            # NOTE(review): Gymnasium's HalfCheetah-v4 step info reports
            # 'reward_run' / 'reward_ctrl'; confirm the caller fills in
            # 'forward_reward' and 'ctrl_cost' before this runs.
            reward_desc = f"Result: Forward Reward of {info['forward_reward']:.2f}, "
            ctrl_cost_desc = f"Control Cost of {info['ctrl_cost']:.2f}, "
            total_reward_desc = f"Total Reward of {info['reward']:.2f}, "
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            # Real newlines between sections; the previous "\\n" put a literal
            # backslash followed by "n" into the text.
            descriptions.append(
                f"{state_desc}.\n {action_desc} \n {reward_desc} {ctrl_cost_desc} "
                f"{total_reward_desc} \n Transit to {next_state_desc}"
            )
        return descriptions
envs/mujoco/hopper_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random three-dimensional action for the Hopper.

    Both arguments are accepted for interface compatibility and are ignored.
    Every call also (re)sets ``pseudo_random_policy.description``.

    Returns:
        list[float]: three values, each computed as ``2 * random.random() - 1``.
    """
    pseudo_random_policy.description = "Select action randomly"
    torques = []
    for _ in range(3):
        torques.append(2 * random.random() - 1)
    return torques
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random three-dimensional action for the Hopper.

    Both arguments are ignored; ``pre_action`` keeps its default of 1.
    Every call also (re)sets ``real_random_policy.description``.

    Returns:
        list[float]: three values, each computed as ``2 * random.random() - 1``.
    """
    real_random_policy.description = "Select action with a random policy"
    torques = []
    for _ in range(3):
        torques.append(2 * random.random() - 1)
    return torques
envs/mujoco/hopper_translator.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Action Space Box(-1.0, 1.0, (3,), float32)
3
+ Observation Space Box(-inf, inf, (11,), float64)
4
+ '''
5
+
6
class BasicLevelTranslator:
    """Turn a single Hopper observation into a plain-English description."""

    def __init__(self):
        pass

    def translate(self, state):
        """Describe the first 11 entries of ``state``, one sentence each.

        The tuple unpacking raises ``ValueError`` when ``state[:11]`` holds
        fewer than 11 entries.
        """
        (height, torso_q, thigh_q, leg_q, foot_q,
         vel_x, vel_z, torso_w, thigh_w, leg_w, foot_w) = state[:11]

        sentences = [
            f"The top is at a z-coordinate of {height:.2f} meters.",
            f"The angle of the top is {torso_q:.2f} radians.",
            f"The angle of the thigh joint is {thigh_q:.2f} radians.",
            f"The angle of the leg joint is {leg_q:.2f} radians.",
            f"The angle of the foot joint is {foot_q:.2f} radians.",
            f"The x-coordinate velocity of the top is {vel_x:.2f} m/s.",
            f"The z-coordinate (height) velocity of the top is {vel_z:.2f} m/s.",
            f"The angular velocity of the top is {torso_w:.2f} radians/s.",
            f"The angular velocity of the thigh hinge is {thigh_w:.2f} radians/s.",
            f"The angular velocity of the leg hinge is {leg_w:.2f} radians/s.",
            f"The angular velocity of the foot hinge is {foot_w:.2f} radians/s.",
        ]
        # Sentences are separated by exactly one space, with no trailing space.
        return " ".join(sentences)
29
+
30
class GameDescriber:
    """Fixed natural-language descriptions of the Hopper task."""

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        ``args`` must expose ``is_only_local_obs`` and ``max_episode_len``.
        """
        self.is_only_local_obs = (args.is_only_local_obs == 1)
        self.max_episode_len = args.max_episode_len
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no look-ahead text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        goal = (
            "The goal in the Hopper environment is to make the one-legged hopper "
            "move forward (right) by applying torques to the thigh, leg, and foot joints."
        )
        return goal

    def describe_game(self):
        """Return the multi-sentence description of the environment."""
        sentences = (
            "In the Hopper environment, you control a one-legged hopper consisting of a torso, "
            "thigh, leg, and a foot on which it rests.",
            "Your objective is to apply torques to the thigh, leg, and foot joints "
            "to make the hopper perform hops in the positive x-direction.",
            "The environment provides observations of the hopper's body parts and velocities, "
            "including the height, angles of joints, and angular velocities.",
            "The episode ends when certain termination conditions are met.",
        )
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for three torque values."""
        header = "Your next move: \n "
        request = (
            "Please provide a list of three numerical values, each within the range of [-1,1], "
            "representing the torques to be applied at the thigh, leg, and foot joints of the hopper."
        )
        return header + request
63
+
64
+
65
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Hopper transitions: state, action, reward, next state."""

    def translate(self, infos, is_current=False):
        """Translate recorded steps into text.

        Args:
            infos: sequence of dicts. Each needs 'state'; when ``is_current``
                is false each also needs 'action' (at least three entries),
                'reward' and 'next_state'.
            is_current: when true, only the state of the last entry is
                described.

        Returns:
            A single string when ``is_current`` is true, otherwise a list with
            one description per entry of ``infos``.

        Raises:
            AssertionError: if an entry has no 'state' key.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"

            state_desc = BasicLevelTranslator().translate(info['state'])
            action_desc = (
                f"Take Action: Apply Thigh Torque: {info['action'][0]:.2f}, "
                f"Leg Torque: {info['action'][1]:.2f}, Foot Torque: {info['action'][2]:.2f}"
            )

            reward_desc = f"Result: Reward of {info['reward']:.2f}, "
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            # Real newlines between sections; the previous "\\n" put a literal
            # backslash followed by "n" into the text.
            descriptions.append(
                f"{state_desc}.\n {action_desc} \n {reward_desc} \n Transit to {next_state_desc}"
            )
        return descriptions
84
+
envs/mujoco/invertedDoublePendulum_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random one-dimensional action for InvertedDoublePendulum.

    The force is drawn from [-1, 1) to match the Box(-1.0, 1.0, (1,)) action
    space stated for this environment; the previous [-3, 3) range is the one
    that belongs to the single InvertedPendulum.

    Both arguments are accepted for interface compatibility and are ignored.
    Every call also (re)sets ``pseudo_random_policy.description``.

    Returns:
        list[float]: a single-element list holding the sampled force.
    """
    pseudo_random_policy.description = "Select action randomly"
    return [2 * random.random() - 1 for _ in range(1)]
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random one-dimensional action for InvertedDoublePendulum.

    The force is drawn from [-1, 1) to match the Box(-1.0, 1.0, (1,)) action
    space stated for this environment; the previous [-3, 3) range is the one
    that belongs to the single InvertedPendulum.

    Both arguments are ignored; ``pre_action`` keeps its default of 1.
    Every call also (re)sets ``real_random_policy.description``.

    Returns:
        list[float]: a single-element list holding the sampled force.
    """
    real_random_policy.description = "Select action with a random policy"
    return [2 * random.random() - 1 for _ in range(1)]
envs/mujoco/invertedDoublePendulum_translator.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''InvertedDoublePendulum-v4
2
+ Action Space Box(-1.0, 1.0, (1,), float32)
3
+ Observation Space Box(-inf, inf, (11,), float64)
4
+ '''
5
+
6
class BasicLevelTranslator:
    """Render one InvertedDoublePendulum observation as labelled lines.

    The observation has 11 entries (see the module docstring); the previous
    body only described the 4 entries of the single InvertedPendulum.
    """

    def translate(self, state):
        """Describe entries 0-10 of ``state``, one per line.

        Args:
            state: indexable sequence with at least 11 numeric entries.

        Returns:
            str: eleven newline-separated lines without a trailing newline.

        Raises:
            IndexError: if ``state`` has fewer than 11 entries.
        """
        res = (
            f"Position of the cart: {state[0]:.2f} m\n"
            f"Sine of the angle between cart and first pole: {state[1]:.2f}\n"
            f"Sine of the angle between two poles: {state[2]:.2f}\n"
            f"Cosine of the angle between cart and first pole: {state[3]:.2f}\n"
            f"Cosine of the angle between two poles: {state[4]:.2f}\n"
            f"Velocity of the cart: {state[5]:.2f} m/s\n"
            f"Angular velocity of angle between cart and first pole: {state[6]:.2f} rad/s\n"
            f"Angular velocity of angle between two poles: {state[7]:.2f} rad/s\n"
            f"Constraint Force 1: {state[8]:.2f} N\n"
            f"Constraint Force 2: {state[9]:.2f} N\n"
            f"Constraint Force 3: {state[10]:.2f} N"
        )
        return res
15
+
16
class GameDescriber:
    """Fixed natural-language descriptions of the InvertedDoublePendulum task.

    The texts now describe the two-pole system and state the [-1, 1] force
    range consistently; previously they described the single Inverted Pendulum
    and ``describe_action`` contradicted ``action_desc_dict``.
    """

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        ``args`` must expose ``is_only_local_obs`` and ``max_episode_len``.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
            0: "Apply a force in the range [-1, 1] to the cart to control its motion.",
        }
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no look-ahead text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        return (
            "The goal in the InvertedDoublePendulum environment is to balance the two poles "
            "on top of the cart by applying continuous forces on the cart."
        )

    def describe_game(self):
        """Return the multi-sentence description of the environment."""
        return (
            "In the InvertedDoublePendulum environment, you control a system with a cart and two poles. "
            "Your objective is to balance the two poles on top of the cart by applying continuous forces "
            "to the cart. The environment provides observations of the cart's position, angles of the poles, "
            "and their angular velocities. The episode ends when certain termination conditions are met."
        )

    def describe_action(self):
        """Return the prompt asking for a single force value in [-1, 1]."""
        return (
            "Your next move: \n Please provide a numerical value within the range of [-1,1], "
            "representing the force to be applied to the cart."
        )
52
+
53
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe recorded cart transitions: state, force, reward, next state."""

    def translate(self, infos, is_current=False):
        """Translate recorded steps into text.

        With ``is_current`` true, return only the description of the last
        entry's 'state'. Otherwise return a list with one description per
        entry; each entry must provide 'state', 'action', 'reward' and
        'next_state', and a missing 'state' trips the assertion.
        """
        describe = BasicLevelTranslator().translate
        if is_current:
            return describe(infos[-1]['state'])

        transcripts = []
        for step in infos:
            assert 'state' in step, "info should contain state information"
            before = describe(step['state'])
            force_line = f"Applied Force on Cart: {step['action'][0]:.2f}"
            reward_line = f"Result: Reward of {step['reward']:.2f}"
            after = describe(step['next_state'])
            transcripts.append(
                "\n".join([before, force_line, reward_line, "Transit to", after])
            )
        return transcripts
envs/mujoco/invertedPendulum_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random one-dimensional action for InvertedPendulum.

    The force is drawn from [-3, 3) so the whole Box(-3.0, 3.0, (1,)) action
    space stated for this environment can be reached; the previous [-1, 1)
    range is the one that belongs to InvertedDoublePendulum.

    Both arguments are accepted for interface compatibility and are ignored.
    Every call also (re)sets ``pseudo_random_policy.description``.

    Returns:
        list[float]: a single-element list holding the sampled force.
    """
    pseudo_random_policy.description = "Select action randomly"
    return [6 * random.random() - 3 for _ in range(1)]
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random one-dimensional action for InvertedPendulum.

    The force is drawn from [-3, 3) so the whole Box(-3.0, 3.0, (1,)) action
    space stated for this environment can be reached; the previous [-1, 1)
    range is the one that belongs to InvertedDoublePendulum.

    Both arguments are ignored; ``pre_action`` keeps its default of 1.
    Every call also (re)sets ``real_random_policy.description``.

    Returns:
        list[float]: a single-element list holding the sampled force.
    """
    real_random_policy.description = "Select action with a random policy"
    return [6 * random.random() - 3 for _ in range(1)]
envs/mujoco/invertedPendulum_translator.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''InvertedPendulum-v4
2
+ Action Space Box(-3.0, 3.0, (1,), float32)
3
+ Observation Space Box(-inf, inf, (4,), float64)
4
+ '''
5
+
6
class BasicLevelTranslator:
    """Render one InvertedPendulum observation as labelled lines.

    The observation has 4 entries (see the module docstring); the previous
    body indexed up to ``state[10]``, which fails on a 4-entry observation.
    """

    def translate(self, state):
        """Describe entries 0-3 of ``state``, one per line.

        Args:
            state: indexable sequence with at least 4 numeric entries.

        Returns:
            str: four newline-separated lines without a trailing newline.

        Raises:
            IndexError: if ``state`` has fewer than 4 entries.
        """
        res = (
            f"Position of the cart: {state[0]:.2f} m\n"
            f"Vertical angle of the pole: {state[1]:.2f} rad\n"
            f"Linear velocity of the cart: {state[2]:.2f} m/s\n"
            f"Angular velocity of the pole: {state[3]:.2f} rad/s"
        )
        return res
22
+
23
class GameDescriber:
    """Fixed natural-language descriptions of the InvertedPendulum task.

    The texts now describe the single-pole system; previously they described
    the InvertedDoublePendulum environment.
    """

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        ``args`` must expose ``is_only_local_obs`` and ``max_episode_len``.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
            0: "Apply a force in the range [-3, 3] to the cart to control its motion.",
        }
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no look-ahead text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        return (
            "The goal in the Inverted Pendulum environment is to balance the pole on top of the cart "
            "by applying continuous forces to the cart, keeping it upright."
        )

    def describe_game(self):
        """Return the multi-sentence description of the environment."""
        return (
            "In the Inverted Pendulum environment, you control a cart that can move linearly with a pole "
            "attached to it. Your objective is to balance the pole on top of the cart by applying forces "
            "to the cart in a way that keeps the pole upright. "
            "The environment provides observations of the cart's position, pole angle, velocities, "
            "and angular velocities. The goal is to maintain balance as long as possible."
        )

    def describe_action(self):
        """Return the prompt asking for a single force value in [-3, 3]."""
        return (
            "Your next move: \n Please provide a numerical value for the force to be applied to the cart. "
            "This value should be within the range of [-3, 3], where a positive value indicates applying force "
            "in the right direction, and a negative value indicates applying force in the left direction."
        )
57
+
58
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe recorded cart transitions: state, force, reward, next state."""

    def translate(self, infos, is_current=False):
        """Translate recorded steps into text.

        With ``is_current`` true, return only the description of the last
        entry's 'state'. Otherwise return a list with one description per
        entry; each entry must provide 'state', 'action', 'reward' and
        'next_state', and a missing 'state' trips the assertion.
        """
        describe = BasicLevelTranslator().translate
        if is_current:
            return describe(infos[-1]['state'])

        transcripts = []
        for step in infos:
            assert 'state' in step, "info should contain state information"
            before = describe(step['state'])
            force_line = f"Applied Force on Cart: {step['action'][0]:.2f}"
            reward_line = f"Result: Reward of {step['reward']:.2f}"
            after = describe(step['next_state'])
            transcripts.append(
                "\n".join([before, force_line, reward_line, "Transit to", after])
            )
        return transcripts
envs/mujoco/walker2d_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random six-dimensional action for the Walker2d.

    Both arguments are accepted for interface compatibility and are ignored.
    Every call also (re)sets ``pseudo_random_policy.description``.

    Returns:
        list[float]: six values, each computed as ``2 * random.random() - 1``.
    """
    pseudo_random_policy.description = "Select action randomly"
    torques = []
    for _ in range(6):
        torques.append(2 * random.random() - 1)
    return torques
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random six-dimensional action for the Walker2d.

    Both arguments are ignored; ``pre_action`` keeps its default of 1.
    Every call also (re)sets ``real_random_policy.description``.

    Returns:
        list[float]: six values, each computed as ``2 * random.random() - 1``.
    """
    real_random_policy.description = "Select action with a random policy"
    torques = []
    for _ in range(6):
        torques.append(2 * random.random() - 1)
    return torques
envs/mujoco/walker2d_translator.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ '''Walker2d
3
+ Action Space Box(-1.0, 1.0, (6,), float32)
4
+ Observation Space Box(-inf, inf, (17,), float64)
5
+ '''
6
class BasicLevelTranslator:
    """Render one Walker2d observation as labelled, newline-separated lines."""

    def translate(self, state):
        """Describe entries 0-16 of ``state``, one ``label: value unit`` line each.

        Indexing raises ``IndexError`` when ``state`` has fewer than 17 entries.
        """
        # (label, unit) for each observation index, in order.
        fields = (
            ("Z-coordinate of the top (height of walker)", "m"),
            ("Angle of the top", "rad"),
            ("Angle of the thigh joint", "rad"),
            ("Angle of the leg joint", "rad"),
            ("Angle of the foot joint", "rad"),
            ("Angle of the left thigh joint", "rad"),
            ("Angle of the left leg joint", "rad"),
            ("Angle of the left foot joint", "rad"),
            ("Velocity of the x-coordinate of the top", "m/s"),
            ("Velocity of the z-coordinate (height) of the top", "m/s"),
            ("Angular velocity of the angle of the top", "rad/s"),
            ("Angular velocity of the thigh hinge", "rad/s"),
            ("Angular velocity of the leg hinge", "rad/s"),
            ("Angular velocity of the foot hinge", "rad/s"),
            ("Angular velocity of the thigh hinge (left)", "rad/s"),
            ("Angular velocity of the leg hinge (left)", "rad/s"),
            ("Angular velocity of the foot hinge (left)", "rad/s"),
        )
        lines = [
            f"{label}: {state[idx]:.2f} {unit}"
            for idx, (label, unit) in enumerate(fields)
        ]
        return "\n".join(lines)
28
+
29
class GameDescriber:
    """Fixed natural-language descriptions of the Walker2D task."""

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        ``args`` must expose ``is_only_local_obs`` and ``max_episode_len``.
        """
        self.is_only_local_obs = (args.is_only_local_obs == 1)
        self.max_episode_len = args.max_episode_len
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no look-ahead text is produced."""
        return ""

    def describe_goal(self):
        """Return the two-sentence goal statement."""
        sentences = (
            "The goal in the Walker2D environment is to coordinate both sets of feet, legs, and thighs "
            "to move in the forward (right) direction by applying torques to the six hinges connecting "
            "the six body parts.",
            "The objective is to make the robot walk forward.",
        )
        return " ".join(sentences)

    def describe_game(self):
        """Return the multi-sentence description of the environment."""
        sentences = (
            "In the Walker2D environment, you control a two-dimensional two-legged walker "
            "with four main body parts.",
            "Your objective is to make the walker move forward by coordinating the torques "
            "applied to the six hinges connecting the body parts.",
            "The environment provides observations of the walker's body parts and velocities, "
            "including the torso, leg, and thigh angles, orientations, and velocities.",
            "The goal is to make the walker walk forward in the positive x-direction.",
        )
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for six torque values."""
        header = "Your next move: \n"
        sentences = (
            "Please provide a list of six numerical values, each within the range of [-1, 1], "
            "representing the torques to be applied at the six hinge joints of the walker.",
            "These torques will help coordinate the walker's movements and make it walk "
            "in the desired direction.",
        )
        return header + " ".join(sentences)
66
+
67
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Walker2d transitions: state, torques, reward, next state."""

    def translate(self, infos, is_current=False):
        """Translate recorded steps into text.

        With ``is_current`` true, return only the description of the last
        entry's 'state'. Otherwise return a list with one description per
        entry; each entry must provide 'state', 'action' (at least six
        entries), 'reward' and 'next_state', and a missing 'state' trips the
        assertion.
        """
        describe = BasicLevelTranslator().translate
        if is_current:
            return describe(infos[-1]['state'])

        transcripts = []
        for step in infos:
            assert 'state' in step, "info should contain state information"
            before = describe(step['state'])
            torques = step['action']
            right_leg = f"Thigh: {torques[0]:.2f}, Leg: {torques[1]:.2f}, Foot: {torques[2]:.2f}, "
            left_leg = f"Left Thigh: {torques[3]:.2f}, Left Leg: {torques[4]:.2f}, Left Foot: {torques[5]:.2f}"
            action_line = "Torques Applied: " + right_leg + left_leg
            reward_line = f"Result: Reward of {step['reward']:.2f}"
            after = describe(step['next_state'])
            transcripts.append(
                "\n".join([before, action_line, reward_line, "Transit to", after])
            )
        return transcripts
main_reflexion.py CHANGED
@@ -292,7 +292,7 @@ if __name__ == "__main__":
292
  parser.add_argument(
293
  "--api_type",
294
  type=str,
295
- default="azure",
296
  choices=["azure", "openai"],
297
  help="choose api type, now support azure and openai"
298
  )
 
292
  parser.add_argument(
293
  "--api_type",
294
  type=str,
295
+ default="openai",
296
  choices=["azure", "openai"],
297
  help="choose api type, now support azure and openai"
298
  )
shell/test_mujoco_ant.sh CHANGED
@@ -1,6 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
1
  # exe
2
  python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
3
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
4
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
5
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
6
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
1
+
2
+ # Ant-v4
3
+
4
+ # REFLEXION
5
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
6
+
7
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
8
+
9
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
10
+
11
+
12
  # exe
13
  python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
14
+
15
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
16
+
17
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_halfcheetah.sh ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # HalfCheetah-v4
3
+ # Naive Actor
4
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
6
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
8
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
+
10
+ # COT
11
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
13
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
15
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
16
+
17
+ # self-consistency
18
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
19
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
20
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
21
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
22
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
23
+
24
+ # self-ask
25
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
26
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
27
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
28
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
29
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
30
+
31
+ # SPP
32
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
33
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
34
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
35
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
36
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
37
+
38
+ # REFLEXION
39
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
40
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 2 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
41
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
42
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
43
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
44
+
45
+
46
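+ # exe (exe_actor + guide_generator) -- NOTE(review): the prompt_level 2 and 4 runs below pass --prompt_path "envs/toy_text/few_shot_examples/cliffwalking", which looks copied from another environment's script; confirm the intended HalfCheetah few-shot directory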
47
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
48
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
49
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
50
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
51
+ python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_hopper.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hopper-v4
2
+
3
+ # REFLEXION
4
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
+
6
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
7
+
8
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
9
+
10
+
11
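+ # exe (exe_actor + guide_generator) -- NOTE(review): only the prompt_level 1 run passes --api_type openai; confirm whether the level 3 and 5 runs should pass it too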
12
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
13
+
14
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
15
+
16
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_invertedDoublePendulum.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # InvertedDoublePendulum-v4
2
+
3
+ # REFLEXION
4
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
+
6
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
7
+
8
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
9
+
10
+
11
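+ # exe (exe_actor + guide_generator) -- NOTE(review): only the prompt_level 1 run passes --api_type openai; confirm whether the level 3 and 5 runs should pass it too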
12
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
13
+
14
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
15
+
16
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_invertedPendulum.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # InvertedPendulum-v4
2
+
3
+ # REFLEXION
4
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
+
6
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
7
+
8
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
9
+
10
+
11
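+ # exe (exe_actor + guide_generator) -- NOTE(review): only the prompt_level 1 run passes --api_type openai; confirm whether the level 3 and 5 runs should pass it too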
12
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
13
+
14
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
15
+
16
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_walker2d.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Walker2d-v4
2
+
3
+ # REFLEXION
4
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
+
6
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
7
+
8
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
9
+
10
+
11
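+ # exe (exe_actor + guide_generator) -- NOTE(review): only the prompt_level 1 run passes --api_type openai; confirm whether the level 3 and 5 runs should pass it too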
12
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
13
+
14
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
15
+
16
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator