Spaces:
Runtime error
Runtime error
CharlesZhang
committed on
Commit
•
2ec5014
1
Parent(s):
9f88948
add 5 more envs: halfcheetah, hopper, walker2d, invertedPendulum, invertedDoublePendulum.
Browse files- envs/__init__.py +36 -2
- envs/mujoco/ant_translator.py +5 -0
- envs/mujoco/halfcheetah_policies.py +15 -0
- envs/mujoco/halfcheetah_translator.py +95 -0
- envs/mujoco/hopper_policies.py +15 -0
- envs/mujoco/hopper_translator.py +84 -0
- envs/mujoco/invertedDoublePendulum_policies.py +15 -0
- envs/mujoco/invertedDoublePendulum_translator.py +68 -0
- envs/mujoco/invertedPendulum_policies.py +15 -0
- envs/mujoco/invertedPendulum_translator.py +73 -0
- envs/mujoco/walker2d_policies.py +15 -0
- envs/mujoco/walker2d_translator.py +86 -0
- main_reflexion.py +1 -1
- shell/test_mujoco_ant.sh +15 -4
- shell/test_mujoco_halfcheetah.sh +51 -0
- shell/test_mujoco_hopper.sh +16 -0
- shell/test_mujoco_invertedDoublePendulum.sh +16 -0
- shell/test_mujoco_invertedPendulum.sh +16 -0
- shell/test_mujoco_walker2d.sh +16 -0
envs/__init__.py
CHANGED
@@ -16,7 +16,6 @@ from .atari import register_environments
|
|
16 |
from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
|
17 |
register_environments()
|
18 |
|
19 |
-
from .mujoco import ant_translator, ant_policies
|
20 |
|
21 |
REGISTRY = {}
|
22 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
@@ -96,6 +95,41 @@ REGISTRY["RepresentedPong_basic_policies"] = [
|
|
96 |
Pong_policies.dedicated_6_policy,
|
97 |
]
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
100 |
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
101 |
-
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
|
|
16 |
from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
|
17 |
register_environments()
|
18 |
|
|
|
19 |
|
20 |
REGISTRY = {}
|
21 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
|
|
95 |
Pong_policies.dedicated_6_policy,
|
96 |
]
|
97 |
|
98 |
+
## For mujoco env
from .mujoco import invertedPendulum_translator, invertedPendulum_policies
from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
from .mujoco import hopper_translator, hopper_policies
from .mujoco import walker2d_translator, walker2d_policies
from .mujoco import halfcheetah_translator, halfcheetah_policies
from .mujoco import ant_translator, ant_policies

# Every MuJoCo task contributes three registry entries: the game describer
# ("<task>_init_translator"), the trajectory translator
# ("<task>_basic_translator") and its pair of random baseline policies
# ("<task>_policies"). Keys are inserted in the same order as before.
REGISTRY.update({
    "invertedPendulum_init_translator": invertedPendulum_translator.GameDescriber,
    "invertedPendulum_basic_translator": invertedPendulum_translator.BasicStateSequenceTranslator,
    "invertedPendulum_policies": [
        invertedPendulum_policies.pseudo_random_policy,
        invertedPendulum_policies.real_random_policy,
    ],
    "invertedDoublePendulum_init_translator": invertedDoublePendulum_translator.GameDescriber,
    "invertedDoublePendulum_basic_translator": invertedDoublePendulum_translator.BasicStateSequenceTranslator,
    "invertedDoublePendulum_policies": [
        invertedDoublePendulum_policies.pseudo_random_policy,
        invertedDoublePendulum_policies.real_random_policy,
    ],
    "hopper_init_translator": hopper_translator.GameDescriber,
    "hopper_basic_translator": hopper_translator.BasicStateSequenceTranslator,
    "hopper_policies": [
        hopper_policies.pseudo_random_policy,
        hopper_policies.real_random_policy,
    ],
    "walker2d_init_translator": walker2d_translator.GameDescriber,
    "walker2d_basic_translator": walker2d_translator.BasicStateSequenceTranslator,
    "walker2d_policies": [
        walker2d_policies.pseudo_random_policy,
        walker2d_policies.real_random_policy,
    ],
    "halfcheetah_init_translator": halfcheetah_translator.GameDescriber,
    "halfcheetah_basic_translator": halfcheetah_translator.BasicStateSequenceTranslator,
    "halfcheetah_policies": [
        halfcheetah_policies.pseudo_random_policy,
        halfcheetah_policies.real_random_policy,
    ],
    "ant_init_translator": ant_translator.GameDescriber,
    "ant_basic_translator": ant_translator.BasicStateSequenceTranslator,
    "ant_policies": [
        ant_policies.pseudo_random_policy,
        ant_policies.real_random_policy,
    ],
})
|
envs/mujoco/ant_translator.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
class BasicLevelTranslator:
|
2 |
def __init__(self):
|
3 |
pass
|
|
|
1 |
+
'''
|
2 |
+
Action Space Box(-1.0, 1.0, (8,), float32)
|
3 |
+
Observation Space Box(-inf, inf, (27,), float64)
|
4 |
+
'''
|
5 |
+
|
6 |
class BasicLevelTranslator:
|
7 |
def __init__(self):
|
8 |
pass
|
envs/mujoco/halfcheetah_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random HalfCheetah action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused).

    Returns:
        A list of six floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(6)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random HalfCheetah action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused, defaults to 1).

    Returns:
        A list of six floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(6)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/halfcheetah_translator.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class BasicLevelTranslator:
    """Turn one HalfCheetah observation vector into an English description."""

    def __init__(self):
        pass

    def translate(self, state):
        """Describe the first 17 entries of ``state`` in plain English.

        The labels follow the joint order of Gymnasium's HalfCheetah
        observation: height and angle of the front tip, the back
        thigh/shin/foot and front thigh/shin/foot joint angles, then the
        matching linear and angular velocities. The earlier wording reused
        "back thigh"/"back shin" for several different entries and never
        named the feet or the front leg.

        Args:
            state: Sliceable observation with at least 17 numeric entries.

        Returns:
            One string made of 17 space-separated sentences.

        Raises:
            ValueError: If ``state`` holds fewer than 17 entries.
        """
        (tip_z, tip_angle,
         bthigh, bshin, bfoot, fthigh, fshin, ffoot,
         tip_vx, tip_vz, tip_rate,
         bthigh_rate, bshin_rate, bfoot_rate,
         fthigh_rate, fshin_rate, ffoot_rate) = state[:17]

        joints = ("back thigh", "back shin", "back foot",
                  "front thigh", "front shin", "front foot")
        angles = (bthigh, bshin, bfoot, fthigh, fshin, ffoot)
        rates = (bthigh_rate, bshin_rate, bfoot_rate,
                 fthigh_rate, fshin_rate, ffoot_rate)

        sentences = [
            f"The front tip is at a z-coordinate of {tip_z:.2f} meters.",
            f"The angle of the front tip is {tip_angle:.2f} radians.",
        ]
        sentences += [
            f"The angle of the {joint} is {angle:.2f} radians."
            for joint, angle in zip(joints, angles)
        ]
        sentences += [
            f"The velocity of the front tip along the x-axis is {tip_vx:.2f} m/s.",
            f"The velocity of the front tip along the z-axis is {tip_vz:.2f} m/s.",
            f"The angular velocity of the front tip is {tip_rate:.2f} radians/s.",
        ]
        sentences += [
            f"The angular velocity of the {joint} is {rate:.2f} radians/s."
            for joint, rate in zip(joints, rates)
        ]
        return " ".join(sentences)
|
30 |
+
|
31 |
+
class GameDescriber:
    """Static natural-language descriptions of the Half-Cheetah task."""

    def __init__(self, args):
        """Store the run options read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        self.max_episode_len = args.max_episode_len
        self.is_only_local_obs = args.is_only_local_obs == 1
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal of the task."""
        goal = "The goal is to make the Half-Cheetah run forward (right) as fast as possible."
        return goal

    def describe_game(self):
        """Return the overview paragraph of the task."""
        parts = [
            "In the Half-Cheetah game, you control a 2-dimensional robot with 9 links and 8 joints. ",
            "The goal is to apply torque to the joints to make the cheetah run forward (right) as fast as possible. ",
            "You can control the back thigh, back shin, and back foot rotors for the back legs, and the front thigh, ",
            "front shin, and front foot rotors for the front legs. The episode ends after 1000 timesteps. ",
            "Your reward is based on how much forward progress you make and how much control effort you apply.",
        ]
        return "".join(parts)

    def describe_action(self):
        """Return the prompt asking for six torque values."""
        parts = [
            "Your next move: \n",
            "Please select six numerical values, each one within the range of [-1,1], ",
            "which represents the torque being applied to the back thigh rotor, ",
            "back shin rotor, back foot rotor, front thigh rotor, front shin rotor, ",
            "and front foot rotor respectively.",
        ]
        return "".join(parts)
|
66 |
+
|
67 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe HalfCheetah transitions (state, action, rewards, next state)."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each one is read for the keys
                ``state``, ``action``, ``forward_reward``, ``ctrl_cost``,
                ``reward`` and ``next_state``.
            is_current: When true, only the ``state`` of the last record is
                described and returned as a single string.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            description per record.
        """
        describe_state = BasicLevelTranslator().translate
        if is_current:
            return describe_state(infos[-1]['state'])

        joint_names = ("Back Thigh", "Back Shin", "Back Foot",
                       "Front Thigh", "Front Shin", "Front Foot")
        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"

            state_desc = describe_state(info['state'])
            torques = info['action']
            action_desc = "Take Action: " + ", ".join(
                "Apply {} Torque: {:.2f}".format(joint, torques[idx])
                for idx, joint in enumerate(joint_names)
            )

            # NOTE(review): presumably the trajectory builder fills in
            # 'forward_reward' and 'ctrl_cost' - confirm against the caller.
            reward_desc = f"Result: Forward Reward of {info['forward_reward']:.2f}, "
            ctrl_cost_desc = f"Control Cost of {info['ctrl_cost']:.2f}, "
            total_reward_desc = f"Total Reward of {info['reward']:.2f}, "
            next_state_desc = describe_state(info['next_state'])
            # The "\\n" below emits a literal backslash-n, exactly as before.
            descriptions.append(
                f"{state_desc}.\\n {action_desc} \\n {reward_desc} {ctrl_cost_desc} {total_reward_desc} \\n Transit to {next_state_desc}"
            )
        return descriptions
|
envs/mujoco/hopper_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random Hopper action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused).

    Returns:
        A list of three floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(3)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random Hopper action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused, defaults to 1).

    Returns:
        A list of three floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(3)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/hopper_translator.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Action Space Box(-1.0, 1.0, (3,), float32)
|
3 |
+
Observation Space Box(-inf, inf, (11,), float64)
|
4 |
+
'''
|
5 |
+
|
6 |
+
class BasicLevelTranslator:
    """Describe a single Hopper observation in plain English."""

    def __init__(self):
        pass

    def translate(self, state):
        """Render the first 11 entries of ``state`` as sentences.

        Args:
            state: Sliceable observation with at least 11 numeric entries.

        Returns:
            One string of 11 space-separated sentences.
        """
        (height, pitch, thigh, leg, foot,
         x_vel, z_vel, pitch_rate,
         thigh_rate, leg_rate, foot_rate) = state[:11]

        sentences = (
            f"The top is at a z-coordinate of {height:.2f} meters.",
            f"The angle of the top is {pitch:.2f} radians.",
            f"The angle of the thigh joint is {thigh:.2f} radians.",
            f"The angle of the leg joint is {leg:.2f} radians.",
            f"The angle of the foot joint is {foot:.2f} radians.",
            f"The x-coordinate velocity of the top is {x_vel:.2f} m/s.",
            f"The z-coordinate (height) velocity of the top is {z_vel:.2f} m/s.",
            f"The angular velocity of the top is {pitch_rate:.2f} radians/s.",
            f"The angular velocity of the thigh hinge is {thigh_rate:.2f} radians/s.",
            f"The angular velocity of the leg hinge is {leg_rate:.2f} radians/s.",
            f"The angular velocity of the foot hinge is {foot_rate:.2f} radians/s.",
        )
        return " ".join(sentences)
|
29 |
+
|
30 |
+
class GameDescriber:
    """Static natural-language descriptions of the Hopper task."""

    def __init__(self, args):
        """Store the run options read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        self.max_episode_len = args.max_episode_len
        self.is_only_local_obs = args.is_only_local_obs == 1
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal of the task."""
        return "".join([
            "The goal in the Hopper environment is to make the one-legged hopper move forward (right) ",
            "by applying torques to the thigh, leg, and foot joints.",
        ])

    def describe_game(self):
        """Return the overview paragraph of the task."""
        return "".join([
            "In the Hopper environment, you control a one-legged hopper consisting of a torso, thigh, leg, ",
            "and a foot on which it rests. Your objective is to apply torques to the thigh, leg, and foot joints ",
            "to make the hopper perform hops in the positive x-direction. The environment provides observations ",
            "of the hopper's body parts and velocities, including the height, angles of joints, and angular velocities. ",
            "The episode ends when certain termination conditions are met.",
        ])

    def describe_action(self):
        """Return the prompt asking for three torque values."""
        return "".join([
            "Your next move: \n Please provide a list of three numerical values, each within the range of [-1,1], ",
            "representing the torques to be applied at the thigh, leg, and foot joints of the hopper.",
        ])
|
63 |
+
|
64 |
+
|
65 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Hopper transitions (state, action, reward, next state)."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each one is read for the keys
                ``state``, ``action``, ``reward`` and ``next_state``.
            is_current: When true, only the ``state`` of the last record is
                described and returned as a single string.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            description per record.
        """
        describe_state = BasicLevelTranslator().translate
        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"

            state_desc = describe_state(info['state'])
            torques = info['action']
            action_desc = (
                f"Take Action: Apply Thigh Torque: {torques[0]:.2f}, "
                f"Leg Torque: {torques[1]:.2f}, Foot Torque: {torques[2]:.2f}"
            )
            reward_desc = f"Result: Reward of {info['reward']:.2f}, "
            next_state_desc = describe_state(info['next_state'])
            # The "\\n" below emits a literal backslash-n, exactly as before.
            descriptions.append(
                f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}"
            )
        return descriptions
|
84 |
+
|
envs/mujoco/invertedDoublePendulum_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random InvertedDoublePendulum action; arguments are ignored.

    The force is drawn from [-1, 1), the action range this environment's
    translator module declares. The earlier ``6 * random.random() - 3``
    produced values in [-3, 3), outside that range.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused).

    Returns:
        A one-element list holding the sampled force.
    """
    return [2 * random.random() - 1 for _ in range(1)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random InvertedDoublePendulum action; arguments are ignored.

    The force is drawn from [-1, 1), the action range this environment's
    translator module declares. The earlier ``6 * random.random() - 3``
    produced values in [-3, 3), outside that range.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused, defaults to 1).

    Returns:
        A one-element list holding the sampled force.
    """
    return [2 * random.random() - 1 for _ in range(1)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/invertedDoublePendulum_translator.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''InvertedDoublePendulum-v4
|
2 |
+
Action Space Box(-1.0, 1.0, (1,), float32)
|
3 |
+
Observation Space Box(-inf, inf, (11,), float64)
|
4 |
+
'''
|
5 |
+
|
6 |
+
class BasicLevelTranslator:
    """Describe one InvertedDoublePendulum observation in plain English."""

    def translate(self, state):
        """Render the 11 observation entries, one per line.

        This module's header declares an 11-entry observation. The previous
        body described only four entries using single-pendulum labels, so
        most of the observation was dropped and entries 1-3 were mislabelled.

        Args:
            state: Indexable observation with at least 11 numeric entries.

        Returns:
            Eleven newline-separated ``label: value`` lines.

        Raises:
            IndexError: If ``state`` has fewer than 11 entries.
        """
        lines = [
            f"Position of the cart: {state[0]:.2f} m",
            f"Sine of the angle between cart and first pole: {state[1]:.2f}",
            f"Sine of the angle between two poles: {state[2]:.2f}",
            f"Cosine of the angle between cart and first pole: {state[3]:.2f}",
            f"Cosine of the angle between two poles: {state[4]:.2f}",
            f"Velocity of the cart: {state[5]:.2f} m/s",
            f"Angular velocity of angle between cart and first pole: {state[6]:.2f} rad/s",
            f"Angular velocity of angle between two poles: {state[7]:.2f} rad/s",
            f"Constraint Force 1: {state[8]:.2f} N",
            f"Constraint Force 2: {state[9]:.2f} N",
            f"Constraint Force 3: {state[10]:.2f} N",
        ]
        return "\n".join(lines)
|
15 |
+
|
16 |
+
class GameDescriber:
    """Static natural-language descriptions of InvertedDoublePendulum.

    The earlier texts described the single Inverted Pendulum task and asked
    for a force in [-3, 3], contradicting both this module's header and
    ``action_desc_dict`` (which state [-1, 1]). All texts now describe the
    two-pole task with the [-1, 1] range.
    """

    def __init__(self, args):
        """Store the run options read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
            0: "Apply a force in the range [-1, 1] to the cart to control its motion.",
        }
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal of the task."""
        return (
            "The goal in the InvertedDoublePendulum environment is to balance the two poles "
            "on top of the cart by applying continuous forces on the cart."
        )

    def describe_game(self):
        """Return the overview paragraph of the task."""
        return (
            "In the InvertedDoublePendulum environment, you control a system with a cart and two poles. "
            "Your objective is to balance the two poles on top of the cart by applying continuous forces "
            "to the cart. The environment provides observations of the cart's position, angles of the poles, "
            "and their angular velocities. The episode ends when certain termination conditions are met."
        )

    def describe_action(self):
        """Return the prompt asking for a single force value in [-1, 1]."""
        return (
            "Your next move: \n Please provide a numerical value within the range of [-1,1], "
            "representing the force to be applied to the cart."
        )
|
52 |
+
|
53 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe cart transitions (state, action, reward, next state)."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each one is read for the keys
                ``state``, ``action``, ``reward`` and ``next_state``.
            is_current: When true, only the ``state`` of the last record is
                described and returned as a single string.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            description per record.
        """
        describe_state = BasicLevelTranslator().translate
        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"
            pieces = [describe_state(info['state'])]
            pieces.append(f"Applied Force on Cart: {info['action'][0]:.2f}")
            pieces.append(f"Result: Reward of {info['reward']:.2f}")
            pieces.append("Transit to")
            pieces.append(describe_state(info['next_state']))
            descriptions.append("\n".join(pieces))
        return descriptions
|
envs/mujoco/invertedPendulum_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random InvertedPendulum action; arguments are ignored.

    The force is drawn from [-3, 3), the action range this environment's
    translator module declares. The earlier ``2 * random.random() - 1``
    only ever explored the middle third of that range.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused).

    Returns:
        A one-element list holding the sampled force.
    """
    return [6 * random.random() - 3 for _ in range(1)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random InvertedPendulum action; arguments are ignored.

    The force is drawn from [-3, 3), the action range this environment's
    translator module declares. The earlier ``2 * random.random() - 1``
    only ever explored the middle third of that range.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused, defaults to 1).

    Returns:
        A one-element list holding the sampled force.
    """
    return [6 * random.random() - 3 for _ in range(1)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/invertedPendulum_translator.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''InvertedPendulum-v4
|
2 |
+
Action Space Box(-3.0, 3.0, (1,), float32)
|
3 |
+
Observation Space Box(-inf, inf, (4,), float64)
|
4 |
+
'''
|
5 |
+
|
6 |
+
class BasicLevelTranslator:
    """Describe one InvertedPendulum observation in plain English."""

    def translate(self, state):
        """Render the four observation entries, one per line.

        This module's header declares a 4-entry observation. The previous
        body indexed ``state[4]`` through ``state[10]`` with double-pendulum
        labels, which raises IndexError on a 4-entry observation.

        Args:
            state: Indexable observation with at least four numeric entries.

        Returns:
            Four newline-separated ``label: value`` lines.

        Raises:
            IndexError: If ``state`` has fewer than four entries.
        """
        lines = [
            f"Position of the cart: {state[0]:.2f} m",
            f"Vertical angle of the pole: {state[1]:.2f} rad",
            f"Linear velocity of the cart: {state[2]:.2f} m/s",
            f"Angular velocity of the pole: {state[3]:.2f} rad/s",
        ]
        return "\n".join(lines)
|
22 |
+
|
23 |
+
class GameDescriber:
    """Static natural-language descriptions of the InvertedPendulum task.

    The earlier goal and game texts described the two-pole
    InvertedDoublePendulum task; they now describe the single-pole task
    this module is named after. The [-3, 3] force range is unchanged.
    """

    def __init__(self, args):
        """Store the run options read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
            0: "Apply a force in the range [-3, 3] to the cart to control its motion.",
        }
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal of the task."""
        return (
            "The goal in the Inverted Pendulum environment is to balance the pole on top of the cart "
            "by applying continuous forces to the cart, keeping it upright."
        )

    def describe_game(self):
        """Return the overview paragraph of the task."""
        return (
            "In the Inverted Pendulum environment, you control a cart that can move linearly with a pole "
            "attached to it. Your objective is to balance the pole on top of the cart by applying forces "
            "to the cart in a way that keeps the pole upright. "
            "The environment provides observations of the cart's position, pole angle, velocities, "
            "and angular velocities. The goal is to maintain balance as long as possible."
        )

    def describe_action(self):
        """Return the prompt asking for a single force value in [-3, 3]."""
        return (
            "Your next move: \n Please provide a numerical value for the force to be applied to the cart. "
            "This value should be within the range of [-3, 3], where a positive value indicates applying force "
            "in the right direction, and a negative value indicates applying force in the left direction."
        )
|
57 |
+
|
58 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe cart transitions (state, action, reward, next state)."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each one is read for the keys
                ``state``, ``action``, ``reward`` and ``next_state``.
            is_current: When true, only the ``state`` of the last record is
                described and returned as a single string.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            description per record.
        """
        describe_state = BasicLevelTranslator().translate
        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"
            pieces = [describe_state(info['state'])]
            pieces.append(f"Applied Force on Cart: {info['action'][0]:.2f}")
            pieces.append(f"Result: Reward of {info['reward']:.2f}")
            pieces.append("Transit to")
            pieces.append(describe_state(info['next_state']))
            descriptions.append("\n".join(pieces))
        return descriptions
|
envs/mujoco/walker2d_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random Walker2d action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused).

    Returns:
        A list of six floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(6)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random Walker2d action; both arguments are ignored.

    Args:
        state: Current observation (unused).
        pre_action: Previously taken action (unused, defaults to 1).

    Returns:
        A list of six floats, each computed as ``2 * random.random() - 1``.
    """
    return [2 * random.random() - 1 for _ in range(6)]


# Attach the description at definition time so it is readable before the
# policy has ever been called (it used to be assigned inside the call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/walker2d_translator.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
'''Walker2d
|
3 |
+
Action Space Box(-1.0, 1.0, (6,), float32)
|
4 |
+
Observation Space Box(-inf, inf, (17,), float64)
|
5 |
+
'''
|
6 |
+
class BasicLevelTranslator:
    """Describe one Walker2d observation in plain English."""

    def translate(self, state):
        """Render the first 17 observation entries, one per line.

        Args:
            state: Indexable observation with at least 17 numeric entries.

        Returns:
            Seventeen newline-separated ``label: value unit`` lines.
        """
        # (label, unit) for each observation index, in order.
        fields = (
            ("Z-coordinate of the top (height of walker)", "m"),
            ("Angle of the top", "rad"),
            ("Angle of the thigh joint", "rad"),
            ("Angle of the leg joint", "rad"),
            ("Angle of the foot joint", "rad"),
            ("Angle of the left thigh joint", "rad"),
            ("Angle of the left leg joint", "rad"),
            ("Angle of the left foot joint", "rad"),
            ("Velocity of the x-coordinate of the top", "m/s"),
            ("Velocity of the z-coordinate (height) of the top", "m/s"),
            ("Angular velocity of the angle of the top", "rad/s"),
            ("Angular velocity of the thigh hinge", "rad/s"),
            ("Angular velocity of the leg hinge", "rad/s"),
            ("Angular velocity of the foot hinge", "rad/s"),
            ("Angular velocity of the thigh hinge (left)", "rad/s"),
            ("Angular velocity of the leg hinge (left)", "rad/s"),
            ("Angular velocity of the foot hinge (left)", "rad/s"),
        )
        rendered = [
            f"{label}: {state[idx]:.2f} {unit}"
            for idx, (label, unit) in enumerate(fields)
        ]
        return "\n".join(rendered)
|
28 |
+
|
29 |
+
class GameDescriber:
    """Static natural-language descriptions of the Walker2D task."""

    def __init__(self, args):
        """Store the run options read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        self.max_episode_len = args.max_episode_len
        self.is_only_local_obs = args.is_only_local_obs == 1
        # No per-action or per-reward descriptions are defined for this task.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the goal statement of the task."""
        return "".join([
            "The goal in the Walker2D environment is to coordinate both sets of feet, legs, and thighs ",
            "to move in the forward (right) direction by applying torques to the six hinges connecting ",
            "the six body parts. The objective is to make the robot walk forward.",
        ])

    def describe_game(self):
        """Return the overview paragraph of the task."""
        return "".join([
            "In the Walker2D environment, you control a two-dimensional two-legged walker with four main body parts. ",
            "Your objective is to make the walker move forward by coordinating the torques applied to the six hinges ",
            "connecting the body parts. The environment provides observations of the walker's body parts and velocities, ",
            "including the torso, leg, and thigh angles, orientations, and velocities. The goal is to make the walker walk ",
            "forward in the positive x-direction.",
        ])

    def describe_action(self):
        """Return the prompt asking for six torque values."""
        return "".join([
            "Your next move: \nPlease provide a list of six numerical values, each within the range of [-1, 1], ",
            "representing the torques to be applied at the six hinge joints of the walker. These torques will help ",
            "coordinate the walker's movements and make it walk in the desired direction.",
        ])
|
66 |
+
|
67 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Walker2d transitions (state, action, reward, next state)."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each one is read for the keys
                ``state``, ``action``, ``reward`` and ``next_state``.
            is_current: When true, only the ``state`` of the last record is
                described and returned as a single string.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            description per record.
        """
        describe_state = BasicLevelTranslator().translate
        if is_current:
            return describe_state(infos[-1]['state'])

        hinge_labels = ("Thigh", "Leg", "Foot", "Left Thigh", "Left Leg", "Left Foot")
        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"
            state_desc = describe_state(info['state'])
            torques = info['action']
            action_desc = "Torques Applied: " + ", ".join(
                f"{label}: {torques[idx]:.2f}"
                for idx, label in enumerate(hinge_labels)
            )
            reward_desc = f"Result: Reward of {info['reward']:.2f}"
            next_state_desc = describe_state(info['next_state'])
            descriptions.append("\n".join(
                (state_desc, action_desc, reward_desc, "Transit to", next_state_desc)
            ))
        return descriptions
|
main_reflexion.py
CHANGED
@@ -292,7 +292,7 @@ if __name__ == "__main__":
|
|
292 |
parser.add_argument(
|
293 |
"--api_type",
|
294 |
type=str,
|
295 |
-
default="
|
296 |
choices=["azure", "openai"],
|
297 |
help="choose api type, now support azure and openai"
|
298 |
)
|
|
|
292 |
parser.add_argument(
|
293 |
"--api_type",
|
294 |
type=str,
|
295 |
+
default="openai",
|
296 |
choices=["azure", "openai"],
|
297 |
help="choose api type, now support azure and openai"
|
298 |
)
|
shell/test_mujoco_ant.sh
CHANGED
@@ -1,6 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# exe
|
2 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
3 |
-
|
4 |
-
python main_reflexion.py --env_name
|
5 |
-
|
6 |
-
python main_reflexion.py --env_name
|
|
|
1 |
+
|
2 |
+
# Ant-v4
|
3 |
+
|
4 |
+
# REFLEXION
|
5 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
6 |
+
|
7 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
8 |
+
|
9 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
10 |
+
|
11 |
+
|
12 |
# exe
|
13 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
14 |
+
|
15 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
16 |
+
|
17 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_halfcheetah.sh
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# HalfCheetah-v4
|
3 |
+
# Naive Actor
|
4 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
|
5 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
6 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
8 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
|
9 |
+
|
10 |
+
# COT
|
11 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
12 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
13 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
14 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
15 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
|
16 |
+
|
17 |
+
# self consistency
|
18 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
|
19 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
20 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
21 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
22 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
|
23 |
+
|
24 |
+
# self-ask
|
25 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
|
26 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
27 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
28 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
29 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
|
30 |
+
|
31 |
+
# SPP
|
32 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
33 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
34 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
35 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
36 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
|
37 |
+
|
38 |
+
# REFLEXION
|
39 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
40 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 2 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
41 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
42 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/halfcheetahpole"
|
43 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
44 |
+
|
45 |
+
|
46 |
+
# exe
|
47 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
48 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
|
49 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
50 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
|
51 |
+
python main_reflexion.py --env_name HalfCheetah-v4 --init_summarizer halfcheetah_init_translator --curr_summarizer halfcheetah_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_hopper.sh
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Hopper-v4
|
2 |
+
|
3 |
+
# REFLEXION
|
4 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
7 |
+
|
8 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
9 |
+
|
10 |
+
|
11 |
+
# exe
|
12 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
13 |
+
|
14 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
15 |
+
|
16 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_invertedDoublePendulum.sh
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# InvertedDoublePendulum-v4
|
2 |
+
|
3 |
+
# REFLEXION
|
4 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
7 |
+
|
8 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
9 |
+
|
10 |
+
|
11 |
+
# exe
|
12 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
13 |
+
|
14 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
15 |
+
|
16 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_invertedPendulum.sh
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# InvertedPendulum-v4
|
2 |
+
|
3 |
+
# REFLEXION
|
4 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
7 |
+
|
8 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
9 |
+
|
10 |
+
|
11 |
+
# exe
|
12 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
13 |
+
|
14 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
15 |
+
|
16 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_walker2d.sh
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Walker2d-v4
|
2 |
+
|
3 |
+
# REFLEXION
|
4 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
7 |
+
|
8 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
9 |
+
|
10 |
+
|
11 |
+
# exe
|
12 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
13 |
+
|
14 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
15 |
+
|
16 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|