Spaces:
Runtime error
Runtime error
Merge branch 'mujoco-env' into master
Browse files- README.md +24 -1
- envs/__init__.py +9 -2
- envs/mujoco/__init__.py +0 -0
- envs/mujoco/ant_policies.py +15 -0
- envs/mujoco/ant_translator.py +129 -0
- main_reflexion.py +2 -1
- shell/test_mujoco_ant.sh +6 -0
README.md
CHANGED
@@ -62,4 +62,27 @@ Or you can also test this by copying a command from a .sh script
|
|
62 |
```
|
63 |
python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
|
64 |
```
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
```
|
63 |
python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
|
64 |
```
|
65 |
+
|
66 |
+
If you use an OpenAI key, add `--api_type openai` to the end of the command.
|
67 |
+
|
68 |
+
### Install Mujoco Environment
|
69 |
+
1. Download MuJoCo; [mujoco210](https://github.com/google-deepmind/mujoco/releases/tag/2.1.0) is recommended. For Linux, the archive is `mujoco210-linux-x86_64.tar.gz
|
70 |
+
`, then
|
71 |
+
- create the MuJoCo directory: `mkdir ~/.mujoco`
|
72 |
+
- copy the downloaded archive into `~/.mujoco` with `cp mujoco210-linux-x86_64.tar.gz ~/.mujoco` and, from inside that directory, extract it with `tar -zxvf mujoco210-linux-x86_64.tar.gz`
|
73 |
+
- open `~/.bashrc` (for example with `vim ~/.bashrc`) and add the following line to it:
|
74 |
+
`export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/<user>/.mujoco/mujoco210/bin
|
75 |
+
`
|
76 |
+
|
77 |
+
2. Install `mujoco_py`, which allows using MuJoCo from Python:
|
78 |
+
```
|
79 |
+
sudo apt install libosmesa6-dev libgl1-mesa-glx libglfw3
|
80 |
+
sudo apt-get install libglew-dev
|
81 |
+
|
82 |
+
pip install mujoco-py==2.1.2.14
|
83 |
+
pip install cython==0.29.37
|
84 |
+
```
|
85 |
+
|
86 |
+
3. Install `gym[mujoco]`:
|
87 |
+
`pip install gym[mujoco]`
|
88 |
+
|
envs/__init__.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from .base_env import BaseEnv, SettableStateEnv
|
|
|
2 |
from .classic_control import cartpole_translator, cartpole_policies
|
3 |
from .classic_control import acrobot_translator, acrobot_policies
|
4 |
from .classic_control import mountaincar_translator, mountaincar_policies
|
@@ -10,11 +11,13 @@ from .toy_text import blackjack_translator, blackjack_policies
|
|
10 |
from .toy_text import taxi_translator, taxi_policies
|
11 |
from .toy_text import cliffwalking_translator, cliffwalking_policies
|
12 |
from .toy_text import frozenlake_translator, frozenlake_policies
|
|
|
13 |
from .atari import register_environments
|
14 |
from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
|
15 |
-
|
16 |
register_environments()
|
17 |
|
|
|
|
|
18 |
REGISTRY = {}
|
19 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
20 |
REGISTRY["base_env"] = BaseEnv
|
@@ -91,4 +94,8 @@ REGISTRY["RepresentedPong_basic_policies"] = [
|
|
91 |
Pong_policies.dedicated_4_policy,
|
92 |
Pong_policies.dedicated_5_policy,
|
93 |
Pong_policies.dedicated_6_policy,
|
94 |
-
]
|
|
|
|
|
|
|
|
|
|
1 |
from .base_env import BaseEnv, SettableStateEnv
|
2 |
+
|
3 |
from .classic_control import cartpole_translator, cartpole_policies
|
4 |
from .classic_control import acrobot_translator, acrobot_policies
|
5 |
from .classic_control import mountaincar_translator, mountaincar_policies
|
|
|
11 |
from .toy_text import taxi_translator, taxi_policies
|
12 |
from .toy_text import cliffwalking_translator, cliffwalking_policies
|
13 |
from .toy_text import frozenlake_translator, frozenlake_policies
|
14 |
+
|
15 |
from .atari import register_environments
|
16 |
from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
|
|
|
17 |
register_environments()
|
18 |
|
19 |
+
from .mujoco import ant_translator, ant_policies
|
20 |
+
|
21 |
REGISTRY = {}
|
22 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
23 |
REGISTRY["base_env"] = BaseEnv
|
|
|
94 |
Pong_policies.dedicated_4_policy,
|
95 |
Pong_policies.dedicated_5_policy,
|
96 |
Pong_policies.dedicated_6_policy,
|
97 |
+
]
|
98 |
+
|
99 |
+
# Register the Ant translators and baseline policies from the .mujoco package.
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
100 |
+
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
101 |
+
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
envs/mujoco/__init__.py
ADDED
File without changes
|
envs/mujoco/ant_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action=1):
    """Return a random action: a list of 8 floats, each drawn from [-1, 1).

    Args:
        state: Current observation. Ignored by this policy.
        pre_action: Previous action. Ignored by this policy; it defaults to 1
            so the signature matches ``real_random_policy``.

    Returns:
        A new list of 8 independent samples of ``2 * random.random() - 1``.

    Side effects:
        Sets ``pseudo_random_policy.description`` on every call, so the
        attribute only exists after the policy has been invoked once.
    """
    pseudo_random_policy.description = "Select action randomly"
    return [2 * random.random() - 1 for _ in range(8)]
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random action: a list of 8 floats, each drawn from [-1, 1).

    Args:
        state: Current observation. Ignored by this policy.
        pre_action: Previous action. Ignored by this policy.

    Returns:
        A new list of 8 independent samples of ``2 * random.random() - 1``.

    Side effects:
        Sets ``real_random_policy.description`` on every call, so the
        attribute only exists after the policy has been invoked once.
    """
    real_random_policy.description = "Select action with a random policy"
    return [2 * random.random() - 1 for _ in range(8)]
|
envs/mujoco/ant_translator.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class BasicLevelTranslator:
    """Turn one observation vector into a labelled, comma-separated string."""

    # Display labels, applied positionally to the leading observation entries.
    _FIELD_LABELS = (
        "Torso Z-coordinate",
        "Torso X-orientation",
        "Torso Y-orientation",
        "Torso Z-orientation",
        "Torso W-orientation",
        "Front Left Hip Angle",
        "Front Left Link Angle",
        "Front Right Hip Angle",
        "Front Right Link Angle",
        "Back Left Hip Angle",
        "Back Left Link Angle",
        "Back Right Hip Angle",
        "Back Right Link Angle",
        "Torso X Velocity",
        "Torso Y Velocity",
        "Torso Z Velocity",
        "Torso X Angular Velocity",
        "Torso Y Angular Velocity",
        "Torso Z Angular Velocity",
        "Front Left Hip Angular Velocity",
        "Front Left Link Angular Velocity",
        "Front Right Hip Angular Velocity",
        "Front Right Link Angular Velocity",
        "Back Left Hip Angular Velocity",
        "Back Left Link Angular Velocity",
        "Back Right Hip Angular Velocity",
        "Back Right Link Angular Velocity",
    )

    def __init__(self):
        """No state is kept; the translator is purely functional."""
        pass

    def translate(self, state):
        """Describe the first 27 entries of ``state`` as ``"Label: 0.00, ..."``.

        Args:
            state: Sliceable sequence of numbers with at least 27 entries.
                Entries past the 27th are ignored.

        Returns:
            One string of 27 ``"<label>: <value>"`` items joined by ``", "``,
            with each value formatted to two decimals.

        Raises:
            ValueError: If ``state`` has fewer than 27 entries.
        """
        expected = len(self._FIELD_LABELS)
        values = state[:expected]
        # The labels are positional, so a short observation would be
        # mislabelled; fail loudly rather than emit a truncated description.
        if len(values) != expected:
            raise ValueError(
                f"expected at least {expected} observation values, got {len(values)}"
            )
        return ", ".join(
            f"{label}: {value:.2f}"
            for label, value in zip(self._FIELD_LABELS, values)
        )
|
67 |
+
|
68 |
+
class GameDescriber:
    """Static natural-language descriptions of the Ant task for prompts."""

    def __init__(self, args):
        """Store the run options this describer reads from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against 1)
                and ``max_episode_len`` attributes.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        # Left empty here; nothing in this class populates or reads them.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        return "The goal is to coordinate the four legs of the ant robot to move forward."

    def describe_game(self):
        """Return the multi-sentence description of the environment."""
        return (
            "In the Ant environment, you control a 3D robot called the ant. The ant has a torso with four legs, "
            "each consisting of two links and connected by hinge joints. Your objective is to apply torques to "
            "the eight hinge joints to coordinate the four legs and make the ant move forward in the positive x-direction. "
            "The environment provides observations of the ant's body parts and velocities, including the torso and leg angles, "
            "orientations, and velocities. The episode ends when the ant becomes unhealthy, which can be due to various conditions."
        )

    def describe_action(self):
        """Return the instruction asking for eight torque values in [-1,1]."""
        return (
            "Your next move: \n Please provide a list of eight numerical values, each within the range of [-1,1], "
            "representing the torques to be applied at the eight hinge joints of the ant."
        )
|
100 |
+
|
101 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a sequence of transition records, or only the latest state."""

    # One placeholder per action entry, filled positionally from info['action'].
    _ACTION_TEMPLATE = (
        "Take Action: "
        "Apply Front Left Hip Torque: {:.2f}, "
        "Apply Front Left Link Torque: {:.2f}, "
        "Apply Front Right Hip Torque: {:.2f}, "
        "Apply Front Right Link Torque: {:.2f}, "
        "Apply Back Left Hip Torque: {:.2f}, "
        "Apply Back Left Link Torque: {:.2f}, "
        "Apply Back Right Hip Torque: {:.2f}, "
        "Apply Back Right Link Torque: {:.2f}"
    )

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each dict needs a ``'state'`` key; when
                ``is_current`` is false it also needs ``'action'`` (indexable
                at positions 0-7), ``'reward'`` and ``'next_state'``.
            is_current: If true, describe only ``infos[-1]['state']``.

        Returns:
            A single state-description string when ``is_current`` is true;
            otherwise a list with one transition description per record.
        """
        if is_current:
            # Reuse the inherited per-state formatter instead of building a
            # throwaway BasicLevelTranslator for every call.
            return super().translate(infos[-1]['state'])

        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"

            state_desc = super().translate(info['state'])
            action = info['action']
            # Index explicitly (not slice) so a short action still raises
            # IndexError exactly as before.
            action_desc = self._ACTION_TEMPLATE.format(
                *(action[k] for k in range(8))
            )
            reward_desc = f"Result: Reward of {info['reward']:.2f}, "
            next_state_desc = super().translate(info['next_state'])
            # NOTE(review): "\\n" emits a literal backslash-n, not a newline;
            # kept as-is because consumers may rely on it -- confirm intent.
            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
        return descriptions
|
main_reflexion.py
CHANGED
@@ -221,7 +221,7 @@ if __name__ == "__main__":
|
|
221 |
help="The actor used to select action",
|
222 |
)
|
223 |
parser.add_argument(
|
224 |
-
"--gpt_version", type=str, default="gpt-
|
225 |
)
|
226 |
parser.add_argument(
|
227 |
"--render", type=str, default="rgb_array", help="The render mode"
|
@@ -296,6 +296,7 @@ if __name__ == "__main__":
|
|
296 |
"--api_type",
|
297 |
type=str,
|
298 |
default="azure",
|
|
|
299 |
help="choose api type, now support azure and openai"
|
300 |
)
|
301 |
args = parser.parse_args()
|
|
|
221 |
help="The actor used to select action",
|
222 |
)
|
223 |
parser.add_argument(
|
224 |
+
"--gpt_version", type=str, default="gpt-3.5-turbo", help="The version of GPT to use"
|
225 |
)
|
226 |
parser.add_argument(
|
227 |
"--render", type=str, default="rgb_array", help="The render mode"
|
|
|
296 |
"--api_type",
|
297 |
type=str,
|
298 |
default="azure",
|
299 |
+
choices=["azure", "openai"],
|
300 |
help="choose api type, now support azure and openai"
|
301 |
)
|
302 |
args = parser.parse_args()
|
shell/test_mujoco_ant.sh
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# exe_actor runs: Ant-v4 at prompt level 1, then CliffWalking-v0 at prompt levels 2-5
|
2 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
3 |
+
python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
|
4 |
+
python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
5 |
+
python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
|
6 |
+
python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|