Jarvis committed on
Commit
13ed679
·
2 Parent(s): 8e1f27f 0a92014

Merge branch 'mujoco-env' into master

Browse files
README.md CHANGED
@@ -62,4 +62,27 @@ Or you can also test this by copying a command from a .sh script
62
  ```
63
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
64
  ```
65
- If you use openai key, please add "--api_type openai" at the end of the command!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  ```
63
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
64
  ```
65
+
66
+ If you use an OpenAI key, please add "--api_type openai" at the end of the command!
67
+
68
+ ### Install Mujoco Environment
69
+ 1. Download MuJoCo (recommended: [mujoco210](https://github.com/google-deepmind/mujoco/releases/tag/2.1.0)); for Linux, the archive is `mujoco210-linux-x86_64.tar.gz
70
+ `, then
71
+ - create a new directory: `mkdir ~/.mujoco`
72
+ - copy the downloaded archive into that directory with `cp mujoco210-linux-x86_64.tar.gz ~/.mujoco`, then extract it from inside `~/.mujoco` with `tar -zxvf mujoco210-linux-x86_64.tar.gz`
73
+ - open `~/.bashrc` (e.g. `vim ~/.bashrc`) and add the following line to it:
74
+ `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/<user>/.mujoco/mujoco210/bin
75
+ `
76
+
77
+ 2. Install mujoco_py, which allows using MuJoCo from Python:
78
+ ```
79
+ sudo apt install libosmesa6-dev libgl1-mesa-glx libglfw3
80
+ sudo apt-get install libglew-dev
81
+
82
+ pip install mujoco-py==2.1.2.14
83
+ pip install cython==0.29.37
84
+ ```
85
+
86
+ 3. Install gym[mujoco]:
87
+ `pip install gym[mujoco]`
88
+
envs/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
  from .base_env import BaseEnv, SettableStateEnv
 
2
  from .classic_control import cartpole_translator, cartpole_policies
3
  from .classic_control import acrobot_translator, acrobot_policies
4
  from .classic_control import mountaincar_translator, mountaincar_policies
@@ -10,11 +11,13 @@ from .toy_text import blackjack_translator, blackjack_policies
10
  from .toy_text import taxi_translator, taxi_policies
11
  from .toy_text import cliffwalking_translator, cliffwalking_policies
12
  from .toy_text import frozenlake_translator, frozenlake_policies
 
13
  from .atari import register_environments
14
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
15
-
16
  register_environments()
17
 
 
 
18
  REGISTRY = {}
19
  REGISTRY["sampling_wrapper"] = SettableStateEnv
20
  REGISTRY["base_env"] = BaseEnv
@@ -91,4 +94,8 @@ REGISTRY["RepresentedPong_basic_policies"] = [
91
  Pong_policies.dedicated_4_policy,
92
  Pong_policies.dedicated_5_policy,
93
  Pong_policies.dedicated_6_policy,
94
- ]
 
 
 
 
 
1
  from .base_env import BaseEnv, SettableStateEnv
2
+
3
  from .classic_control import cartpole_translator, cartpole_policies
4
  from .classic_control import acrobot_translator, acrobot_policies
5
  from .classic_control import mountaincar_translator, mountaincar_policies
 
11
  from .toy_text import taxi_translator, taxi_policies
12
  from .toy_text import cliffwalking_translator, cliffwalking_policies
13
  from .toy_text import frozenlake_translator, frozenlake_policies
14
+
15
  from .atari import register_environments
16
  from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
 
17
  register_environments()
18
 
19
+ from .mujoco import ant_translator, ant_policies
20
+
21
  REGISTRY = {}
22
  REGISTRY["sampling_wrapper"] = SettableStateEnv
23
  REGISTRY["base_env"] = BaseEnv
 
94
  Pong_policies.dedicated_4_policy,
95
  Pong_policies.dedicated_5_policy,
96
  Pong_policies.dedicated_6_policy,
97
+ ]
98
+
99
+ REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
100
+ REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
101
+ REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
envs/mujoco/__init__.py ADDED
File without changes
envs/mujoco/ant_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
+ def pseudo_random_policy(state, pre_action):
5
+ def get_description():
6
+ return "Select action randomly"
7
+ pseudo_random_policy.description = get_description()
8
+ return [2 * random.random() - 1 for i in range(8)]
9
+
10
+
11
+ def real_random_policy(state, pre_action=1):
12
+ def get_description():
13
+ return "Select action with a random policy"
14
+ real_random_policy.description = get_description()
15
+ return [2 * random.random() - 1 for i in range(8)]
envs/mujoco/ant_translator.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class BasicLevelTranslator:
2
+ def __init__(self):
3
+ pass
4
+
5
+ def translate(self, state):
6
+ (
7
+ torso_z_coordinate,
8
+ torso_x_orientation,
9
+ torso_y_orientation,
10
+ torso_z_orientation,
11
+ torso_w_orientation,
12
+ front_left_hip_angle,
13
+ front_left_link_angle,
14
+ front_right_hip_angle,
15
+ front_right_link_angle,
16
+ back_left_hip_angle,
17
+ back_left_link_angle,
18
+ back_right_hip_angle,
19
+ back_right_link_angle,
20
+ torso_x_velocity,
21
+ torso_y_velocity,
22
+ torso_z_velocity,
23
+ torso_x_angular_velocity,
24
+ torso_y_angular_velocity,
25
+ torso_z_angular_velocity,
26
+ front_left_hip_angular_velocity,
27
+ front_left_link_angular_velocity,
28
+ front_right_hip_angular_velocity,
29
+ front_right_link_angular_velocity,
30
+ back_left_hip_angular_velocity,
31
+ back_left_link_angular_velocity,
32
+ back_right_hip_angular_velocity,
33
+ back_right_link_angular_velocity,
34
+ ) = state[:27]
35
+
36
+ res = (
37
+ f"Torso Z-coordinate: {torso_z_coordinate:.2f}, "
38
+ f"Torso X-orientation: {torso_x_orientation:.2f}, "
39
+ f"Torso Y-orientation: {torso_y_orientation:.2f}, "
40
+ f"Torso Z-orientation: {torso_z_orientation:.2f}, "
41
+ f"Torso W-orientation: {torso_w_orientation:.2f}, "
42
+ f"Front Left Hip Angle: {front_left_hip_angle:.2f}, "
43
+ f"Front Left Link Angle: {front_left_link_angle:.2f}, "
44
+ f"Front Right Hip Angle: {front_right_hip_angle:.2f}, "
45
+ f"Front Right Link Angle: {front_right_link_angle:.2f}, "
46
+ f"Back Left Hip Angle: {back_left_hip_angle:.2f}, "
47
+ f"Back Left Link Angle: {back_left_link_angle:.2f}, "
48
+ f"Back Right Hip Angle: {back_right_hip_angle:.2f}, "
49
+ f"Back Right Link Angle: {back_right_link_angle:.2f}, "
50
+ f"Torso X Velocity: {torso_x_velocity:.2f}, "
51
+ f"Torso Y Velocity: {torso_y_velocity:.2f}, "
52
+ f"Torso Z Velocity: {torso_z_velocity:.2f}, "
53
+ f"Torso X Angular Velocity: {torso_x_angular_velocity:.2f}, "
54
+ f"Torso Y Angular Velocity: {torso_y_angular_velocity:.2f}, "
55
+ f"Torso Z Angular Velocity: {torso_z_angular_velocity:.2f}, "
56
+ f"Front Left Hip Angular Velocity: {front_left_hip_angular_velocity:.2f}, "
57
+ f"Front Left Link Angular Velocity: {front_left_link_angular_velocity:.2f}, "
58
+ f"Front Right Hip Angular Velocity: {front_right_hip_angular_velocity:.2f}, "
59
+ f"Front Right Link Angular Velocity: {front_right_link_angular_velocity:.2f}, "
60
+ f"Back Left Hip Angular Velocity: {back_left_hip_angular_velocity:.2f}, "
61
+ f"Back Left Link Angular Velocity: {back_left_link_angular_velocity:.2f}, "
62
+ f"Back Right Hip Angular Velocity: {back_right_hip_angular_velocity:.2f}, "
63
+ f"Back Right Link Angular Velocity: {back_right_link_angular_velocity:.2f}"
64
+ )
65
+
66
+ return res
67
+
68
+ class GameDescriber:
69
+ def __init__(self, args):
70
+ self.is_only_local_obs = args.is_only_local_obs == 1
71
+ self.max_episode_len = args.max_episode_len
72
+ self.action_desc_dict = {
73
+ }
74
+ self.reward_desc_dict = {
75
+ }
76
+
77
+ def translate_terminate_state(self, state, episode_len, max_episode_len):
78
+ return ""
79
+
80
+ def translate_potential_next_state(self, state, action):
81
+ return ""
82
+
83
+ def describe_goal(self):
84
+ return "The goal is to coordinate the four legs of the ant robot to move forward."
85
+
86
+ def describe_game(self):
87
+ return (
88
+ "In the Ant environment, you control a 3D robot called the ant. The ant has a torso with four legs, "
89
+ "each consisting of two links and connected by hinge joints. Your objective is to apply torques to "
90
+ "the eight hinge joints to coordinate the four legs and make the ant move forward in the positive x-direction. "
91
+ "The environment provides observations of the ant's body parts and velocities, including the torso and leg angles, "
92
+ "orientations, and velocities. The episode ends when the ant becomes unhealthy, which can be due to various conditions."
93
+ )
94
+
95
+ def describe_action(self):
96
+ return (
97
+ "Your next move: \n Please provide a list of eight numerical values, each within the range of [-1,1], "
98
+ "representing the torques to be applied at the eight hinge joints of the ant."
99
+ )
100
+
101
+ class BasicStateSequenceTranslator(BasicLevelTranslator):
102
+ def translate(self, infos, is_current=False):
103
+ descriptions = []
104
+ if is_current:
105
+ state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
106
+ return state_desc
107
+ for i, info in enumerate(infos):
108
+ assert 'state' in info, "info should contain state information"
109
+
110
+ state_desc = BasicLevelTranslator().translate(info['state'])
111
+ action_desc = (
112
+ "Take Action: "
113
+ "Apply Front Left Hip Torque: {:.2f}, "
114
+ "Apply Front Left Link Torque: {:.2f}, "
115
+ "Apply Front Right Hip Torque: {:.2f}, "
116
+ "Apply Front Right Link Torque: {:.2f}, "
117
+ "Apply Back Left Hip Torque: {:.2f}, "
118
+ "Apply Back Left Link Torque: {:.2f}, "
119
+ "Apply Back Right Hip Torque: {:.2f}, "
120
+ "Apply Back Right Link Torque: {:.2f}"
121
+ ).format(
122
+ info['action'][0], info['action'][1], info['action'][2], info['action'][3],
123
+ info['action'][4], info['action'][5], info['action'][6], info['action'][7]
124
+ )
125
+
126
+ reward_desc = f"Result: Reward of {info['reward']:.2f}, "
127
+ next_state_desc = BasicLevelTranslator().translate(info['next_state'])
128
+ descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
129
+ return descriptions
main_reflexion.py CHANGED
@@ -221,7 +221,7 @@ if __name__ == "__main__":
221
  help="The actor used to select action",
222
  )
223
  parser.add_argument(
224
- "--gpt_version", type=str, default="gpt-35-turbo", help="The version of GPT to use"
225
  )
226
  parser.add_argument(
227
  "--render", type=str, default="rgb_array", help="The render mode"
@@ -296,6 +296,7 @@ if __name__ == "__main__":
296
  "--api_type",
297
  type=str,
298
  default="azure",
 
299
  help="choose api type, now support azure and openai"
300
  )
301
  args = parser.parse_args()
 
221
  help="The actor used to select action",
222
  )
223
  parser.add_argument(
224
+ "--gpt_version", type=str, default="gpt-3.5-turbo", help="The version of GPT to use"
225
  )
226
  parser.add_argument(
227
  "--render", type=str, default="rgb_array", help="The render mode"
 
296
  "--api_type",
297
  type=str,
298
  default="azure",
299
+ choices=["azure", "openai"],
300
  help="choose api type, now support azure and openai"
301
  )
302
  args = parser.parse_args()
shell/test_mujoco_ant.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # exe
2
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
3
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
4
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
5
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
6
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator