Spaces:

MAIL-CS-ECNU
/

Text-Gym-Agents

Runtime error

App Files Files Community

ewanlee committed on Jan 9, 2024

Commit

a2afd48

1 Parent(s): 5f8fd68

Synced repo using 'sync_with_huggingface' GitHub Action

Browse files

Files changed (14) hide show

app.py +7 -0
deciders/act.py +2 -2
deciders/cot.py +2 -2
deciders/exe.py +2 -2
deciders/reflexion.py +2 -2
deciders/self_consistency.py +2 -2
deciders/selfask.py +2 -2
deciders/spp.py +2 -2
deciders/utils.py +26 -4
envs/__init__.py +16 -56
envs/mujoco/invertedDoublePendulum_translator.py +14 -19
envs/mujoco/invertedPendulum_translator.py +19 -14
record_reflexion.csv +1 -2
shell/test_reflexion.sh +28 -28

app.py CHANGED Viewed

@@ -260,6 +260,7 @@ def main_progress(
 if __name__ == "__main__":
     # install Atari ROMs
     subprocess.run(['AutoROM', '--accept-license'])
@@ -357,6 +358,12 @@ if __name__ == "__main__":
                  "FrozenLake-v1",
                  "MountainCarContinuous-v0",
                  "Ant-v4",
                  "RepresentedBoxing-v0",
                  "RepresentedPong-v0",
                  "RepresentedMsPacman-v0",

 if __name__ == "__main__":
+    # Github action test 8
     # install Atari ROMs
     subprocess.run(['AutoROM', '--accept-license'])
                  "FrozenLake-v1",
                  "MountainCarContinuous-v0",
                  "Ant-v4",
+                 "HalfCheetah-v4",
+                 "Hopper-v4",
+                 "Walker2d-v4",
+                 "Swimmer-v4",
+                 "Reacher-v4",
+                 "Pusher-v4",
                  "RepresentedBoxing-v0",
                  "RepresentedPong-v0",
                  "RepresentedMsPacman-v0",

deciders/act.py CHANGED Viewed

@@ -26,7 +26,7 @@ class RandomAct():
         return action, '', '', '', 0, 0
 class NaiveAct(gpt):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
         self.action_space = action_space
         self.temperature = temperature
         self.action_desc_dict = args.action_desc_dict
@@ -39,7 +39,7 @@ class NaiveAct(gpt):
         else:
             model = args.gpt_version
         self.encoding = tiktoken.encoding_for_model(model)
-        super().__init__(args)
         self.distiller = distiller
         self.fewshot_example_initialization(args.prompt_level, args.prompt_path, distiller = self.distiller)
         if isinstance(self.action_space, Discrete):

         return action, '', '', '', 0, 0
 class NaiveAct(gpt):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
         self.action_space = action_space
         self.temperature = temperature
         self.action_desc_dict = args.action_desc_dict
         else:
             model = args.gpt_version
         self.encoding = tiktoken.encoding_for_model(model)
+        super().__init__(args, openai_key)
         self.distiller = distiller
         self.fewshot_example_initialization(args.prompt_level, args.prompt_path, distiller = self.distiller)
         if isinstance(self.action_space, Discrete):

deciders/cot.py CHANGED Viewed

@@ -17,8 +17,8 @@ from .utils import run_chain
 class ChainOfThought(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens,logger)
     def act(
         self,

 class ChainOfThought(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens,logger)
     def act(
         self,

deciders/exe.py CHANGED Viewed

@@ -20,8 +20,8 @@ from loguru import logger
 class EXE(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0., max_tokens=None, logger=None, fixed_suggestion=None, fixed_insight=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True

 class EXE(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0., max_tokens=None, logger=None, fixed_suggestion=None, fixed_insight=None):
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True

deciders/reflexion.py CHANGED Viewed

@@ -19,8 +19,8 @@ from .utils import run_chain
 class Reflexion(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
     def num_tokens_from_string(self,string: str) -> int:
         """Returns the number of tokens in a text string."""

 class Reflexion(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens, logger)
     def num_tokens_from_string(self,string: str) -> int:
         """Returns the number of tokens in a text string."""

deciders/self_consistency.py CHANGED Viewed

@@ -17,9 +17,9 @@ from .utils import run_chain
 class SelfConsistency(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
         temperature = 0.7
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.temperature = temperature
     def act(

 class SelfConsistency(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
         temperature = 0.7
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.temperature = temperature
     def act(

deciders/selfask.py CHANGED Viewed

@@ -17,8 +17,8 @@ from .utils import run_chain
 class SelfAskAct(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens,logger)
     def act(
         self,

 class SelfAskAct(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens,logger)
     def act(
         self,

deciders/spp.py CHANGED Viewed

@@ -16,8 +16,8 @@ from .act import NaiveAct
 from .utils import run_chain
 class SPP(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
     def act(
         self,

 from .utils import run_chain
 class SPP(NaiveAct):
+    def __init__(self, openai_key, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
+        super().__init__(openai_key, action_space, args, prompts, distiller, temperature, max_tokens, logger)
     def act(
         self,

deciders/utils.py CHANGED Viewed

@@ -19,8 +19,30 @@ Model = Literal["gpt-4", "gpt-35-turbo", "text-davinci-003"]
 # from .gpt import gpt
 # gpt().__init__()
-import timeout_decorator
-@timeout_decorator.timeout(30)
 def run_chain(chain, *args, **kwargs):
     return chain.run(*args, **kwargs)
@@ -54,6 +76,7 @@ def get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-t
             temperature=temperature,
             # request_timeout = 1
         )
         return response.choices[0]["message"]["content"]
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
@@ -85,5 +108,4 @@ def get_chat(prompt: str, api_type: str = "azure", model: str = "gpt-35-turbo",
             temperature=temperature,
             # request_timeout = 1
         )
-        return response.choices[0]["message"]["content"]

 # from .gpt import gpt
 # gpt().__init__()
+# import timeout_decorator
+# @timeout_decorator.timeout(30)
+# def run_chain(chain, *args, **kwargs):
+#     return chain.run(*args, **kwargs)
+import concurrent.futures
+def timeout_decorator(timeout):
+    def decorator(function):
+        def wrapper(*args, **kwargs):
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(function, *args, **kwargs)
+                try:
+                    return future.result(timeout)
+                except concurrent.futures.TimeoutError:
+                    raise RuntimeError(
+                        f"Function '{function.__name__}' timed out after {timeout} seconds"
+                    )
+                except Exception as e:
+                    raise e
+        return wrapper
+    return decorator
+@timeout_decorator(30)
 def run_chain(chain, *args, **kwargs):
     return chain.run(*args, **kwargs)
             temperature=temperature,
             # request_timeout = 1
         )
+        import pdb; pdb.set_trace()
         return response.choices[0]["message"]["content"]
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
             temperature=temperature,
             # request_timeout = 1
         )
+        return response.choices[0]["message"]["content"]

envs/__init__.py CHANGED Viewed

@@ -18,24 +18,25 @@ from .atari import mspacman_policies, mspacman_translator
 from .atari import montezumarevenge_policies, montezumarevenge_translator
 register_environments()
 REGISTRY = {}
 REGISTRY["sampling_wrapper"] = SettableStateEnv
 REGISTRY["base_env"] = BaseEnv
-REGISTRY["cart_init_translator"] = cartpole_translator.GameDescriber
-REGISTRY["cart_basic_translator"] = cartpole_translator.BasicStateSequenceTranslator
 REGISTRY["acrobot_init_translator"] = acrobot_translator.GameDescriber
 REGISTRY["acrobot_basic_translator"] = acrobot_translator.BasicStateSequenceTranslator
 REGISTRY["mountaincar_init_translator"] = mountaincar_translator.GameDescriber
 REGISTRY["mountaincar_basic_translator"] = mountaincar_translator.BasicStateSequenceTranslator
-REGISTRY["cart_policies"] = [cartpole_policies.dedicated_1_policy, cartpole_policies.dedicated_2_policy, cartpole_policies.pseudo_random_policy, cartpole_policies.real_random_policy]
 REGISTRY["acrobot_policies"] = [acrobot_policies.dedicated_1_policy, acrobot_policies.dedicated_2_policy, acrobot_policies.dedicated_3_policy, acrobot_policies.pseudo_random_policy, acrobot_policies.real_random_policy]
 REGISTRY["mountaincar_policies"] = [mountaincar_policies.dedicated_1_policy, mountaincar_policies.dedicated_2_policy, mountaincar_policies.dedicated_3_policy, mountaincar_policies.pseudo_random_policy, mountaincar_policies.real_random_policy]
-REGISTRY["lunarLander_init_translator"] = LunarLander_translator.GameDescriber
-REGISTRY["lunarLander_basic_translator"] = LunarLander_translator.BasicStateSequenceTranslator
-REGISTRY["lunarLander_policies"] = [LunarLander_policies.dedicated_1_policy, LunarLander_policies.dedicated_2_policy, LunarLander_policies.dedicated_3_policy,LunarLander_policies.dedicated_4_policy, LunarLander_policies.pseudo_random_policy, LunarLander_policies.real_random_policy]
 REGISTRY["blackjack_init_translator"] = blackjack_translator.GameDescriber
 REGISTRY["blackjack_basic_translator"] = blackjack_translator.BasicStateSequenceTranslator
@@ -54,9 +55,9 @@ REGISTRY["frozenlake_basic_translator"] = frozenlake_translator.BasicStateSequen
 REGISTRY["frozenlake_policies"] = [frozenlake_policies.dedicated_1_policy, frozenlake_policies.dedicated_2_policy, frozenlake_policies.dedicated_3_policy, frozenlake_policies.dedicated_4_policy, frozenlake_policies.pseudo_random_policy, frozenlake_policies.real_random_policy]
-REGISTRY["mountaincarContinuous_init_translator"] = mountaincarContinuous_translator.GameDescriber
-REGISTRY["mountaincarContinuous_basic_translator"] = mountaincarContinuous_translator.BasicStateSequenceTranslator
-REGISTRY["mountaincarContinuous_policies"] = [mountaincarContinuous_policies.pseudo_random_policy, mountaincarContinuous_policies.real_random_policy]
 REGISTRY["RepresentedBoxing_init_translator"] = Boxing_translator.GameDescriber
@@ -138,47 +139,6 @@ REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
     montezumarevenge_policies.dedicated_18_policy,
 ]
-REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
-REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
-REGISTRY["RepresentedMsPacman_basic_policies"] = [
-    mspacman_policies.real_random_policy,
-    mspacman_policies.pseudo_random_policy,
-    mspacman_policies.dedicated_1_policy,
-    mspacman_policies.dedicated_2_policy,
-    mspacman_policies.dedicated_3_policy,
-    mspacman_policies.dedicated_4_policy,
-    mspacman_policies.dedicated_5_policy,
-    mspacman_policies.dedicated_6_policy,
-    mspacman_policies.dedicated_7_policy,
-    mspacman_policies.dedicated_8_policy,
-    mspacman_policies.dedicated_9_policy,
-]
-REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
-REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
-REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
-    montezumarevenge_policies.real_random_policy,
-    montezumarevenge_policies.pseudo_random_policy,
-    montezumarevenge_policies.dedicated_1_policy,
-    montezumarevenge_policies.dedicated_2_policy,
-    montezumarevenge_policies.dedicated_3_policy,
-    montezumarevenge_policies.dedicated_4_policy,
-    montezumarevenge_policies.dedicated_5_policy,
-    montezumarevenge_policies.dedicated_6_policy,
-    montezumarevenge_policies.dedicated_7_policy,
-    montezumarevenge_policies.dedicated_8_policy,
-    montezumarevenge_policies.dedicated_9_policy,
-    montezumarevenge_policies.dedicated_10_policy,
-    montezumarevenge_policies.dedicated_11_policy,
-    montezumarevenge_policies.dedicated_12_policy,
-    montezumarevenge_policies.dedicated_13_policy,
-    montezumarevenge_policies.dedicated_14_policy,
-    montezumarevenge_policies.dedicated_15_policy,
-    montezumarevenge_policies.dedicated_16_policy,
-    montezumarevenge_policies.dedicated_17_policy,
-    montezumarevenge_policies.dedicated_18_policy,
-]
 ## For mujoco env
@@ -196,12 +156,12 @@ from .mujoco import walker2d_translator, walker2d_policies
-REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
-REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
-REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
-REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
-REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
-REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
 REGISTRY["swimmer_init_translator"] = swimmer_translator.GameDescriber

 from .atari import montezumarevenge_policies, montezumarevenge_translator
 register_environments()
+from .mujoco import ant_translator, ant_policies
 REGISTRY = {}
 REGISTRY["sampling_wrapper"] = SettableStateEnv
 REGISTRY["base_env"] = BaseEnv
+REGISTRY["cartpole_init_translator"] = cartpole_translator.GameDescriber
+REGISTRY["cartpole_basic_translator"] = cartpole_translator.BasicStateSequenceTranslator
 REGISTRY["acrobot_init_translator"] = acrobot_translator.GameDescriber
 REGISTRY["acrobot_basic_translator"] = acrobot_translator.BasicStateSequenceTranslator
 REGISTRY["mountaincar_init_translator"] = mountaincar_translator.GameDescriber
 REGISTRY["mountaincar_basic_translator"] = mountaincar_translator.BasicStateSequenceTranslator
+REGISTRY["cartpole_policies"] = [cartpole_policies.dedicated_1_policy, cartpole_policies.dedicated_2_policy, cartpole_policies.pseudo_random_policy, cartpole_policies.real_random_policy]
 REGISTRY["acrobot_policies"] = [acrobot_policies.dedicated_1_policy, acrobot_policies.dedicated_2_policy, acrobot_policies.dedicated_3_policy, acrobot_policies.pseudo_random_policy, acrobot_policies.real_random_policy]
 REGISTRY["mountaincar_policies"] = [mountaincar_policies.dedicated_1_policy, mountaincar_policies.dedicated_2_policy, mountaincar_policies.dedicated_3_policy, mountaincar_policies.pseudo_random_policy, mountaincar_policies.real_random_policy]
+REGISTRY["lunarlander_init_translator"] = LunarLander_translator.GameDescriber
+REGISTRY["lunarlander_basic_translator"] = LunarLander_translator.BasicStateSequenceTranslator
+REGISTRY["lunarlander_policies"] = [LunarLander_policies.dedicated_1_policy, LunarLander_policies.dedicated_2_policy, LunarLander_policies.dedicated_3_policy,LunarLander_policies.dedicated_4_policy, LunarLander_policies.pseudo_random_policy, LunarLander_policies.real_random_policy]
 REGISTRY["blackjack_init_translator"] = blackjack_translator.GameDescriber
 REGISTRY["blackjack_basic_translator"] = blackjack_translator.BasicStateSequenceTranslator
 REGISTRY["frozenlake_policies"] = [frozenlake_policies.dedicated_1_policy, frozenlake_policies.dedicated_2_policy, frozenlake_policies.dedicated_3_policy, frozenlake_policies.dedicated_4_policy, frozenlake_policies.pseudo_random_policy, frozenlake_policies.real_random_policy]
+REGISTRY["mountaincarcontinuous_init_translator"] = mountaincarContinuous_translator.GameDescriber
+REGISTRY["mountaincarcontinuous_basic_translator"] = mountaincarContinuous_translator.BasicStateSequenceTranslator
+REGISTRY["mountaincarcontinuous_policies"] = [mountaincarContinuous_policies.pseudo_random_policy, mountaincarContinuous_policies.real_random_policy]
 REGISTRY["RepresentedBoxing_init_translator"] = Boxing_translator.GameDescriber
     montezumarevenge_policies.dedicated_18_policy,
 ]
 ## For mujoco env
+REGISTRY["invertedpendulum_init_translator"] = invertedPendulum_translator.GameDescriber
+REGISTRY["invertedpendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
+REGISTRY["invertedpendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
+REGISTRY["inverteddoublependulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
+REGISTRY["inverteddoublependulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
+REGISTRY["inverteddoublependulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
 REGISTRY["swimmer_init_translator"] = swimmer_translator.GameDescriber

envs/mujoco/invertedDoublePendulum_translator.py CHANGED Viewed

@@ -7,16 +7,9 @@ class BasicLevelTranslator:
     def translate(self, state):
         res = (
             f"Position of the cart: {state[0]:.2f} m\n"
-            f"Sine of the angle between cart and first pole: {state[1]:.2f}\n"
-            f"Sine of the angle between two poles: {state[2]:.2f}\n"
-            f"Cosine of the angle between cart and first pole: {state[3]:.2f}\n"
-            f"Cosine of the angle between two poles: {state[4]:.2f}\n"
-            f"Velocity of the cart: {state[5]:.2f} m/s\n"
-            f"Angular velocity of angle between cart and first pole: {state[6]:.2f} rad/s\n"
-            f"Angular velocity of angle between two poles: {state[7]:.2f} rad/s\n"
-            f"Constraint Force 1: {state[8]:.2f} N\n"
-            f"Constraint Force 2: {state[9]:.2f} N\n"
-            f"Constraint Force 3: {state[10]:.2f} N"
         )
         return res
@@ -25,7 +18,7 @@ class GameDescriber:
         self.is_only_local_obs = args.is_only_local_obs == 1
         self.max_episode_len = args.max_episode_len
         self.action_desc_dict = {
-            0: "Apply a force in the range [-3, 3] to the cart to control its motion.",
         }
         self.reward_desc_dict = {}
@@ -37,22 +30,24 @@ class GameDescriber:
     def describe_goal(self):
         return (
-            "The goal in the InvertedDoublePendulum environment is to balance the two poles "\
-            "on top of the cart by applying continuous forces on the cart."
         )
     def describe_game(self):
         return (
-            "In the InvertedDoublePendulum environment, you control a system with a cart and two poles. "\
-            "Your objective is to balance the two poles on top of the cart by applying continuous forces "\
-            "to the cart. The environment provides observations of the cart's position, angles of the poles, "\
-            "and their angular velocities. The episode ends when certain termination conditions are met."
         )
     def describe_action(self):
         return (
-            "Your next move: \n Please provide a numerical value within the range of [-3,3], "\
-            "representing the force to be applied to the cart."
         )
 class BasicStateSequenceTranslator(BasicLevelTranslator):

     def translate(self, state):
         res = (
             f"Position of the cart: {state[0]:.2f} m\n"
+            f"Vertical angle of the pole: {state[1]:.2f} rad\n"
+            f"Linear velocity of the cart: {state[2]:.2f} m/s\n"
+            f"Angular velocity of the pole: {state[3]:.2f} rad/s"
         )
         return res
         self.is_only_local_obs = args.is_only_local_obs == 1
         self.max_episode_len = args.max_episode_len
         self.action_desc_dict = {
+            0: "Apply a force in the range [-1, 1] to the cart to control its motion.",
         }
         self.reward_desc_dict = {}
     def describe_goal(self):
         return (
+            "The goal in the Inverted Pendulum environment is to balance the pole on top of the cart "\
+            "by applying continuous forces to the cart, keeping it upright."
         )
     def describe_game(self):
         return (
+            "In the Inverted Pendulum environment, you control a cart that can move linearly with a pole "\
+            "attached to it. Your objective is to balance the pole on top of the cart by applying forces "\
+            "to the cart in a way that keeps the pole upright. "\
+            "The environment provides observations of the cart's position, pole angle, velocities, "\
+            "and angular velocities. The goal is to maintain balance as long as possible."
         )
     def describe_action(self):
         return (
+            "Your next move: \n Please provide a numerical value for the force to be applied to the cart. "\
+            "This value should be within the range of [-3, 3], where a positive value indicates applying force "\
+            "in the right direction, and a negative value indicates applying force in the left direction."
         )
 class BasicStateSequenceTranslator(BasicLevelTranslator):

envs/mujoco/invertedPendulum_translator.py CHANGED Viewed

@@ -7,9 +7,16 @@ class BasicLevelTranslator:
     def translate(self, state):
         res = (
             f"Position of the cart: {state[0]:.2f} m\n"
-            f"Vertical angle of the pole: {state[1]:.2f} rad\n"
-            f"Linear velocity of the cart: {state[2]:.2f} m/s\n"
-            f"Angular velocity of the pole: {state[3]:.2f} rad/s"
         )
         return res
@@ -18,7 +25,7 @@ class GameDescriber:
         self.is_only_local_obs = args.is_only_local_obs == 1
         self.max_episode_len = args.max_episode_len
         self.action_desc_dict = {
-            0: "Apply a force in the range [-1, 1] to the cart to control its motion.",
         }
         self.reward_desc_dict = {}
@@ -30,24 +37,22 @@ class GameDescriber:
     def describe_goal(self):
         return (
-            "The goal in the Inverted Pendulum environment is to balance the pole on top of the cart "\
-            "by applying continuous forces to the cart, keeping it upright."
         )
     def describe_game(self):
         return (
-            "In the Inverted Pendulum environment, you control a cart that can move linearly with a pole "\
-            "attached to it. Your objective is to balance the pole on top of the cart by applying forces "\
-            "to the cart in a way that keeps the pole upright. "\
-            "The environment provides observations of the cart's position, pole angle, velocities, "\
-            "and angular velocities. The goal is to maintain balance as long as possible."
         )
     def describe_action(self):
         return (
-            "Your next move: \n Please provide a numerical value for the force to be applied to the cart. "\
-            "This value should be within the range of [-3, 3], where a positive value indicates applying force "\
-            "in the right direction, and a negative value indicates applying force in the left direction."
         )
 class BasicStateSequenceTranslator(BasicLevelTranslator):

     def translate(self, state):
         res = (
             f"Position of the cart: {state[0]:.2f} m\n"
+            f"Sine of the angle between cart and first pole: {state[1]:.2f}\n"
+            f"Sine of the angle between two poles: {state[2]:.2f}\n"
+            f"Cosine of the angle between cart and first pole: {state[3]:.2f}\n"
+            f"Cosine of the angle between two poles: {state[4]:.2f}\n"
+            f"Velocity of the cart: {state[5]:.2f} m/s\n"
+            f"Angular velocity of angle between cart and first pole: {state[6]:.2f} rad/s\n"
+            f"Angular velocity of angle between two poles: {state[7]:.2f} rad/s\n"
+            f"Constraint Force 1: {state[8]:.2f} N\n"
+            f"Constraint Force 2: {state[9]:.2f} N\n"
+            f"Constraint Force 3: {state[10]:.2f} N"
         )
         return res
         self.is_only_local_obs = args.is_only_local_obs == 1
         self.max_episode_len = args.max_episode_len
         self.action_desc_dict = {
+            0: "Apply a force in the range [-3, 3] to the cart to control its motion.",
         }
         self.reward_desc_dict = {}
     def describe_goal(self):
         return (
+            "The goal in the InvertedDoublePendulum environment is to balance the two poles "\
+            "on top of the cart by applying continuous forces on the cart."
         )
     def describe_game(self):
         return (
+            "In the InvertedDoublePendulum environment, you control a system with a cart and two poles. "\
+            "Your objective is to balance the two poles on top of the cart by applying continuous forces "\
+            "to the cart. The environment provides observations of the cart's position, angles of the poles, "\
+            "and their angular velocities. The episode ends when certain termination conditions are met."
         )
     def describe_action(self):
         return (
+            "Your next move: \n Please provide a numerical value within the range of [-3,3], "\
+            "representing the force to be applied to the cart."
         )
 class BasicStateSequenceTranslator(BasicLevelTranslator):

record_reflexion.csv CHANGED Viewed

@@ -19,5 +19,4 @@ Walker2d-v4,1,expert,5000.0
 Swimmer-v4,1,expert,44.4
 Reacher-v4,1,expert,-2.6
 Pusher-v4,1,expert,-52.3
-InvertedPendulum-v4,1,expert,1000.0
-InvertedDoublePendulum-v4,1,expert,9359.5

 Swimmer-v4,1,expert,44.4
 Reacher-v4,1,expert,-2.6
 Pusher-v4,1,expert,-52.3

shell/test_reflexion.sh CHANGED Viewed

@@ -1,43 +1,43 @@
 # CartPole-v0
 # Naive Actor
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 3 -num_trails 2 --distiller traj_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
 # COT
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 3 -num_trails 2 --distiller traj_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
 # self consistency
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 3 -num_trails 2 --distiller traj_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
 # self-ask
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 3 -num_trails 2 --distiller traj_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
 # SPP
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 3 -num_trails 2 --distiller traj_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
 # REFLEXION
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 3 -num_trails 2 --distiller reflect_distiller
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
 # exe
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 3 -num_trails 2 --distiller guide_generator
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
-python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # CartPole-v0
 # Naive Actor
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider naive_actor --prompt_level 3 --num_trails 2 --distiller traj_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
 # COT
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider cot_actor --prompt_level 3 --num_trails 2 --distiller traj_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
 # self consistency
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 2 --distiller traj_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
 # self-ask
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 2 --distiller traj_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
 # SPP
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider spp_actor --prompt_level 3 --num_trails 2 --distiller traj_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
 # REFLEXION
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 2 --distiller reflect_distiller
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
 # exe
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider exe_actor --prompt_level 3 --num_trails 2 --distiller guide_generator
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+python main_reflexion.py --env_name CartPole-v0 --init_summarizer cartpole_init_translator --curr_summarizer cartpole_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator