Spaces:

MAIL-CS-ECNU
/

Text-Gym-Agents

Runtime error

App Files Files Community

Jarvis-K committed on Jan 5

Commit

5f98914

•

1 Parent(s): d731338

Add support for multi-dimensional continuous action spaces

Browse files

Files changed (5) hide show

deciders/act.py +16 -8
deciders/parser.py +35 -12
envs/base_env.py +5 -3
main_reflexion.py +4 -8
record_reflexion.csv +1 -0

deciders/act.py CHANGED Viewed

@@ -3,7 +3,7 @@
 import openai
 from .gpt import gpt
 from loguru import logger
-from .parser import PARSERS
 from langchain.output_parsers import PydanticOutputParser
 from langchain.output_parsers import OutputFixingParser
 from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
@@ -12,13 +12,18 @@ import tiktoken
 import json
 import re
 from .utils import run_chain
 class RandomAct():
     def __init__(self, action_space):
         self.action_space = action_space
     def act(self, state_description, action_description, env_info, game_description=None, goal_description=None):
-        return self.action_space.sample()+1, '', '', '', 0, 0
 class NaiveAct(gpt):
     def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
@@ -37,7 +42,10 @@ class NaiveAct(gpt):
         super().__init__(args)
         self.distiller = distiller
         self.fewshot_example_initialization(args.prompt_level, args.prompt_path, distiller = self.distiller)
-        self.default_action = 1
         self.parser = self._parser_initialization()
         self.irr_game_description = ''
         self.memory = []
@@ -82,11 +90,12 @@ class NaiveAct(gpt):
     def _parser_initialization(self):
-        if hasattr(self.action_space, 'n'):
-            assert self.action_space.n in PARSERS.keys(), f'Action space {self.action_space} is not supported.'
             num_action = self.action_space.n
-        else:
-            num_action = 1
         if self.args.api_type == "azure":
             autofixing_chat = AzureChatOpenAI(
@@ -204,7 +213,6 @@ class NaiveAct(gpt):
             prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
             action_str = res.choices[0].text.strip()
             print(f'my anwser is {action_str}')
-            # import pdb; pdb.set_trace()
             try:
                 if "Continuous" in self.args.env_name:
                     action = float(re.findall(r"[-+]?\d*\.\d+", action_str)[0])

 import openai
 from .gpt import gpt
 from loguru import logger
+from .parser import DISPARSERS, CONPARSERS
 from langchain.output_parsers import PydanticOutputParser
 from langchain.output_parsers import OutputFixingParser
 from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
 import json
 import re
 from .utils import run_chain
+from gym.spaces import Discrete
 class RandomAct():
     """Baseline decider that ignores its inputs and returns a randomly sampled action."""

     def __init__(self, action_space):
         # Space object that exposes ``sample()``; kept for use in ``act``.
         self.action_space = action_space

     def act(self, state_description, action_description, env_info, game_description=None, goal_description=None):
         """Sample one action from the stored action space.

         All arguments are accepted for interface compatibility and are unused.

         Returns a 6-tuple: the sampled action followed by three empty strings
         and two zeros as placeholders.
         """
         sampled = self.action_space.sample()
         if isinstance(self.action_space, Discrete):
             # Discrete samples are shifted by one before being returned.
             # NOTE(review): presumably because agent-facing discrete actions are 1-indexed - confirm.
             sampled = sampled + 1
         return sampled, '', '', '', 0, 0
 class NaiveAct(gpt):
     def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
         super().__init__(args)
         self.distiller = distiller
         self.fewshot_example_initialization(args.prompt_level, args.prompt_path, distiller = self.distiller)
+        if isinstance(self.action_space, Discrete):
+            self.default_action = 1
+        else:
+            self.default_action = [0 for ind in range(self.action_space.shape[0])]
         self.parser = self._parser_initialization()
         self.irr_game_description = ''
         self.memory = []
     def _parser_initialization(self):
+        if isinstance(self.action_space, Discrete):
+            PARSERS = DISPARSERS
             num_action = self.action_space.n
+        else:
+            PARSERS = CONPARSERS
+            num_action = self.action_space.shape[0]
         if self.args.api_type == "azure":
             autofixing_chat = AzureChatOpenAI(
             prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
             action_str = res.choices[0].text.strip()
             print(f'my anwser is {action_str}')
             try:
                 if "Continuous" in self.args.env_name:
                     action = float(re.findall(r"[-+]?\d*\.\d+", action_str)[0])

deciders/parser.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from pydantic import BaseModel, Field, validator
 class DisActionModel(BaseModel):
     action: int = Field(description="the chosen action to perform")
@@ -17,15 +18,37 @@ def generate_action_class(max_action):
     return type(f"{max_action}Action", (DisActionModel,), {'action_is_valid': DisActionModel.create_validator(max_action)})
 # Dictionary of parsers with dynamic class generation
-PARSERS = {num: generate_action_class(num) for num in [2, 3, 4, 6, 9, 18]}
-# class ContinuousAction(BaseModel):
-#     action: float = Field(description="the choosed action to perform")
-#     # You can add custom validation logic easily with Pydantic.
-#     @validator('action')
-#     def action_is_valid(cls, field):
-#         if not (field >= -1 and field <= 1):
-#             raise ValueError("Action is not valid ([-1,1])!")
-#         return field
-# PARSERS = {1:ContinuousAction, 2: TwoAction, 3: ThreeAction, 4: FourAction, 6: SixAction, 9:NineAction, 18: FullAtariAction}

 from pydantic import BaseModel, Field, validator
+from typing import List
 class DisActionModel(BaseModel):
     action: int = Field(description="the chosen action to perform")
     return type(f"{max_action}Action", (DisActionModel,), {'action_is_valid': DisActionModel.create_validator(max_action)})
 # Dictionary of parsers with dynamic class generation
+DISPARSERS = {num: generate_action_class(num) for num in [2, 3, 4, 6, 9, 18]}
class ContinuousActionBase(BaseModel):
    """Pydantic model for a continuous action expressed as a list of floats.

    A subclass created for a specific dimensionality records the required list
    length through ``set_expected_length``. Validation then checks that the
    list has exactly that many entries and that every entry lies in ``[-1, 1]``.
    """

    action: List[float] = Field(description="the chosen continuous actions to perform")

    @classmethod
    def set_expected_length(cls, length):
        """Record on ``cls`` how many entries ``action`` must contain."""
        # Assigned after class creation instead of being declared in the class body.
        # NOTE(review): presumably so pydantic does not pick it up as a model field - confirm.
        cls.expected_length = length

    @validator('action', pre=True)
    def validate_length(cls, action):
        """Check the raw ``action`` value against the expected number of entries.

        Runs before pydantic's own type coercion (``pre=True``).

        Raises:
            ValueError: if ``action`` has no length or its length differs from
                the value stored by ``set_expected_length``.
        """
        expected = getattr(cls, 'expected_length', None)
        if expected is None:
            # No dimensionality was configured (e.g. the base class is used
            # directly), so there is nothing to compare against; previously
            # this path raised an AttributeError from inside the validator.
            return action
        try:
            actual = len(action)
        except TypeError:
            # A scalar or other unsized value: report it as a validation error
            # rather than relying on how the framework treats TypeError.
            raise ValueError(f"The action list must have exactly {expected} items.") from None
        if actual != expected:
            raise ValueError(f"The action list must have exactly {expected} items.")
        return action

    @validator('action', each_item=True)
    def action_is_valid(cls, item):
        """Check one element of ``action``; raise ValueError if it is outside ``[-1, 1]``."""
        if not -1 <= item <= 1:
            raise ValueError("Each action dimension must be in the range [-1, 1]!")
        return item
+# Generate classes dynamically
def generate_continuous_action_class(expected_length):
    """Build a ``ContinuousActionBase`` subclass bound to ``expected_length``.

    The new class is named ``"<expected_length>DContinuousAction"``, adds no
    attributes of its own, and has ``set_expected_length(expected_length)``
    called on it before being returned.
    """
    class_name = f"{expected_length}DContinuousAction"
    action_cls = type(class_name, (ContinuousActionBase,), {})
    # Store the required length on the freshly created subclass only.
    action_cls.set_expected_length(expected_length)
    return action_cls
+# Dictionary of parsers with dynamic class generation
+CONPARSERS = {length: generate_continuous_action_class(length) for length in range(1, 17)}

envs/base_env.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # This file contains functions for interacting with the CartPole environment
 import gym
 class SettableStateEnv(gym.Wrapper):
     def __init__(self, env):
@@ -55,10 +56,11 @@ class BaseEnv(gym.Wrapper):
     def step_llm(self, action):
         potential_next_state = self.get_potential_next_state(action)
-        if "Continuous" in self.env_name:
-            state, reward, terminated, _, info = super().step(action)
-        else:
             state, reward, terminated, _, info = super().step(action-1)
         self.transition_data['action'] = action
         self.transition_data['next_state'] = state
         self.transition_data['reward'] = reward

 # This file contains functions for interacting with the CartPole environment
 import gym
+from gym.spaces import Discrete
 class SettableStateEnv(gym.Wrapper):
     def __init__(self, env):
     def step_llm(self, action):
         potential_next_state = self.get_potential_next_state(action)
+        if isinstance(self.action_space, Discrete):
             state, reward, terminated, _, info = super().step(action-1)
+        else:
+            state, reward, terminated, _, info = super().step(action)
         self.transition_data['action'] = action
         self.transition_data['next_state'] = state
         self.transition_data['reward'] = reward

main_reflexion.py CHANGED Viewed

@@ -17,6 +17,9 @@ import random
 import numpy as np
 import datetime
 from loguru import logger
 def set_seed(seed):
@@ -109,9 +112,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail
                     logfile
                 )
-                if "Continuous" in args.env_name:
-                    action = [action]
                 state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
@@ -137,10 +137,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail
                     logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                 continue
         if error_flag:
-            if "Continuous" in args.env_name:
-                action = [decider.default_action]
-            else:
-                action = decider.default_action
             state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
@@ -164,7 +160,7 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail
             logger.info(f"current_total_cost: {current_total_cost}")
             logger.info(f"Now it is round {round}.")
-        frames.append(environment.render())
         if termination or truncation:
             if logger:
                 logger.info(f"Terminated!")

 import numpy as np
 import datetime
 from loguru import logger
+from gym.spaces import Discrete
 def set_seed(seed):
                     logfile
                 )
                 state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
                     logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                 continue
         if error_flag:
             state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
             logger.info(f"current_total_cost: {current_total_cost}")
             logger.info(f"Now it is round {round}.")
+        # frames.append(environment.render())
         if termination or truncation:
             if logger:
                 logger.info(f"Terminated!")

record_reflexion.csv CHANGED Viewed

@@ -10,4 +10,5 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0

 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
+Ant-v4,1,expert,100