Jarvis committed on
Commit
841d805
2 Parent(s): 59cf660 3ffd15b

Merge pull request #15 from mail-ecnu/feat-proxy

Browse files
deciders/act.py CHANGED
@@ -11,7 +11,7 @@ from memory.env_history import EnvironmentHistory
11
  import tiktoken
12
  import json
13
  import re
14
- from .utils import run_chain
15
  from gym.spaces import Discrete
16
 
17
  class RandomAct():
@@ -156,12 +156,15 @@ class NaiveAct(gpt):
156
  prompt = f"{game_description}\n{goal_description}\n{fewshot_examples}\nCurrent {state_description}\n{action_description} "
157
  prompt += "Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Your Action is: "
158
  print(f"prompt is {prompt}")
159
- res = openai.Completion.create(
160
- engine=self.args.gpt_version,
161
- prompt=prompt,
162
- temperature=self.temperature,
163
- max_tokens=self.max_tokens,
164
- )
 
 
 
165
  return prompt, res
166
 
167
  def _add_history_before_action(self, game_description, goal_description, state_description):
@@ -210,8 +213,8 @@ class NaiveAct(gpt):
210
  my_mem += f"{self.env_history.get_histories(self.mem_num)}"
211
 
212
 
213
- prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
214
- action_str = res.choices[0].text.strip()
215
  print(f'my anwser is {action_str}')
216
  action = self.parser.parse(response).action
217
  self._add_history_after_action(action)
@@ -219,7 +222,7 @@ class NaiveAct(gpt):
219
  self.logger.info(f'The optimal action is: {action}.')
220
  if env_info.get('history'):
221
  self.logger.info(f'History: {history_to_str(env_info["history"])}')
222
- return action, prompt, res, 0, 0
223
 
224
  def _read_mem(self, ):
225
  memory = self.memory
 
11
  import tiktoken
12
  import json
13
  import re
14
+ from .utils import run_chain, get_completion, get_chat
15
  from gym.spaces import Discrete
16
 
17
  class RandomAct():
 
156
  prompt = f"{game_description}\n{goal_description}\n{fewshot_examples}\nCurrent {state_description}\n{action_description} "
157
  prompt += "Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Your Action is: "
158
  print(f"prompt is {prompt}")
159
+ # res = get_chat(prompt, self.args.api_type, self.args.gpt_version, self.temperature, self.max_tokens)
160
+ res = get_chat(prompt, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version, temperature=self.temperature, max_tokens=self.max_tokens)
161
+ # openai.ChatCompletion.create(
162
+ # engine=self.args.gpt_version,
163
+ # # model=self.args.gpt_version,
164
+ # prompt=prompt,
165
+ # temperature=self.temperature,
166
+ # max_tokens=self.max_tokens,
167
+ # )
168
  return prompt, res
169
 
170
  def _add_history_before_action(self, game_description, goal_description, state_description):
 
213
  my_mem += f"{self.env_history.get_histories(self.mem_num)}"
214
 
215
 
216
+ prompt, response = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
217
+ action_str = response
218
  print(f'my anwser is {action_str}')
219
  action = self.parser.parse(response).action
220
  self._add_history_after_action(action)
 
222
  self.logger.info(f'The optimal action is: {action}.')
223
  if env_info.get('history'):
224
  self.logger.info(f'History: {history_to_str(env_info["history"])}')
225
+ return action, prompt, response, 0, 0
226
 
227
  def _read_mem(self, ):
228
  memory = self.memory
deciders/utils.py CHANGED
@@ -54,6 +54,7 @@ def get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-t
54
  temperature=temperature,
55
  # request_timeout = 1
56
  )
 
57
  return response.choices[0]["message"]["content"]
58
 
59
  # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 
54
  temperature=temperature,
55
  # request_timeout = 1
56
  )
57
+ import pdb; pdb.set_trace()
58
  return response.choices[0]["message"]["content"]
59
 
60
  # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
envs/mujoco/ant_translator.py CHANGED
@@ -94,8 +94,7 @@ class GameDescriber:
94
 
95
  def describe_action(self):
96
  return (
97
- "Your next move: \n Please provide a list of eight numerical values, each within the range of [-1,1], "
98
- "representing the torques to be applied at the eight hinge joints of the ant."
99
  )
100
 
101
  class BasicStateSequenceTranslator(BasicLevelTranslator):
 
94
 
95
  def describe_action(self):
96
  return (
97
+ "Your next move: \n Please choose your action which applies torques at the eight hinge joints of the ant. It be a list of eight numerical values and each value is within the range of [-1,1]."
 
98
  )
99
 
100
  class BasicStateSequenceTranslator(BasicLevelTranslator):