Commit c640769 by hzxwonder · "update"
Parent: 30becf4

This commit threads a new `api_type` argument (default `"azure"`) through the shared LLM helpers in deciders/utils.py and updates their call sites in the distillers to pass it along.

Files changed:
- deciders/utils.py (+26, -25)
- distillers/guider.py (+4, -4)
- distillers/self_reflection.py (+1, -1)
- distillers/traj_prompt_summarizer.py (+1, -1)
deciders/utils.py CHANGED

```diff
@@ -25,23 +25,23 @@ def run_chain(chain, *args, **kwargs):
     return chain.run(*args, **kwargs)
 
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def get_completion(prompt: str, engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None) -> str:
-    response = openai.Completion.create(
-        engine=engine,
-        prompt=prompt,
-        temperature=temperature,
-        max_tokens=max_tokens,
-        top_p=1,
-        frequency_penalty=0.0,
-        presence_penalty=0.0,
-        stop=stop_strs,
-        # request_timeout = 1
-    )
-    return response.choices[0].text
+def get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None) -> str:
+    if api_type == "azure":
+        response = openai.Completion.create(
+            engine=engine,
+            prompt=prompt,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=1,
+            frequency_penalty=0.0,
+            presence_penalty=0.0,
+            stop=stop_strs,
+            # request_timeout = 1
+        )
+        return response.choices[0].text
 
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None, is_batched: bool = False) -> str:
+def get_chat(prompt: str, api_type: str = "azure", model: str = "gpt-35-turbo", engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None, is_batched: bool = False) -> str:
     assert model != "text-davinci-003"
     messages = [
         {
@@ -49,14 +49,15 @@ def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-tur
             "content": prompt
         }
     ]
-    response = openai.ChatCompletion.create(
-        model=model,
-        engine=engine,
-        messages=messages,
-        max_tokens=max_tokens,
-        stop=stop_strs,
-        temperature=temperature,
-        # request_timeout = 1
-    )
-    return response.choices[0]["message"]["content"]
+    if api_type == "azure":
+        response = openai.ChatCompletion.create(
+            model=model,
+            engine=engine,
+            messages=messages,
+            max_tokens=max_tokens,
+            stop=stop_strs,
+            temperature=temperature,
+            # request_timeout = 1
+        )
+        return response.choices[0]["message"]["content"]
```
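Note that, as committed, both helpers return a value only when `api_type == "azure"`; any other value falls off the end of the function and yields `None`. Below is a minimal sketch of what an explicit non-Azure branch could look like, assuming the same pre-1.0 `openai` SDK these helpers already use. The `else` branch and the `"openai"` value are illustrative assumptions, not part of this commit:

```python
import openai
from typing import List, Optional

def get_chat(prompt: str, api_type: str = "azure", model: str = "gpt-35-turbo",
             engine: str = "gpt-35-turbo", temperature: float = 0.0,
             max_tokens: int = 256, stop_strs: Optional[List[str]] = None) -> str:
    messages = [{"role": "user", "content": prompt}]
    if api_type == "azure":
        # Azure routes requests by deployment name, passed as `engine`.
        response = openai.ChatCompletion.create(
            model=model, engine=engine, messages=messages,
            max_tokens=max_tokens, stop=stop_strs, temperature=temperature,
        )
    else:
        # The public OpenAI endpoint takes only `model`; `engine` is Azure-specific.
        response = openai.ChatCompletion.create(
            model=model, messages=messages,
            max_tokens=max_tokens, stop=stop_strs, temperature=temperature,
        )
    return response.choices[0]["message"]["content"]
```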
distillers/guider.py CHANGED

```diff
@@ -67,7 +67,7 @@ class Guidance_Generator():
 
     def generate_summary(self, traj, post_memory):
         query = self._generate_summary_query(traj, post_memory)
-        summary = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
+        summary = get_chat(query, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version)
         logger.info(f'[Reflexion Memory]The summary prompt is: {query}.')
         logger.info(f'[Reflexion Memory]The summary response is: {summary}.')
         return summary
@@ -78,7 +78,7 @@ class Guidance_Generator():
         for i, m in enumerate(post_memory):
             query += f'Episode #{i}: {m}\n'
         query += '\n Identify and summarize the key information that can be exploited to improve performance of the player.'
-        insight = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
+        insight = get_chat(query, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version)
         # import pdb;pdb.set_trace()
         logger.info(f'[Reflexion Memory]The insight prompt is: {query}.')
         logger.info(f'[Reflexion Memory]The insight response is: {insight}.')
@@ -96,7 +96,7 @@ class Guidance_Generator():
         query += f"\n The main aim for you is to help the human player win the game in the last episode. He has only {max(max_num_trials-len(post_memory), 1)} episodes left to try.You can give suggestions before each episode. Then what is your suggestion for his next episode? Please provide simple, concise answers suitable for a six-year-old child, focusing on the following in item list format: 1. What game-relevant knowledge is critical to determine the optimal policy. Notice that the knowledge should be obtainable by interacting with the environment and helpful for the decisions.\n 2. How should the player conduct exploration in the next episode to acquire this information?\n3. How can the player exploit the information obtained to achieve higher performance in subsequent episodes?\n 4. How should exploration and exploitation be balanced to improve performance in the next episode?\n"
 
         # TODO: consider the inconsistency between past suggestion and past memory.
-        suggestion = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
+        suggestion = get_chat(query, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version)
         self.suggestion = suggestion
         logger.info(f'[Reflexion Memory]The suggestion prompt is: {query}.')
         logger.info(f'[Reflexion Memory]The suggestion response is: {suggestion}.')
@@ -107,7 +107,7 @@ class Guidance_Generator():
             reflection_query = self._generate_summary_query(traj, memory[-max_len_mem:])
         else:
             reflection_query = self._generate_summary_query(traj, memory)
-        reflection = get_completion(reflection_query, engine=self.args.gpt_version)
+        reflection = get_completion(reflection_query, api_type=self.args.api_type, engine=self.args.gpt_version)
         logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
         logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
         return reflection
```
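Every updated call site now reads `self.args.api_type`, so whichever argument parser builds `args` must define that attribute. A hypothetical sketch of the matching flag follows; the flag name, default, and choices are assumptions inferred from the call sites, not shown in this commit:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--gpt_version", default="gpt-35-turbo",
                    help="Model/deployment name forwarded as both `model` and `engine`.")
parser.add_argument("--api_type", default="azure", choices=["azure", "openai"],
                    help="Backend that the helpers in deciders/utils.py should target.")
args = parser.parse_args()
```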
distillers/self_reflection.py CHANGED

```diff
@@ -53,7 +53,7 @@ class RefletionGenerator():
             reflection_query = self._generate_reflection_query(traj, memory[-max_len_mem:])
         else:
             reflection_query = self._generate_reflection_query(traj, memory)
-        reflection = get_completion(reflection_query, engine=self.args.gpt_version)
+        reflection = get_completion(reflection_query, api_type=self.args.api_type, engine=self.args.gpt_version)
         logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
         logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
         return reflection
```
|
distillers/traj_prompt_summarizer.py
CHANGED
@@ -54,7 +54,7 @@ class TrajPromptSummarizer():
|
|
54 |
reflection_query = self._generate_summary_query(traj, memory[-max_len_mem:])
|
55 |
else:
|
56 |
reflection_query = self._generate_summary_query(traj, memory)
|
57 |
-
reflection = get_completion(reflection_query, engine=self.args.gpt_version)
|
58 |
logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
|
59 |
logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
|
60 |
return reflection
|
|
|
54 |
reflection_query = self._generate_summary_query(traj, memory[-max_len_mem:])
|
55 |
else:
|
56 |
reflection_query = self._generate_summary_query(traj, memory)
|
57 |
+
reflection = get_completion(reflection_query, api_type=self.args.api_type, engine=self.args.gpt_version)
|
58 |
logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
|
59 |
logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
|
60 |
return reflection
|
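A side note on the `# @retry(...)` comments that survive in deciders/utils.py: they point at tenacity-style retries that are currently disabled. If rate limits become an issue, one way to restore them without touching the helpers is a thin wrapper. This is a sketch under the assumption that `tenacity` is installed; the backoff parameters simply mirror the commented-out decorator:

```python
from tenacity import retry, stop_after_attempt, wait_random_exponential

from deciders.utils import get_completion

# Mirrors the commented-out decorator: exponential backoff between 1s and 60s,
# giving up after six attempts.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_completion_with_retry(*args, **kwargs):
    return get_completion(*args, **kwargs)
```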