import time

from openai.error import RateLimitError

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logs import logger

cfg = Config()


def create_chat_message(role, content):
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user", or "assistant".
        content (str): The content of the message.

    Returns:
        dict: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}


def generate_context(prompt, relevant_memory, full_message_history, model):
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """
    Interact with the OpenAI API, sending the prompt, user input, message history,
    and permanent memory.

    Args:
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between the user and the AI.
        permanent_memory (Obj): The memory object containing the permanent memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument

            # Reserve 1000 tokens for the response
            logger.debug(f"Token limit: {token_limit}")
            send_token_limit = token_limit - 1000

            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # Remove memories until we are under 2500 tokens
                relevant_memory = relevant_memory[:-1]
                (
                    next_message_to_add_index,
                    current_tokens_used,
                    insertion_index,
                    current_context,
                ) = generate_context(
                    prompt, relevant_memory, full_message_history, model
                )

            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model
            )  # Account for user input (appended later)

            while next_message_to_add_index >= 0:
                # print(f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context,
                # after the system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1

            # Append user input; its length is accounted for above
            current_context.extend([create_chat_message("user", user_input)])

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative.
            # This should never happen, please submit a bug report at
            # https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
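

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of how a caller might drive chat_with_ai(), assuming the
# autogpt package is importable and OpenAI credentials are already configured
# through autogpt.config.Config. _MemoryStub is a hypothetical stand-in for
# Auto-GPT's memory backends; it implements only the two methods this module
# actually calls: get_relevant() and get_stats().
if __name__ == "__main__":

    class _MemoryStub:
        """Minimal memory object exposing the interface chat_with_ai expects."""

        def get_relevant(self, data, num_relevant=10):
            # No stored memories in this sketch, so nothing is ever relevant.
            return []

        def get_stats(self):
            return "0 memories stored"

    message_history = []  # mutated in place by chat_with_ai
    reply = chat_with_ai(
        prompt="You are a helpful assistant.",  # the rules / system prompt
        user_input="Summarise what you can do.",
        full_message_history=message_history,
        permanent_memory=_MemoryStub(),
        token_limit=4000,  # assumed context window for cfg.fast_llm_model
    )
    print(reply)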