specialized-agents-api / llm_utils.py
pvanand's picture
Update llm_utils.py
be1349b verified
from openai import OpenAI
from config import get_api_keys
import logging
import tiktoken
import time
import asyncio
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# API keys are fetched once, at import time, through the project's config helper.
api_keys = get_api_keys()
# OpenAI-compatible client pointed at the OpenRouter endpoint.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
# Token encoding: the tiktoken encoding registered for "gpt-3.5-turbo" is shared
# by the token helpers in this module.
# NOTE(review): the chat function defaults to a different model, so counts are
# presumably approximate for it -- confirm this is acceptable.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
def limit_tokens(input_string, token_limit=6000):
    """Return ``input_string`` truncated to its first ``token_limit`` tokens.

    The text is tokenized with the module-level ``encoding``, the token list
    is sliced to ``[:token_limit]``, and the slice is decoded back to text.
    """
    token_ids = encoding.encode(input_string)
    kept_ids = token_ids[:token_limit]
    return encoding.decode(kept_ids)
def calculate_tokens(msgs):
    """Return the total token count of ``msgs``.

    Each item is converted with ``str()`` before being tokenized with the
    module-level ``encoding``; the per-item counts are added together.
    """
    total = 0
    for entry in msgs:
        total += len(encoding.encode(str(entry)))
    return total
# In-memory storage for conversations
# Keyed by conversation id; the stored value is not visible in this chunk
# (presumably the message list of that conversation -- verify against callers).
conversations = {}
# Maps conversation id -> last-activity timestamp. The cleanup coroutine compares
# these values against time.time(), so they are presumably time.time() readings
# written by the request handlers -- verify against callers.
last_activity = {}
async def clear_inactive_conversations(inactivity_timeout=3600 * 24, check_interval=600):
    """Periodically drop conversations that have been idle for too long.

    Runs forever. On every sweep, each conversation id whose ``last_activity``
    timestamp is more than ``inactivity_timeout`` seconds behind
    ``time.time()`` is removed from both ``conversations`` and
    ``last_activity``; the number of removed ids is logged and the coroutine
    then sleeps for ``check_interval`` seconds.

    Args:
        inactivity_timeout: Idle time in seconds after which a conversation
            is removed. Defaults to 24 hours.
        check_interval: Seconds to sleep between sweeps. Defaults to 600
            (ten minutes).
    """
    while True:
        current_time = time.time()
        inactive_convos = [
            conv_id
            for conv_id, last_time in last_activity.items()
            if current_time - last_time > inactivity_timeout
        ]
        for conv_id in inactive_convos:
            # pop(..., None) tolerates an id that is missing from either dict.
            conversations.pop(conv_id, None)
            last_activity.pop(conv_id, None)
        logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
        # Yield to the event loop until the next sweep is due.
        await asyncio.sleep(check_interval)
def _trim_to_token_budget(messages, token_budget, max_llm_history):
    """Return a message list that fits within ``token_budget`` tokens.

    Keeps the first message plus the most recent ``max_llm_history`` messages.
    While the result is still over budget, the history window is shrunk by one
    message at a time. ``messages`` itself is never mutated; when no trimming
    is needed the same list object is returned.

    Raises:
        Exception: If the window would have to drop below two messages.
    """
    trimmed = messages
    window = max_llm_history
    while calculate_tokens(trimmed) > token_budget:
        # Truncate only when it actually removes something; otherwise the
        # loop would rebuild the same list forever.
        if 0 < window < len(trimmed) - 1:
            trimmed = [trimmed[0]] + trimmed[-window:]
            continue
        window -= 1
        if window < 2:
            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
            logger.error(error_message)
            raise Exception(error_message)
    return trimmed


def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500, context_window=8000):
    """Stream a chat completion and record the reply in ``messages``.

    The prompt is first trimmed so that its token count does not exceed
    ``context_window - max_output_tokens``. The request is then sent through
    the module-level ``or_client`` with ``stream=True`` and every non-empty
    content delta is yielded as it arrives. Once the stream is exhausted, the
    concatenated reply is appended to the caller's ``messages`` list as an
    ``assistant`` message, also when a trimmed copy was sent to the model.

    Args:
        messages: List of chat message dicts; the first entry is always kept
            when trimming. Mutated in place: the assistant reply is appended.
        model: Model identifier passed to the client.
        max_llm_history: Maximum number of most-recent messages kept after
            the first one when the prompt is over budget.
        max_output_tokens: ``max_tokens`` value for the completion; also
            reserved out of ``context_window`` when sizing the prompt.
        context_window: Total token budget shared by prompt and completion.

    Yields:
        The content string of each streamed delta.

    Raises:
        Exception: If the prompt cannot be trimmed under the token budget, or
            if the request or the streaming fails (chained to the original error).
    """
    logger.info(f"Starting chat with model: {model}")
    request_messages = _trim_to_token_budget(
        messages, context_window - max_output_tokens, max_llm_history
    )
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=request_messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        parts = []
        for chunk in response:
            # Some streamed chunks carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                parts.append(content)
                yield content
        # After streaming, add the full response to the caller's conversation
        # history (not to the trimmed copy that was sent to the model).
        messages.append({"role": "assistant", "content": "".join(parts)})
        logger.info("Chat completed successfully")
    except Exception as e:
        logger.error(f"Error in model response: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Error in model response: {str(e)}") from e