from openai import OpenAI
from config import get_api_keys
import logging
import tiktoken
import time
import asyncio

logger = logging.getLogger(__name__)

api_keys = get_api_keys()
# OpenAI SDK client pointed at the OpenRouter endpoint
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")

# Token encoding
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
def limit_tokens(input_string, token_limit=6000):
    return encoding.decode(encoding.encode(input_string)[:token_limit])

def calculate_tokens(msgs):
    return sum(len(encoding.encode(str(m))) for m in msgs)
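# Illustrative note (this snippet is not used elsewhere in the module): limit_tokens
# truncates by token count rather than by characters, and calculate_tokens stringifies
# each message dict, so it slightly overestimates the exact chat-format token count.
#
#   snippet = limit_tokens(long_document_text, token_limit=500)
#   budget_used = calculate_tokens([{"role": "user", "content": snippet}])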
# In-memory storage for conversations
conversations = {}
last_activity = {}

async def clear_inactive_conversations():
    while True:
        current_time = time.time()
        inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
                           if current_time - last_time > 3600 * 24]  # inactive for 24 hours
        for conv_id in inactive_convos:
            if conv_id in conversations:
                del conversations[conv_id]
            if conv_id in last_activity:
                del last_activity[conv_id]
        logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
        await asyncio.sleep(600)  # Check every 10 minutes
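# Scheduling sketch (an assumption -- the app entry point is not part of this module):
# the cleanup loop only runs if something awaits it, e.g. created as a background task
# once an event loop exists, typically from an async startup hook:
#
#   cleanup_task = asyncio.create_task(clear_inactive_conversations())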
def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
    logger.info(f"Starting chat with model: {model}")

    # Trim history until the prompt fits the context budget (8000 tokens minus the
    # room reserved for the reply), keeping the first (system) message plus the most
    # recent max_llm_history messages.
    while calculate_tokens(messages) > (8000 - max_output_tokens):
        if len(messages) > max_llm_history + 1:
            messages = [messages[0]] + messages[-max_llm_history:]
        else:
            # Already trimmed to the window; shrink the window instead so the loop
            # cannot repeat the same trim forever.
            max_llm_history -= 1
            if max_llm_history < 2:
                error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
                logger.error(error_message)
                raise Exception(error_message)
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True
        )

        full_response = ""
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                content = chunk.choices[0].delta.content
                full_response += content
                yield content

        # After streaming, add the full response to the conversation history
        messages.append({"role": "assistant", "content": full_response})
        logger.info("Chat completed successfully")
    except Exception as e:
        logger.error(f"Error in model response: {str(e)}")
        raise Exception(f"Error in model response: {str(e)}")
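
# Minimal local smoke test (a sketch, not part of the Space's request flow; it assumes
# get_api_keys() resolves a valid OPENROUTER_API_KEY when this module is run directly).
if __name__ == "__main__":
    demo_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},
    ]
    # chat_with_llama_stream is a generator: iterate it to receive text deltas; it also
    # appends the completed assistant reply to demo_messages once streaming finishes.
    for piece in chat_with_llama_stream(demo_messages):
        print(piece, end="", flush=True)
    print()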