import asyncio
import logging
import time

import tiktoken
from openai import OpenAI

from config import get_api_keys

logger = logging.getLogger(__name__)

api_keys = get_api_keys()
or_client = OpenAI(
    api_key=api_keys["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
)

# Token encoding
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Total token budget (prompt + completion) assumed for a single request.
CONTEXT_TOKEN_LIMIT = 8000


def _encode(text):
    """Tokenize *text*, treating special-token markers as ordinary text.

    By default tiktoken raises ``ValueError`` when the input contains a
    special-token string such as ``<|endoftext|>``; user-supplied text must
    not be able to crash token counting, so that check is disabled here.
    """
    return encoding.encode(text, disallowed_special=())


def limit_tokens(input_string: str, token_limit: int = 6000) -> str:
    """Return *input_string* truncated to at most *token_limit* tokens."""
    return encoding.decode(_encode(input_string)[:token_limit])


def calculate_tokens(msgs) -> int:
    """Return an approximate token count for an iterable of messages.

    Each message is tokenized via its ``str()`` form, so for dict messages
    the keys and punctuation of the repr are counted as well.
    """
    return sum(len(_encode(str(m))) for m in msgs)


# In-memory storage for conversations
conversations = {}
last_activity = {}


async def clear_inactive_conversations(max_idle_seconds=3600 * 24, check_interval=600):
    """Periodically drop conversations that have been idle for too long.

    Runs forever; intended to be scheduled as a background task.

    Args:
        max_idle_seconds: Idle time after which a conversation is removed
            (default: 24 hours).
        check_interval: Seconds to sleep between sweeps (default: 10 minutes).
    """
    while True:
        current_time = time.time()
        # Snapshot the ids first so the dicts are not mutated while iterated.
        inactive_convos = [
            conv_id
            for conv_id, last_time in last_activity.items()
            if current_time - last_time > max_idle_seconds
        ]
        for conv_id in inactive_convos:
            conversations.pop(conv_id, None)
            last_activity.pop(conv_id, None)
        logger.info("Cleared %d inactive conversations", len(inactive_convos))
        await asyncio.sleep(check_interval)


def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
    """Stream a chat completion, yielding content fragments as they arrive.

    This is a generator: nothing (including validation) runs until it is
    iterated. If the prompt exceeds the token budget, a trimmed copy
    consisting of the first message plus the most recent messages is sent
    instead; the caller's list itself is never trimmed.

    Args:
        messages: Conversation history as a list of chat message dicts. On
            success the assistant reply is appended to this list.
        model: Model identifier passed to the API.
        max_llm_history: Maximum number of most-recent messages to keep (in
            addition to the first message) when trimming.
        max_output_tokens: Completion token cap; also reserved from the
            overall budget when sizing the prompt.

    Yields:
        Each non-empty content fragment of the streamed response.

    Raises:
        Exception: If the prompt cannot be trimmed under the budget, or if
            the API call / streaming fails (original error is chained).
    """
    logger.info("Starting chat with model: %s", model)

    prompt_budget = CONTEXT_TOKEN_LIMIT - max_output_tokens
    prompt = messages
    history = max_llm_history

    # Every iteration either strictly shortens the prompt or lowers
    # `history`, so the loop is guaranteed to terminate.
    while calculate_tokens(prompt) > prompt_budget:
        if history >= 1 and len(prompt) > history + 1:
            prompt = [prompt[0]] + prompt[-history:]
        else:
            history -= 1
            if history < 2:
                error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
                logger.error(error_message)
                raise Exception(error_message)

    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=prompt,
            max_tokens=max_output_tokens,
            stream=True,
        )

        parts = []
        for chunk in response:
            # Some chunks (e.g. usage/keep-alive) carry no choices at all.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                parts.append(content)
                yield content

        # After streaming, add the full response to the caller's conversation
        # history (not to the possibly trimmed copy that was sent).
        messages.append({"role": "assistant", "content": "".join(parts)})
        logger.info("Chat completed successfully")
    except Exception as e:
        logger.error(f"Error in model response: {str(e)}")
        raise Exception(f"Error in model response: {str(e)}") from e