# askmydocs / openrouter_llm.py
import os
import logging
import json
import requests
from typing import List, Dict, Any, Optional

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class OpenRouterFreeAdapter:
    """Adapter for accessing only free LLMs through the OpenRouter.ai API."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = "https://openrouter.ai/api/v1"
    ):
        """
        Initialize the OpenRouter adapter for free models only.

        Args:
            api_key: OpenRouter API key. If None, loads OPENROUTER_API_KEY
                from the environment.
            base_url: Base URL for the OpenRouter API.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            logger.warning(
                "No OpenRouter API key provided. Using limited free access.")
        self.base_url = base_url
        self.app_url = ""
        # Get app info for better request attribution
        self.app_name = os.getenv("APP_NAME", "AskMyDocs")
        self.model: Optional[str] = None
        self.update_best_free_model()

    def update_best_free_model(self) -> bool:
        """
        Find and set the best available free model.

        Returns:
            Boolean indicating success.
        """
        free_models = self.list_free_models()
        if not free_models:
            # If the API call fails, fall back to a known free model
            logger.warning(
                "Could not retrieve free models list. Using fallback model.")
            self.model = self._get_fallback_model()
            return False

        # Rank models by preference:
        #   1. Llama 4 models (highest priority)
        #   2. Gemini / Claude models
        #   3. Mistral models
        #   4. DeepSeek models
        #   5. Other free models
        ranked_models = self._rank_free_models(free_models)
        if ranked_models:
            self.model = ranked_models[0]["id"]
            logger.info(f"Selected free model: {self.model}")
            return True
        else:
            self.model = self._get_fallback_model()
            logger.warning(
                f"No suitable free models found. Using fallback: {self.model}")
            return False

    def _rank_free_models(self, free_models: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Rank free models by preference for document QA tasks.

        Args:
            free_models: List of free model dictionaries.

        Returns:
            Sorted list of models by preference.
        """
        # Define preference tiers
        tier_1_patterns = ["llama-4", "llama4"]
        tier_2_patterns = ["gemini", "claude"]
        tier_3_patterns = ["mistral", "mixtral"]
        tier_4_patterns = ["deepseek"]

        # Helper function to determine tier
        def get_model_tier(model_id: str) -> int:
            model_id_lower = model_id.lower()
            # Check for free tag/suffix
            is_free = ":free" in model_id_lower or "-free" in model_id_lower
            if not is_free:
                return 99  # Deprioritize non-free models
            # Check pattern matches
            for pattern in tier_1_patterns:
                if pattern in model_id_lower:
                    return 1
            for pattern in tier_2_patterns:
                if pattern in model_id_lower:
                    return 2
            for pattern in tier_3_patterns:
                if pattern in model_id_lower:
                    return 3
            for pattern in tier_4_patterns:
                if pattern in model_id_lower:
                    return 4
            return 5  # Other free models

        # Sort by tier, then by context length (longer is better)
        ranked_models = sorted(
            free_models,
            key=lambda m: (
                get_model_tier(m["id"]),
                # Negated to sort in descending order; "or 0" guards against
                # a null context_length in the API data
                -(m.get("context_length") or 0)
            )
        )
        return ranked_models
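
    # Worked example (hypothetical model IDs, for illustration only): given
    # "x/mistral-7b-instruct:free" (context_length 32768) and
    # "y/llama-4-scout:free" (context_length 128000), the sort key ranks the
    # Llama 4 model first (tier 1 beats tier 3); within the same tier, the
    # negated context_length puts longer-context models ahead.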

    def _get_fallback_model(self) -> str:
        """
        Get a fallback model if API calls fail.

        Returns:
            Model ID string for a known free model.
        """
        # Known free models, ordered by preference. Only the first entry is
        # returned; the rest document alternatives worth trying.
        fallback_models = [
            "meta-llama/llama-4-scout:free",
            "google/gemini-2.5-pro-exp-03-25:free",
            "mistralai/mistral-small-3.1-24b-instruct:free",
            "deepseek/deepseek-v3-base:free",
            "nousresearch/deephermes-3-llama-3-8b-preview:free",
            "huggingfaceh4/zephyr-7b-beta"  # Older but historically reliable
        ]
        return fallback_models[0]

    def _get_headers(self) -> Dict[str, str]:
        """
        Get headers for OpenRouter API requests.

        Returns:
            Dictionary of headers.
        """
        headers = {
            "Content-Type": "application/json"
        }
        # Add the API key and attribution headers if available
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
            headers["HTTP-Referer"] = self.app_url
            headers["X-Title"] = self.app_name
        return headers

    def list_models(self) -> List[Dict[str, Any]]:
        """
        List available models on OpenRouter.

        Returns:
            List of model information dictionaries.
        """
        try:
            headers = self._get_headers()
            response = requests.get(
                f"{self.base_url}/models",
                headers=headers,
                timeout=30  # Avoid hanging indefinitely on network issues
            )
            if response.status_code == 200:
                return response.json().get("data", [])
            else:
                logger.error(
                    f"Error listing models: {response.status_code} - {response.text}"
                )
                return []
        except Exception as e:
            logger.error(f"Exception listing models: {str(e)}")
            return []

    def list_free_models(self) -> List[Dict[str, Any]]:
        """
        List models that are free to use on OpenRouter.

        Returns:
            List of free model information dictionaries.
        """
        # Get all models
        models = self.list_models()

        # Filter for free models, checking multiple indicators
        free_models = []
        for model in models:
            model_id = model.get("id", "").lower()
            pricing = model.get("pricing", {})

            # Pricing values may be strings (e.g. "0"), so coerce to float
            # before comparing; unparseable values are treated as paid
            try:
                prompt_price = float(pricing.get("prompt") or 0)
                completion_price = float(pricing.get("completion") or 0)
            except (TypeError, ValueError):
                prompt_price = completion_price = -1.0

            # A model counts as free if any of these indicators is present:
            is_free = (
                # Explicit free tag in the model ID
                ":free" in model_id or "-free" in model_id
                # Zero pricing for both prompt and completion tokens
                or (prompt_price == 0 and completion_price == 0)
                # Explicit free_tier flag, if the API provides one
                or bool(model.get("free_tier", False))
            )
            if is_free:
                free_models.append(model)

        # Log the number of free models found
        logger.info(f"Found {len(free_models)} free models on OpenRouter")
        return free_models
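
    # OpenRouter streams OpenAI-style server-sent events. A typical chunk is
    # assumed to look like (shape per the OpenAI-compatible streaming format;
    # exact fields may vary):
    #   data: {"choices": [{"delta": {"content": "Hel"}}]}
    # with the stream terminated by:
    #   data: [DONE]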

    def _handle_streaming_response(self, response):
        """
        Handle a streaming response from the OpenRouter API.

        Args:
            response: Response object from requests.

        Returns:
            Combined text from the streaming response.
        """
        result = ""
        for line in response.iter_lines():
            if line:
                line_text = line.decode('utf-8')
                # Remove the "data: " prefix
                if line_text.startswith("data: "):
                    line_text = line_text[6:]
                # Stop at the end-of-stream marker
                if line_text.strip() == "[DONE]":
                    break
                try:
                    # Parse the JSON chunk
                    json_data = json.loads(line_text)
                    # Extract the text delta
                    if "choices" in json_data and json_data["choices"]:
                        delta = json_data["choices"][0].get("delta", {})
                        if "content" in delta:
                            result += delta["content"]
                except json.JSONDecodeError:
                    # Skip keep-alive comments and other non-JSON lines
                    pass
        return result

    def generate(
        self,
        prompt: str,
        temperature: float = 0.0,
        max_tokens: int = 1000,
        stream: bool = False
    ) -> str:
        """
        Generate text using the OpenRouter API with a free model.

        Args:
            prompt: The prompt to send to the model.
            temperature: Controls randomness. Lower is more deterministic.
            max_tokens: Maximum number of tokens to generate.
            stream: Whether to stream the response.

        Returns:
            Generated text from the model.
        """
        # Ensure we have a model selected
        if not self.model:
            self.update_best_free_model()

        # If there is still no model, return an error
        if not self.model:
            return "Error: No free models available on OpenRouter."

        try:
            headers = self._get_headers()
            # Use the OpenAI-compatible format for the request
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                stream=stream,  # Stream the HTTP body only when requested
                timeout=120  # Avoid hanging indefinitely on network issues
            )
            if response.status_code == 200:
                if stream:
                    # Combine the streamed chunks into one string
                    return self._handle_streaming_response(response)
                else:
                    # Handle a regular (non-streaming) response
                    data = response.json()
                    content = data["choices"][0]["message"]["content"]
                    # Log model usage for tracking
                    usage = data.get("usage", {})
                    logger.info(
                        f"Used model {self.model} - "
                        f"Input: {usage.get('prompt_tokens', 0)}, "
                        f"Output: {usage.get('completion_tokens', 0)}")
                    return content
            else:
                error_info = f"Error {response.status_code}"
                try:
                    error_detail = response.json()
                    error_message = error_detail.get(
                        "error", {}).get("message", "Unknown error")
                    error_info = f"{error_info}: {error_message}"
                except ValueError:
                    error_info = f"{error_info}: {response.text}"
                logger.error(f"Error generating text: {error_info}")

                # Check for specific error cases
                if "rate limit" in error_info.lower():
                    return ("Error: Rate limit exceeded for this free model. "
                            "Please try again later or try a different model.")

                # If the model itself is the problem, try a different one
                if "model" in error_info.lower() or "no endpoints" in error_info.lower():
                    prev_model = self.model
                    if self.update_best_free_model() and self.model != prev_model:
                        logger.info(
                            f"Retrying with different free model: {self.model}")
                        return self.generate(prompt, temperature, max_tokens, stream)

                return f"Error: Failed to generate response. {error_info}"
        except Exception as e:
            logger.error(f"Exception during text generation: {str(e)}")
            return f"Error: {str(e)}"


class OpenRouterFreeChain:
    """Chain for handling Q&A with OpenRouter free LLMs."""

    def __init__(self, adapter: OpenRouterFreeAdapter):
        """
        Initialize the OpenRouter free chain.

        Args:
            adapter: An initialized OpenRouterFreeAdapter.
        """
        self.adapter = adapter

    def create_prompt(self, query: str, context: List[str]) -> str:
        """
        Create a prompt for the LLM based on the query and context.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Formatted prompt string.
        """
        context_str = "\n\n".join(
            [f"Document {i+1}:\n{doc}" for i, doc in enumerate(context)])

        prompt = f"""You are an AI assistant answering questions based on the provided documents.

Context information:
{context_str}

Based on the above context, please answer the following question:
{query}

If the information to answer the question is not contained in the provided documents, respond with: "I don't have enough information in the provided documents to answer this question."

Answer:"""
        return prompt

    def run(self, query: str, context: List[str]) -> str:
        """
        Run the chain to get an answer.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Answer from the model.
        """
        prompt = self.create_prompt(query, context)
        return self.adapter.generate(prompt)


def get_best_free_model() -> str:
    """
    Get the best available free model from OpenRouter.

    Returns:
        Model ID string for the recommended free model.
    """
    adapter = OpenRouterFreeAdapter()
    # __init__ already selects a model; this call refreshes the selection
    adapter.update_best_free_model()
    return adapter.model
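

if __name__ == "__main__":
    # Minimal usage sketch, not part of the module's public API. Assumes
    # OPENROUTER_API_KEY is set in the environment; the documents and the
    # question below are illustrative placeholders.
    adapter = OpenRouterFreeAdapter()
    chain = OpenRouterFreeChain(adapter)
    docs = [
        "AskMyDocs indexes user documents and answers questions about them.",
        "Free OpenRouter models are selected automatically at startup.",
    ]
    print(f"Model: {adapter.model}")
    print(f"Answer: {chain.run('What does AskMyDocs do?', docs)}")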