|
|
|
import threading, requests |
|
from typing import Callable, List, Optional, Dict, Union, Any |
|
from litellm.caching import Cache |
|
from litellm._logging import set_verbose |
|
from litellm.proxy._types import KeyManagementSystem |
|
import httpx |
|
|
|
input_callback: List[Union[str, Callable]] = [] |
|
success_callback: List[Union[str, Callable]] = [] |
|
failure_callback: List[Union[str, Callable]] = [] |
|
callbacks: List[Callable] = [] |
|
_async_input_callback: List[ |
|
Callable |
|
] = [] |
|
_async_success_callback: List[ |
|
Union[str, Callable] |
|
] = [] |
|
_async_failure_callback: List[ |
|
Callable |
|
] = [] |
|
pre_call_rules: List[Callable] = [] |
|
post_call_rules: List[Callable] = [] |
|
email: Optional[ |
|
str |
|
] = None |
|
token: Optional[ |
|
str |
|
] = None |
|
# --- global behavior flags ---
telemetry: bool = True  # NOTE(review): presumably opt-out usage telemetry — confirm
max_tokens: int = 256  # default completion token budget — TODO confirm where applied
drop_params: bool = False  # NOTE(review): presumably drops provider-unsupported params when True — confirm
retry: bool = True

# --- provider API credentials; None until set by the user (or read from env elsewhere) ---
api_key: Optional[str] = None  # generic key, provider-specific keys below take precedence where set

openai_key: Optional[str] = None

azure_key: Optional[str] = None

anthropic_key: Optional[str] = None

replicate_key: Optional[str] = None

cohere_key: Optional[str] = None

maritalk_key: Optional[str] = None

ai21_key: Optional[str] = None

openrouter_key: Optional[str] = None

huggingface_key: Optional[str] = None

# Vertex AI uses project/location instead of a bare API key.
vertex_project: Optional[str] = None

vertex_location: Optional[str] = None

togetherai_api_key: Optional[str] = None

cloudflare_api_key: Optional[str] = None

baseten_key: Optional[str] = None

aleph_alpha_key: Optional[str] = None

nlp_cloud_key: Optional[str] = None

# --- feature toggles ---
use_client: bool = False

logging: bool = True  # NOTE(review): shadows the stdlib `logging` module name at module scope

caching: bool = False  # NOTE(review): possibly superseded by `cache` below — confirm

caching_with_models: bool = False
|
# Response cache instance (litellm.caching.Cache); None disables caching.
cache: Optional[Cache] = None
# User-supplied alias -> actual model name remappings.
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
# Spend cap in USD; 0.0 presumably means "no cap" — TODO confirm.
max_budget: float = 0.0
|
_openai_completion_params = [ |
|
"functions", |
|
"function_call", |
|
"temperature", |
|
"temperature", |
|
"top_p", |
|
"n", |
|
"stream", |
|
"stop", |
|
"max_tokens", |
|
"presence_penalty", |
|
"frequency_penalty", |
|
"logit_bias", |
|
"user", |
|
"request_timeout", |
|
"api_base", |
|
"api_version", |
|
"api_key", |
|
"deployment_id", |
|
"organization", |
|
"base_url", |
|
"default_headers", |
|
"timeout", |
|
"response_format", |
|
"seed", |
|
"tools", |
|
"tool_choice", |
|
"max_retries", |
|
] |
|
# Parameter names that are litellm-specific extensions (not part of the
# OpenAI API surface listed in _openai_completion_params above).
_litellm_completion_params = [
    "metadata",

    "acompletion",

    "caching",

    "mock_response",

    "api_key",

    "api_version",

    "api_base",

    "force_timeout",

    "logger_fn",

    "verbose",

    "custom_llm_provider",

    "litellm_logging_obj",

    "litellm_call_id",

    "use_client",

    "id",

    "fallbacks",

    "azure",

    "headers",

    "model_list",

    "num_retries",

    "context_window_fallback_dict",

    "roles",

    "final_prompt_value",

    "bos_token",

    "eos_token",

    "request_timeout",

    "complete_response",

    "self",

    "client",

    "rpm",

    "tpm",

    "input_cost_per_token",

    "output_cost_per_token",

    "hf_model_name",

    "model_info",

    "proxy_server_request",

    "preset_cache_key",

]
|
# Running spend accumulator checked against max_budget (starts as int 0,
# presumably accumulates float costs — TODO confirm).
_current_cost = 0

# Per-call error records — TODO confirm key/value schema against writers.
error_logs: Dict = {}

add_function_to_prompt: bool = False  # for providers without native function-calling, presumably — confirm

# Shared httpx clients, lazily created elsewhere when use_client is enabled.
client_session: Optional[httpx.Client] = None

aclient_session: Optional[httpx.AsyncClient] = None

model_fallbacks: Optional[List] = None

# Remote source of the model pricing / context-window map fetched at import.
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"

suppress_debug_info = False

dynamodb_table_name: Optional[str] = None

s3_callback_params: Optional[Dict] = None


# NOTE(review): 6000 here is seconds (~100 minutes), which is unusually high —
# confirm it was not intended to be milliseconds.
request_timeout: Optional[float] = 6000

num_retries: Optional[int] = None

fallbacks: Optional[List] = None

context_window_fallbacks: Optional[List] = None

allowed_fails: int = 0

num_retries_per_request: Optional[
    int
] = None


# --- secret manager integration (e.g. Google KMS); Any because the client
# type depends on which backend is configured ---
secret_manager_client: Optional[
    Any
] = None

_google_kms_resource_name: Optional[str] = None

_key_management_system: Optional[KeyManagementSystem] = None
|
|
|
|
|
|
|
def get_model_cost_map(url: str):
    """Return the model pricing / context-window map as a dict.

    Tries to download the latest map from ``url`` (5s timeout); on ANY
    failure — network error, non-2xx status, invalid JSON — falls back to
    the backup JSON bundled inside the ``litellm`` package, so importing
    the library works offline.
    """
    try:
        # requests.Response supports the context-manager protocol; exiting
        # the block releases the underlying connection.
        with requests.get(url, timeout=5) as response:
            response.raise_for_status()
            return response.json()
    except Exception:
        # Deliberately broad: any fetch problem means "use the shipped copy".
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            return json.load(f)
|
|
|
|
|
# Import-time side effect: downloads the cost map (falls back to the bundled
# backup file on failure — see get_model_cost_map).
model_cost = get_model_cost_map(url=model_cost_map_url)

# User-registered custom prompt templates, keyed by model name.
custom_prompt_dict: Dict[str, dict] = {}
|
|
|
|
|
|
|
class MyLocal(threading.local):
    """Thread-local storage for per-thread request context.

    Each thread sees its own independent ``user`` attribute; ``identify()``
    overwrites it per thread.
    """

    def __init__(self):
        # Placeholder default until identify() sets a real user id.
        self.user = "Hello World"
|
|
|
|
|
# Module-wide thread-local context; every thread gets its own `.user`.
_thread_context = MyLocal()
|
|
|
|
|
def identify(event_details):
    """Store the caller-supplied user id in thread-local context.

    Does nothing when ``event_details`` carries no ``"user"`` entry.
    """
    if "user" not in event_details:
        return
    _thread_context.user = event_details["user"]
|
|
|
|
|
|
|
# Per-request client overrides; None means "use the provider/SDK default".
api_base = None

headers = None

api_version = None

organization = None

config_path = None  # path to a litellm config file, presumably — TODO confirm
|
|
|
# Per-provider model name lists, populated below from the downloaded cost map.
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_language_models: List = []
vertex_vision_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []

# Dispatch table: cost-map "litellm_provider" tag -> the list it feeds.
# Replaces a 17-branch if/elif chain that re-read the tag on every branch.
_provider_to_model_list: Dict[str, List] = {
    "openai": open_ai_chat_completion_models,
    "text-completion-openai": open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "openrouter": openrouter_models,
    "vertex_ai-text-models": vertex_text_models,
    "vertex_ai-code-text-models": vertex_code_text_models,
    "vertex_ai-language-models": vertex_language_models,
    "vertex_ai-vision-models": vertex_vision_models,
    "vertex_ai-chat-models": vertex_chat_models,
    "vertex_ai-code-chat-models": vertex_code_chat_models,
    "ai21": ai21_models,
    "nlp_cloud": nlp_cloud_models,
    "aleph_alpha": aleph_alpha_models,
    "bedrock": bedrock_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
}

for key, value in model_cost.items():
    # Entries whose provider tag is missing or unknown are skipped, exactly
    # as the original elif chain's implicit "no match" case did.
    _target = _provider_to_model_list.get(value.get("litellm_provider"))
    if _target is not None:
        _target.append(key)
|
|
|
|
|
# Hostnames of third-party endpoints that speak the OpenAI wire protocol —
# presumably routed through the OpenAI client path; confirm against main.py.
openai_compatible_endpoints: List = [
    "api.perplexity.ai",

    "api.endpoints.anyscale.com/v1",

    "api.deepinfra.com/v1/openai",

    "api.mistral.ai/v1",

]
|
|
|
|
|
# Provider names whose APIs are OpenAI-compatible (subset of provider_list).
openai_compatible_providers: List = [
    "anyscale",

    "mistral",

    "deepinfra",

    "perplexity",

    "xinference",

]
|
|
|
|
|
|
|
# Well-known Replicate model ids ("owner/name:version-hash").
# Fix: the original was missing the comma after the flan-t5-large entry, so
# Python's implicit string concatenation fused it with the dolly-v2-12b entry
# into one invalid model id — comma restored, giving 8 distinct entries.
replicate_models: List = [
    # LLaMA-2 chat variants
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]
|
|
|
# meta-llama Llama-2 ids servable via Hugging Face: every size in base and
# chat flavors — first the "-hf"-suffixed (HF-format) ids, then the raw ones.
huggingface_models: List = [
    f"meta-llama/Llama-2-{size}{chat}{fmt}"
    for fmt in ("-hf", "")
    for size in ("7b", "13b", "70b")
    for chat in ("", "-chat")
]
|
|
|
# Well-known Together AI model ids, grouped by family.
together_ai_models: List = [
    # LLaMA-2 chat
    "togethercomputer/llama-2-70b-chat",

    # LLaMA-2 base / long-context
    "togethercomputer/llama-2-70b",

    "togethercomputer/LLaMA-2-7B-32K",

    "togethercomputer/Llama-2-7B-32K-Instruct",

    "togethercomputer/llama-2-7b",

    # Falcon
    "togethercomputer/falcon-40b-instruct",

    "togethercomputer/falcon-7b-instruct",

    # Alpaca
    "togethercomputer/alpaca-7b",

    # StarChat
    "HuggingFaceH4/starchat-alpha",

    # Code models
    "togethercomputer/CodeLlama-34b",

    "togethercomputer/CodeLlama-34b-Instruct",

    "togethercomputer/CodeLlama-34b-Python",

    "defog/sqlcoder",

    "NumbersStation/nsql-llama-2-7B",

    "WizardLM/WizardCoder-15B-V1.0",

    "WizardLM/WizardCoder-Python-34B-V1.0",

    # Other language models
    "NousResearch/Nous-Hermes-Llama2-13b",

    "Austism/chronos-hermes-13b",

    "upstage/SOLAR-0-70b-16bit",

    "WizardLM/WizardLM-70B-V1.0",

]
|
|
|
|
|
# Baseten deployment ids (opaque hashes, not model names) — presumably map to
# specific hosted models (e.g. WizardLM / MPT deployments); TODO confirm which.
baseten_models: List = [
    "qvv0xeq",

    "q841o8w",

    "31dxrj3",

]
|
|
|
|
|
|
|
|
|
|
|
# Azure deployment names -> litellm "azure/" prefixed model ids.
azure_llms = {
    "gpt-35-turbo": "azure/gpt-35-turbo",

    "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",

    "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",

}


# Azure embedding deployment names -> "azure/" prefixed ids.
azure_embedding_models = {
    "ada": "azure/ada",

}


# Models known to run on the Petals distributed network.
petals_models = [
    "petals-team/StableBeluga2",

]


# Default/example model ids for locally-hosted providers.
ollama_models = ["llama2"]


maritalk_models = ["maritalk"]
|
|
|
# Flat list of every known model name, preserving the per-provider order.
# NOTE(review): vertex vision/code lists, petals and azure entries are not
# included — confirm whether that omission is intentional.
model_list = [
    model_name
    for provider_models in (
        open_ai_chat_completion_models,
        open_ai_text_completion_models,
        cohere_models,
        anthropic_models,
        replicate_models,
        openrouter_models,
        huggingface_models,
        vertex_chat_models,
        vertex_text_models,
        ai21_models,
        together_ai_models,
        baseten_models,
        aleph_alpha_models,
        nlp_cloud_models,
        ollama_models,
        bedrock_models,
        deepinfra_models,
        perplexity_models,
        maritalk_models,
    )
    for model_name in provider_models
]
|
|
|
# Identifiers for every provider litellm can route to (the valid values of
# "custom_llm_provider" / model-name prefixes).
provider_list: List = [
    "openai",

    "custom_openai",

    "text-completion-openai",

    "cohere",

    "anthropic",

    "replicate",

    "huggingface",

    "together_ai",

    "openrouter",

    "vertex_ai",

    "palm",

    "gemini",

    "ai21",

    "baseten",

    "azure",

    "sagemaker",

    "bedrock",

    "vllm",

    "nlp_cloud",

    "petals",

    "oobabooga",

    "ollama",

    "ollama_chat",

    "deepinfra",

    "perplexity",

    "anyscale",

    "mistral",

    "maritalk",

    "voyage",

    "cloudflare",

    "xinference",

    "custom",

]
|
|
|
# Provider name -> list of its known models. Note: only a subset of
# provider_list has an entry here (local/self-hosted providers have no
# fixed model list).
models_by_provider: dict = {
    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,

    "cohere": cohere_models,

    "anthropic": anthropic_models,

    "replicate": replicate_models,

    "huggingface": huggingface_models,

    "together_ai": together_ai_models,

    "baseten": baseten_models,

    "openrouter": openrouter_models,

    "vertex_ai": vertex_chat_models + vertex_text_models,

    "ai21": ai21_models,

    "bedrock": bedrock_models,

    "petals": petals_models,

    "ollama": ollama_models,

    "deepinfra": deepinfra_models,

    "perplexity": perplexity_models,

    "maritalk": maritalk_models,

}
|
|
|
|
|
# Fallback map: model -> its larger-context sibling, used when a request
# exceeds the original model's context window.
longer_context_model_fallback_dict: dict = {
    # OpenAI
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",

    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",

    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",

    "gpt-4": "gpt-4-32k",

    "gpt-4-0314": "gpt-4-32k-0314",

    "gpt-4-0613": "gpt-4-32k-0613",

    # Anthropic
    "claude-instant-1": "claude-2",

    "claude-instant-1.2": "claude-2",

    # Vertex AI
    "chat-bison": "chat-bison-32k",

    "chat-bison@001": "chat-bison-32k",

    "codechat-bison": "codechat-bison-32k",

    "codechat-bison@001": "codechat-bison-32k",

    # OpenRouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",

    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",

}
|
|
|
|
|
# --- embedding model registries, per provider ---
open_ai_embedding_models: List = ["text-embedding-ada-002"]

cohere_embedding_models: List = [
    "embed-english-v3.0",

    "embed-english-light-v3.0",

    "embed-multilingual-v3.0",

    "embed-english-v2.0",

    "embed-english-light-v2.0",

    "embed-multilingual-v2.0",

]

bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",

    "cohere.embed-english-v3",

    "cohere.embed-multilingual-v3",

]


# Flat union of all known embedding models.
all_embedding_models = (
    open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
)


# OpenAI image generation models.
openai_image_generation_models = ["dall-e-2", "dall-e-3"]
|
|
|
|
|
from .timeout import timeout |
|
from .utils import ( |
|
client, |
|
exception_type, |
|
get_optional_params, |
|
modify_integration, |
|
token_counter, |
|
cost_per_token, |
|
completion_cost, |
|
get_litellm_params, |
|
Logging, |
|
acreate, |
|
get_model_list, |
|
get_max_tokens, |
|
get_model_info, |
|
register_prompt_template, |
|
validate_environment, |
|
check_valid_key, |
|
get_llm_provider, |
|
register_model, |
|
encode, |
|
decode, |
|
_calculate_retry_after, |
|
_should_retry, |
|
get_secret, |
|
) |
|
from .llms.huggingface_restapi import HuggingfaceConfig |
|
from .llms.anthropic import AnthropicConfig |
|
from .llms.replicate import ReplicateConfig |
|
from .llms.cohere import CohereConfig |
|
from .llms.ai21 import AI21Config |
|
from .llms.together_ai import TogetherAIConfig |
|
from .llms.cloudflare import CloudflareConfig |
|
from .llms.palm import PalmConfig |
|
from .llms.gemini import GeminiConfig |
|
from .llms.nlp_cloud import NLPCloudConfig |
|
from .llms.aleph_alpha import AlephAlphaConfig |
|
from .llms.petals import PetalsConfig |
|
from .llms.vertex_ai import VertexAIConfig |
|
from .llms.sagemaker import SagemakerConfig |
|
from .llms.ollama import OllamaConfig |
|
from .llms.maritalk import MaritTalkConfig |
|
from .llms.bedrock import ( |
|
AmazonTitanConfig, |
|
AmazonAI21Config, |
|
AmazonAnthropicConfig, |
|
AmazonCohereConfig, |
|
AmazonLlamaConfig, |
|
) |
|
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig |
|
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError |
|
from .main import * |
|
from .integrations import * |
|
from .exceptions import ( |
|
AuthenticationError, |
|
InvalidRequestError, |
|
BadRequestError, |
|
NotFoundError, |
|
RateLimitError, |
|
ServiceUnavailableError, |
|
OpenAIError, |
|
ContextWindowExceededError, |
|
ContentPolicyViolationError, |
|
BudgetExceededError, |
|
APIError, |
|
Timeout, |
|
APIConnectionError, |
|
APIResponseValidationError, |
|
UnprocessableEntityError, |
|
) |
|
from .budget_manager import BudgetManager |
|
from .proxy.proxy_cli import run_server |
|
from .router import Router |
|
|