### INIT VARIABLES ### import threading, requests from typing import Callable, List, Optional, Dict, Union, Any from litellm.caching import Cache from litellm._logging import set_verbose from litellm.proxy._types import KeyManagementSystem import httpx input_callback: List[Union[str, Callable]] = [] success_callback: List[Union[str, Callable]] = [] failure_callback: List[Union[str, Callable]] = [] callbacks: List[Callable] = [] _async_input_callback: List[ Callable ] = [] # internal variable - async custom callbacks are routed here. _async_success_callback: List[ Union[str, Callable] ] = [] # internal variable - async custom callbacks are routed here. _async_failure_callback: List[ Callable ] = [] # internal variable - async custom callbacks are routed here. pre_call_rules: List[Callable] = [] post_call_rules: List[Callable] = [] email: Optional[ str ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 token: Optional[ str ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 telemetry = True max_tokens = 256 # OpenAI Defaults drop_params = False retry = True api_key: Optional[str] = None openai_key: Optional[str] = None azure_key: Optional[str] = None anthropic_key: Optional[str] = None replicate_key: Optional[str] = None cohere_key: Optional[str] = None maritalk_key: Optional[str] = None ai21_key: Optional[str] = None openrouter_key: Optional[str] = None huggingface_key: Optional[str] = None vertex_project: Optional[str] = None vertex_location: Optional[str] = None togetherai_api_key: Optional[str] = None cloudflare_api_key: Optional[str] = None baseten_key: Optional[str] = None aleph_alpha_key: Optional[str] = None nlp_cloud_key: Optional[str] = None use_client: bool = False logging: bool = True caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 cache: Optional[ Cache ] = None # cache object <- use this - https://docs.litellm.ai/docs/caching model_alias_map: Dict[str, str] = {} model_group_alias_map: Dict[str, str] = {} max_budget: float = 0.0 # set the max budget across all providers _openai_completion_params = [ "functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries", ] _litellm_completion_params = [ "metadata", "acompletion", "caching", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name", "model_info", "proxy_server_request", "preset_cache_key", ] _current_cost = 0 # private variable, used if max budget is set error_logs: Dict = {} add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt client_session: Optional[httpx.Client] = None aclient_session: Optional[httpx.AsyncClient] = None model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks' model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" suppress_debug_info = False dynamodb_table_name: Optional[str] = None s3_callback_params: Optional[Dict] = None #### RELIABILITY #### request_timeout: Optional[float] = 6000 num_retries: Optional[int] = None # per model endpoint fallbacks: Optional[List] = None context_window_fallbacks: Optional[List] = None allowed_fails: int = 0 num_retries_per_request: Optional[ int ] = None # for the request overall (incl. fallbacks + model retries) ####### SECRET MANAGERS ##################### secret_manager_client: Optional[ Any ] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc. _google_kms_resource_name: Optional[str] = None _key_management_system: Optional[KeyManagementSystem] = None ############################################# def get_model_cost_map(url: str): try: with requests.get( url, timeout=5 ) as response: # set a 5 second timeout for the get request response.raise_for_status() # Raise an exception if the request is unsuccessful content = response.json() return content except Exception as e: import importlib.resources import json with importlib.resources.open_text( "litellm", "model_prices_and_context_window_backup.json" ) as f: content = json.load(f) return content model_cost = get_model_cost_map(url=model_cost_map_url) custom_prompt_dict: Dict[str, dict] = {} ####### THREAD-SPECIFIC DATA ################### class MyLocal(threading.local): def __init__(self): self.user = "Hello World" _thread_context = MyLocal() def identify(event_details): # Store user in thread local data if "user" in event_details: _thread_context.user = event_details["user"] ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc. api_base = None headers = None api_version = None organization = None config_path = None ####### COMPLETION MODELS ################### open_ai_chat_completion_models: List = [] open_ai_text_completion_models: List = [] cohere_models: List = [] anthropic_models: List = [] openrouter_models: List = [] vertex_language_models: List = [] vertex_vision_models: List = [] vertex_chat_models: List = [] vertex_code_chat_models: List = [] vertex_text_models: List = [] vertex_code_text_models: List = [] ai21_models: List = [] nlp_cloud_models: List = [] aleph_alpha_models: List = [] bedrock_models: List = [] deepinfra_models: List = [] perplexity_models: List = [] for key, value in model_cost.items(): if value.get("litellm_provider") == "openai": open_ai_chat_completion_models.append(key) elif value.get("litellm_provider") == "text-completion-openai": open_ai_text_completion_models.append(key) elif value.get("litellm_provider") == "cohere": cohere_models.append(key) elif value.get("litellm_provider") == "anthropic": anthropic_models.append(key) elif value.get("litellm_provider") == "openrouter": openrouter_models.append(key) elif value.get("litellm_provider") == "vertex_ai-text-models": vertex_text_models.append(key) elif value.get("litellm_provider") == "vertex_ai-code-text-models": vertex_code_text_models.append(key) elif value.get("litellm_provider") == "vertex_ai-language-models": vertex_language_models.append(key) elif value.get("litellm_provider") == "vertex_ai-vision-models": vertex_vision_models.append(key) elif value.get("litellm_provider") == "vertex_ai-chat-models": vertex_chat_models.append(key) elif value.get("litellm_provider") == "vertex_ai-code-chat-models": vertex_code_chat_models.append(key) elif value.get("litellm_provider") == "ai21": ai21_models.append(key) elif value.get("litellm_provider") == "nlp_cloud": nlp_cloud_models.append(key) elif value.get("litellm_provider") == "aleph_alpha": aleph_alpha_models.append(key) elif value.get("litellm_provider") == "bedrock": bedrock_models.append(key) elif value.get("litellm_provider") == "deepinfra": deepinfra_models.append(key) elif value.get("litellm_provider") == "perplexity": perplexity_models.append(key) # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary openai_compatible_endpoints: List = [ "api.perplexity.ai", "api.endpoints.anyscale.com/v1", "api.deepinfra.com/v1/openai", "api.mistral.ai/v1", ] # this is maintained for Exception Mapping openai_compatible_providers: List = [ "anyscale", "mistral", "deepinfra", "perplexity", "xinference", ] # well supported replicate llms replicate_models: List = [ # llama replicate supported LLMs "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db", # Vicuna "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b", "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe", # Flan T-5 "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f" # Others "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5", "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad", ] huggingface_models: List = [ "meta-llama/Llama-2-7b-hf", "meta-llama/Llama-2-7b-chat-hf", "meta-llama/Llama-2-13b-hf", "meta-llama/Llama-2-13b-chat-hf", "meta-llama/Llama-2-70b-hf", "meta-llama/Llama-2-70b-chat-hf", "meta-llama/Llama-2-7b", "meta-llama/Llama-2-7b-chat", "meta-llama/Llama-2-13b", "meta-llama/Llama-2-13b-chat", "meta-llama/Llama-2-70b", "meta-llama/Llama-2-70b-chat", ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers together_ai_models: List = [ # llama llms - chat "togethercomputer/llama-2-70b-chat", # llama llms - language / instruct "togethercomputer/llama-2-70b", "togethercomputer/LLaMA-2-7B-32K", "togethercomputer/Llama-2-7B-32K-Instruct", "togethercomputer/llama-2-7b", # falcon llms "togethercomputer/falcon-40b-instruct", "togethercomputer/falcon-7b-instruct", # alpaca "togethercomputer/alpaca-7b", # chat llms "HuggingFaceH4/starchat-alpha", # code llms "togethercomputer/CodeLlama-34b", "togethercomputer/CodeLlama-34b-Instruct", "togethercomputer/CodeLlama-34b-Python", "defog/sqlcoder", "NumbersStation/nsql-llama-2-7B", "WizardLM/WizardCoder-15B-V1.0", "WizardLM/WizardCoder-Python-34B-V1.0", # language llms "NousResearch/Nous-Hermes-Llama2-13b", "Austism/chronos-hermes-13b", "upstage/SOLAR-0-70b-16bit", "WizardLM/WizardLM-70B-V1.0", ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...) baseten_models: List = [ "qvv0xeq", "q841o8w", "31dxrj3", ] # FALCON 7B # WizardLM # Mosaic ML # used for Cost Tracking & Token counting # https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/ # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting azure_llms = { "gpt-35-turbo": "azure/gpt-35-turbo", "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k", "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct", } azure_embedding_models = { "ada": "azure/ada", } petals_models = [ "petals-team/StableBeluga2", ] ollama_models = ["llama2"] maritalk_models = ["maritalk"] model_list = ( open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models + replicate_models + openrouter_models + huggingface_models + vertex_chat_models + vertex_text_models + ai21_models + together_ai_models + baseten_models + aleph_alpha_models + nlp_cloud_models + ollama_models + bedrock_models + deepinfra_models + perplexity_models + maritalk_models ) provider_list: List = [ "openai", "custom_openai", "text-completion-openai", "cohere", "anthropic", "replicate", "huggingface", "together_ai", "openrouter", "vertex_ai", "palm", "gemini", "ai21", "baseten", "azure", "sagemaker", "bedrock", "vllm", "nlp_cloud", "petals", "oobabooga", "ollama", "ollama_chat", "deepinfra", "perplexity", "anyscale", "mistral", "maritalk", "voyage", "cloudflare", "xinference", "custom", # custom apis ] models_by_provider: dict = { "openai": open_ai_chat_completion_models + open_ai_text_completion_models, "cohere": cohere_models, "anthropic": anthropic_models, "replicate": replicate_models, "huggingface": huggingface_models, "together_ai": together_ai_models, "baseten": baseten_models, "openrouter": openrouter_models, "vertex_ai": vertex_chat_models + vertex_text_models, "ai21": ai21_models, "bedrock": bedrock_models, "petals": petals_models, "ollama": ollama_models, "deepinfra": deepinfra_models, "perplexity": perplexity_models, "maritalk": maritalk_models, } # mapping for those models which have larger equivalents longer_context_model_fallback_dict: dict = { # openai chat completion models "gpt-3.5-turbo": "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301", "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613", "gpt-4": "gpt-4-32k", "gpt-4-0314": "gpt-4-32k-0314", "gpt-4-0613": "gpt-4-32k-0613", # anthropic "claude-instant-1": "claude-2", "claude-instant-1.2": "claude-2", # vertexai "chat-bison": "chat-bison-32k", "chat-bison@001": "chat-bison-32k", "codechat-bison": "codechat-bison-32k", "codechat-bison@001": "codechat-bison-32k", # openrouter "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k", "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2", } ####### EMBEDDING MODELS ################### open_ai_embedding_models: List = ["text-embedding-ada-002"] cohere_embedding_models: List = [ "embed-english-v3.0", "embed-english-light-v3.0", "embed-multilingual-v3.0", "embed-english-v2.0", "embed-english-light-v2.0", "embed-multilingual-v2.0", ] bedrock_embedding_models: List = [ "amazon.titan-embed-text-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3", ] all_embedding_models = ( open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models ) ####### IMAGE GENERATION MODELS ################### openai_image_generation_models = ["dall-e-2", "dall-e-3"] from .timeout import timeout from .utils import ( client, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, get_litellm_params, Logging, acreate, get_model_list, get_max_tokens, get_model_info, register_prompt_template, validate_environment, check_valid_key, get_llm_provider, register_model, encode, decode, _calculate_retry_after, _should_retry, get_secret, ) from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.replicate import ReplicateConfig from .llms.cohere import CohereConfig from .llms.ai21 import AI21Config from .llms.together_ai import TogetherAIConfig from .llms.cloudflare import CloudflareConfig from .llms.palm import PalmConfig from .llms.gemini import GeminiConfig from .llms.nlp_cloud import NLPCloudConfig from .llms.aleph_alpha import AlephAlphaConfig from .llms.petals import PetalsConfig from .llms.vertex_ai import VertexAIConfig from .llms.sagemaker import SagemakerConfig from .llms.ollama import OllamaConfig from .llms.maritalk import MaritTalkConfig from .llms.bedrock import ( AmazonTitanConfig, AmazonAI21Config, AmazonAnthropicConfig, AmazonCohereConfig, AmazonLlamaConfig, ) from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig from .llms.azure import AzureOpenAIConfig, AzureOpenAIError from .main import * # type: ignore from .integrations import * from .exceptions import ( AuthenticationError, InvalidRequestError, BadRequestError, NotFoundError, RateLimitError, ServiceUnavailableError, OpenAIError, ContextWindowExceededError, ContentPolicyViolationError, BudgetExceededError, APIError, Timeout, APIConnectionError, APIResponseValidationError, UnprocessableEntityError, ) from .budget_manager import BudgetManager from .proxy.proxy_cli import run_server from .router import Router