import json
import time
import uuid
from enum import Enum
from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Union

from aiohttp import FormData
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.completion_usage import (
    CompletionTokensDetails,
    CompletionUsage,
    PromptTokensDetails,
)
from openai.types.moderation import (
    Categories,
    CategoryAppliedInputTypes,
    CategoryScores,
)
from openai.types.moderation_create_response import Moderation, ModerationCreateResponse
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
from typing_extensions import Callable, Dict, Required, TypedDict, override

import litellm

from ..litellm_core_utils.core_helpers import map_finish_reason
from .guardrails import GuardrailEventHooks
from .llms.openai import (
    Batch,
    ChatCompletionAnnotation,
    ChatCompletionRedactedThinkingBlock,
    ChatCompletionThinkingBlock,
    ChatCompletionToolCallChunk,
    ChatCompletionUsageBlock,
    FileSearchTool,
    OpenAIChatCompletionChunk,
    OpenAIFileObject,
    OpenAIRealtimeStreamList,
    WebSearchOptions,
)
from .rerank import RerankResponse
def _generate_id():  # private helper function
    return "chatcmpl-" + str(uuid.uuid4())
class LiteLLMPydanticObjectBase(BaseModel):
    """
    Implements default functions that all pydantic objects should have.
    """

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump(**kwargs)  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict(**kwargs)

    def fields_set(self):
        try:
            return self.model_fields_set  # noqa
        except Exception:
            # if using pydantic v1
            return self.__fields_set__

    model_config = ConfigDict(protected_namespaces=())
class LiteLLMCommonStrings(Enum):
    redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
    llm_provider_not_provided = "Unmapped LLM provider for this endpoint. You passed model={model}, custom_llm_provider={custom_llm_provider}. Check supported provider and route: https://docs.litellm.ai/docs/providers"


SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
class CostPerToken(TypedDict):
    input_cost_per_token: float
    output_cost_per_token: float


class ProviderField(TypedDict):
    field_name: str
    field_type: Literal["string"]
    field_description: str
    field_value: str


class ProviderSpecificModelInfo(TypedDict, total=False):
    supports_system_messages: Optional[bool]
    supports_response_schema: Optional[bool]
    supports_vision: Optional[bool]
    supports_function_calling: Optional[bool]
    supports_tool_choice: Optional[bool]
    supports_assistant_prefill: Optional[bool]
    supports_prompt_caching: Optional[bool]
    supports_audio_input: Optional[bool]
    supports_embedding_image_input: Optional[bool]
    supports_audio_output: Optional[bool]
    supports_pdf_input: Optional[bool]
    supports_native_streaming: Optional[bool]
    supports_parallel_function_calling: Optional[bool]
    supports_web_search: Optional[bool]
    supports_reasoning: Optional[bool]


class SearchContextCostPerQuery(TypedDict, total=False):
    search_context_size_low: float
    search_context_size_medium: float
    search_context_size_high: float
class ModelInfoBase(ProviderSpecificModelInfo, total=False):
    key: Required[str]  # the key in litellm.model_cost which is returned

    max_tokens: Required[Optional[int]]
    max_input_tokens: Required[Optional[int]]
    max_output_tokens: Required[Optional[int]]
    input_cost_per_token: Required[float]
    cache_creation_input_token_cost: Optional[float]
    cache_read_input_token_cost: Optional[float]
    input_cost_per_character: Optional[float]  # only for vertex ai models
    input_cost_per_audio_token: Optional[float]
    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
    input_cost_per_token_above_200k_tokens: Optional[
        float
    ]  # only for vertex ai gemini-2.5-pro models
    input_cost_per_character_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    input_cost_per_query: Optional[float]  # only for rerank models
    input_cost_per_image: Optional[float]  # only for vertex ai models
    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_second: Optional[float]  # for OpenAI Speech models
    input_cost_per_token_batches: Optional[float]
    output_cost_per_token_batches: Optional[float]
    output_cost_per_token: Required[float]
    output_cost_per_character: Optional[float]  # only for vertex ai models
    output_cost_per_audio_token: Optional[float]
    output_cost_per_token_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    output_cost_per_token_above_200k_tokens: Optional[
        float
    ]  # only for vertex ai gemini-2.5-pro models
    output_cost_per_character_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    output_cost_per_image: Optional[float]
    output_vector_size: Optional[int]
    output_cost_per_reasoning_token: Optional[float]
    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_second: Optional[float]  # for OpenAI Speech models
    search_context_cost_per_query: Optional[
        SearchContextCostPerQuery
    ]  # Cost for using web search tool
    litellm_provider: Required[str]
    mode: Required[
        Literal[
            "completion", "embedding", "image_generation", "chat", "audio_transcription"
        ]
    ]
    tpm: Optional[int]
    rpm: Optional[int]
class ModelInfo(ModelInfoBase, total=False):
    """
    Model info for a given model; this is the information found in
    litellm.model_prices_and_context_window.json.
    """

    supported_openai_params: Required[Optional[List[str]]]
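# Illustrative sketch of a conforming entry (the model name and prices below
# are made up, not real litellm.model_cost data):
#
#   >>> info: ModelInfoBase = {
#   ...     "key": "example-chat-model",
#   ...     "max_tokens": 4096,
#   ...     "max_input_tokens": 4096,
#   ...     "max_output_tokens": 4096,
#   ...     "input_cost_per_token": 1e-06,
#   ...     "output_cost_per_token": 2e-06,
#   ...     "litellm_provider": "openai",
#   ...     "mode": "chat",
#   ... }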
class GenericStreamingChunk(TypedDict, total=False):
    text: Required[str]
    tool_use: Optional[ChatCompletionToolCallChunk]
    is_finished: Required[bool]
    finish_reason: Required[str]
    usage: Required[Optional[ChatCompletionUsageBlock]]
    index: int

    # use this dict if you want to return any provider specific fields in the response
    provider_specific_fields: Optional[Dict[str, Any]]
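# Illustrative sketch of the minimal shape a provider adapter might return for
# a mid-stream chunk (values are hypothetical):
#
#   >>> chunk: GenericStreamingChunk = {
#   ...     "text": "Hello",
#   ...     "is_finished": False,
#   ...     "finish_reason": "",
#   ...     "usage": None,
#   ...     "index": 0,
#   ... }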
class CallTypes(Enum):
    embedding = "embedding"
    aembedding = "aembedding"
    completion = "completion"
    acompletion = "acompletion"
    atext_completion = "atext_completion"
    text_completion = "text_completion"
    image_generation = "image_generation"
    aimage_generation = "aimage_generation"
    moderation = "moderation"
    amoderation = "amoderation"
    atranscription = "atranscription"
    transcription = "transcription"
    aspeech = "aspeech"
    speech = "speech"
    rerank = "rerank"
    arerank = "arerank"
    arealtime = "_arealtime"
    create_batch = "create_batch"
    acreate_batch = "acreate_batch"
    aretrieve_batch = "aretrieve_batch"
    retrieve_batch = "retrieve_batch"
    pass_through = "pass_through_endpoint"
    anthropic_messages = "anthropic_messages"
    get_assistants = "get_assistants"
    aget_assistants = "aget_assistants"
    create_assistants = "create_assistants"
    acreate_assistants = "acreate_assistants"
    delete_assistant = "delete_assistant"
    adelete_assistant = "adelete_assistant"
    acreate_thread = "acreate_thread"
    create_thread = "create_thread"
    aget_thread = "aget_thread"
    get_thread = "get_thread"
    a_add_message = "a_add_message"
    add_message = "add_message"
    aget_messages = "aget_messages"
    get_messages = "get_messages"
    arun_thread = "arun_thread"
    run_thread = "run_thread"
    arun_thread_stream = "arun_thread_stream"
    run_thread_stream = "run_thread_stream"
    afile_retrieve = "afile_retrieve"
    file_retrieve = "file_retrieve"
    afile_delete = "afile_delete"
    file_delete = "file_delete"
    afile_list = "afile_list"
    file_list = "file_list"
    acreate_file = "acreate_file"
    create_file = "create_file"
    afile_content = "afile_content"
    file_content = "file_content"
    create_fine_tuning_job = "create_fine_tuning_job"
    acreate_fine_tuning_job = "acreate_fine_tuning_job"
    acancel_fine_tuning_job = "acancel_fine_tuning_job"
    cancel_fine_tuning_job = "cancel_fine_tuning_job"
    alist_fine_tuning_jobs = "alist_fine_tuning_jobs"
    list_fine_tuning_jobs = "list_fine_tuning_jobs"
    aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
    retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
    responses = "responses"
    aresponses = "aresponses"
CallTypesLiteral = Literal[
    "embedding",
    "aembedding",
    "completion",
    "acompletion",
    "atext_completion",
    "text_completion",
    "image_generation",
    "aimage_generation",
    "moderation",
    "amoderation",
    "atranscription",
    "transcription",
    "aspeech",
    "speech",
    "rerank",
    "arerank",
    "_arealtime",
    "create_batch",
    "acreate_batch",
    "pass_through_endpoint",
    "anthropic_messages",
    "aretrieve_batch",
    "retrieve_batch",
]
class PassthroughCallTypes(Enum):
    passthrough_image_generation = "passthrough-image-generation"
class TopLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """
class ChatCompletionTokenLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """

    top_logprobs: List[TopLogprob]
    """List of the most likely tokens and their log probability, at this token
    position.

    In rare cases, there may be fewer than the number of requested `top_logprobs`
    returned.
    """

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)


class ChoiceLogprobs(OpenAIObject):
    content: Optional[List[ChatCompletionTokenLogprob]] = None
    """A list of message content tokens with log probability information."""

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)
class FunctionCall(OpenAIObject):
    arguments: str
    name: Optional[str] = None


class Function(OpenAIObject):
    arguments: str
    name: Optional[
        str
    ]  # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)

    def __init__(
        self,
        arguments: Optional[Union[Dict, str]] = None,
        name: Optional[str] = None,
        **params,
    ):
        if arguments is None:
            if params.get("parameters", None) is not None and isinstance(
                params["parameters"], dict
            ):
                arguments = json.dumps(params["parameters"])
                params.pop("parameters")
            else:
                arguments = ""
        elif isinstance(arguments, Dict):
            arguments = json.dumps(arguments)

        # Build a dictionary with the structure your BaseModel expects
        data = {"arguments": arguments, "name": name}

        super(Function, self).__init__(**data)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
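# Usage sketch (illustrative): a dict passed as `arguments` is serialized to a
# JSON string, matching the OpenAI wire format, e.g.:
#
#   >>> f = Function(arguments={"location": "Paris"}, name="get_weather")
#   >>> f.arguments
#   '{"location": "Paris"}'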
class ChatCompletionDeltaToolCall(OpenAIObject):
    id: Optional[str] = None
    function: Function
    type: Optional[str] = None
    index: int
class HiddenParams(OpenAIObject):
    original_response: Optional[Union[str, Any]] = None
    model_id: Optional[str] = None  # used in Router for individual deployments
    api_base: Optional[str] = None  # returns api base used for making completion call

    model_config = ConfigDict(extra="allow", protected_namespaces=())

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class ChatCompletionMessageToolCall(OpenAIObject):
    def __init__(
        self,
        function: Union[Dict, Function],
        id: Optional[str] = None,
        type: Optional[str] = None,
        **params,
    ):
        super(ChatCompletionMessageToolCall, self).__init__(**params)
        if isinstance(function, Dict):
            self.function = Function(**function)
        else:
            self.function = function

        if id is not None:
            self.id = id
        else:
            self.id = f"{uuid.uuid4()}"

        if type is not None:
            self.type = type
        else:
            self.type = "function"

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
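# Usage sketch (illustrative): `id` and `type` fall back to a fresh UUID and
# "function" when omitted, e.g.:
#
#   >>> tc = ChatCompletionMessageToolCall(
#   ...     function={"name": "get_weather", "arguments": "{}"}
#   ... )
#   >>> tc.type
#   'function'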
from openai.types.chat.chat_completion_audio import ChatCompletionAudio


class ChatCompletionAudioResponse(ChatCompletionAudio):
    def __init__(
        self,
        data: str,
        expires_at: int,
        transcript: str,
        id: Optional[str] = None,
        **params,
    ):
        if id is None:
            id = f"{uuid.uuid4()}"
        super(ChatCompletionAudioResponse, self).__init__(
            data=data, expires_at=expires_at, transcript=transcript, id=id, **params
        )

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
| """ | |
| Reference: | |
| ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)) | |
| """ | |
| def add_provider_specific_fields( | |
| object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]] | |
| ): | |
| if not provider_specific_fields: # set if provider_specific_fields is not empty | |
| return | |
| setattr(object, "provider_specific_fields", provider_specific_fields) | |
class Message(OpenAIObject):
    content: Optional[str]
    role: Literal["assistant", "user", "system", "tool", "function"]
    tool_calls: Optional[List[ChatCompletionMessageToolCall]]
    function_call: Optional[FunctionCall]
    audio: Optional[ChatCompletionAudioResponse] = None
    reasoning_content: Optional[str] = None
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(
        default=None, exclude=True
    )
    annotations: Optional[List[ChatCompletionAnnotation]] = None

    def __init__(
        self,
        content: Optional[str] = None,
        role: Literal["assistant"] = "assistant",
        function_call=None,
        tool_calls: Optional[list] = None,
        audio: Optional[ChatCompletionAudioResponse] = None,
        provider_specific_fields: Optional[Dict[str, Any]] = None,
        reasoning_content: Optional[str] = None,
        thinking_blocks: Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
        init_values: Dict[str, Any] = {
            "content": content,
            "role": role or "assistant",  # handle null input
            "function_call": (
                FunctionCall(**function_call) if function_call is not None else None
            ),
            "tool_calls": (
                [
                    (
                        ChatCompletionMessageToolCall(**tool_call)
                        if isinstance(tool_call, dict)
                        else tool_call
                    )
                    for tool_call in tool_calls
                ]
                if tool_calls is not None and len(tool_calls) > 0
                else None
            ),
        }

        if audio is not None:
            init_values["audio"] = audio

        if thinking_blocks is not None:
            init_values["thinking_blocks"] = thinking_blocks

        if annotations is not None:
            init_values["annotations"] = annotations

        if reasoning_content is not None:
            init_values["reasoning_content"] = reasoning_content

        super(Message, self).__init__(
            **init_values,  # type: ignore
            **params,
        )

        if audio is None:
            # delete audio from self
            # OpenAI compatible APIs like mistral API will raise an error if audio is passed in
            del self.audio

        if annotations is None:
            # ensure default response matches OpenAI spec
            # Some OpenAI compatible APIs raise an error if annotations are passed in
            del self.annotations

        if reasoning_content is None:
            # ensure default response matches OpenAI spec
            del self.reasoning_content

        if thinking_blocks is None:
            # ensure default response matches OpenAI spec
            del self.thinking_blocks

        add_provider_specific_fields(self, provider_specific_fields)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
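# Usage sketch (illustrative): optional fields left as None are deleted above,
# so the serialized message matches the OpenAI spec, e.g.:
#
#   >>> m = Message(content="This is a test")
#   >>> "reasoning_content" in m.json()
#   False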
class Delta(OpenAIObject):
    reasoning_content: Optional[str] = None
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

    def __init__(
        self,
        content=None,
        role=None,
        function_call=None,
        tool_calls=None,
        audio: Optional[ChatCompletionAudioResponse] = None,
        reasoning_content: Optional[str] = None,
        thinking_blocks: Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
        super(Delta, self).__init__(**params)
        add_provider_specific_fields(self, params.get("provider_specific_fields", {}))
        self.content = content
        self.role = role
        # Set default values and correct types
        self.function_call: Optional[Union[FunctionCall, Any]] = None
        self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
        self.audio: Optional[ChatCompletionAudioResponse] = None
        self.annotations: Optional[List[ChatCompletionAnnotation]] = None

        if reasoning_content is not None:
            self.reasoning_content = reasoning_content
        else:
            # ensure default response matches OpenAI spec
            del self.reasoning_content

        if thinking_blocks is not None:
            self.thinking_blocks = thinking_blocks
        else:
            # ensure default response matches OpenAI spec
            del self.thinking_blocks

        # Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec)
        if annotations is not None:
            self.annotations = annotations
        else:
            del self.annotations

        if function_call is not None and isinstance(function_call, dict):
            self.function_call = FunctionCall(**function_call)
        else:
            self.function_call = function_call

        if tool_calls is not None and isinstance(tool_calls, list):
            self.tool_calls = []
            for tool_call in tool_calls:
                if isinstance(tool_call, dict):
                    if tool_call.get("index", None) is None:
                        tool_call["index"] = 0
                    self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
                elif isinstance(tool_call, ChatCompletionDeltaToolCall):
                    self.tool_calls.append(tool_call)
        else:
            self.tool_calls = tool_calls

        self.audio = audio

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class Choices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        message: Optional[Union[Message, dict]] = None,
        logprobs: Optional[Union[ChoiceLogprobs, dict, Any]] = None,
        enhancements=None,
        **params,
    ):
        super(Choices, self).__init__(**params)
        if finish_reason is not None:
            self.finish_reason = map_finish_reason(
                finish_reason
            )  # set finish_reason for all responses
        else:
            self.finish_reason = "stop"
        self.index = index
        if message is None:
            self.message = Message()
        else:
            if isinstance(message, Message):
                self.message = message
            elif isinstance(message, dict):
                self.message = Message(**message)
        if logprobs is not None:
            if isinstance(logprobs, dict):
                self.logprobs = ChoiceLogprobs(**logprobs)
            else:
                self.logprobs = logprobs
        if enhancements is not None:
            self.enhancements = enhancements

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class CompletionTokensDetailsWrapper(
    CompletionTokensDetails
):  # wrapper for older openai versions
    text_tokens: Optional[int] = None
    """Text tokens generated by the model."""


class PromptTokensDetailsWrapper(
    PromptTokensDetails
):  # wrapper for older openai versions
    text_tokens: Optional[int] = None
    """Text tokens sent to the model."""

    image_tokens: Optional[int] = None
    """Image tokens sent to the model."""

    character_count: Optional[int] = None
    """Character count sent to the model. Used for Vertex AI multimodal embeddings."""

    image_count: Optional[int] = None
    """Number of images sent to the model. Used for Vertex AI multimodal embeddings."""

    video_length_seconds: Optional[float] = None
    """Length of videos sent to the model. Used for Vertex AI multimodal embeddings."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.character_count is None:
            del self.character_count
        if self.image_count is None:
            del self.image_count
        if self.video_length_seconds is None:
            del self.video_length_seconds
class Usage(CompletionUsage):
    _cache_creation_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.
    _cache_read_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.

    def __init__(
        self,
        prompt_tokens: Optional[int] = None,
        completion_tokens: Optional[int] = None,
        total_tokens: Optional[int] = None,
        reasoning_tokens: Optional[int] = None,
        prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None,
        completion_tokens_details: Optional[
            Union[CompletionTokensDetailsWrapper, dict]
        ] = None,
        **params,
    ):
        # handle reasoning_tokens
        _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
        if reasoning_tokens:
            text_tokens = (
                completion_tokens - reasoning_tokens if completion_tokens else None
            )
            completion_tokens_details = CompletionTokensDetailsWrapper(
                reasoning_tokens=reasoning_tokens, text_tokens=text_tokens
            )

        # Ensure completion_tokens_details is properly handled
        if completion_tokens_details:
            if isinstance(completion_tokens_details, dict):
                _completion_tokens_details = CompletionTokensDetailsWrapper(
                    **completion_tokens_details
                )
            elif isinstance(completion_tokens_details, CompletionTokensDetails):
                _completion_tokens_details = completion_tokens_details

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetailsWrapper(
                    cached_tokens=params["prompt_cache_hit_tokens"]
                )

        ## ANTHROPIC MAPPING ##
        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetailsWrapper(
                    cached_tokens=params["cache_read_input_tokens"]
                )

        # handle prompt_tokens_details
        _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
        if prompt_tokens_details:
            if isinstance(prompt_tokens_details, dict):
                _prompt_tokens_details = PromptTokensDetailsWrapper(
                    **prompt_tokens_details
                )
            elif isinstance(prompt_tokens_details, PromptTokensDetails):
                _prompt_tokens_details = prompt_tokens_details

        super().__init__(
            prompt_tokens=prompt_tokens or 0,
            completion_tokens=completion_tokens or 0,
            total_tokens=total_tokens or 0,
            completion_tokens_details=_completion_tokens_details or None,
            prompt_tokens_details=_prompt_tokens_details or None,
        )

        ## ANTHROPIC MAPPING ##
        if "cache_creation_input_tokens" in params and isinstance(
            params["cache_creation_input_tokens"], int
        ):
            self._cache_creation_input_tokens = params["cache_creation_input_tokens"]

        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            self._cache_read_input_tokens = params["cache_read_input_tokens"]

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]

        for k, v in params.items():
            setattr(self, k, v)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
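# Usage sketch (illustrative): passing `reasoning_tokens` populates
# `completion_tokens_details`, with `text_tokens` derived as the remainder:
#
#   >>> u = Usage(prompt_tokens=10, completion_tokens=30, total_tokens=40,
#   ...           reasoning_tokens=12)
#   >>> u.completion_tokens_details.reasoning_tokens
#   12
#   >>> u.completion_tokens_details.text_tokens
#   18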
class StreamingChoices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        delta: Optional[Delta] = None,
        logprobs=None,
        enhancements=None,
        **params,
    ):
        # Fix: Perplexity returns both `delta` and `message` on streaming chunks,
        # which causes OpenWebUI to render the text twice; drop `message` here.
        # https://github.com/BerriAI/litellm/issues/8455
        params.pop("message", None)
        super(StreamingChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        if delta is not None:
            if isinstance(delta, Delta):
                self.delta = delta
            elif isinstance(delta, dict):
                self.delta = Delta(**delta)
        else:
            self.delta = Delta()
        if enhancements is not None:
            self.enhancements = enhancements

        if logprobs is not None and isinstance(logprobs, dict):
            self.logprobs = ChoiceLogprobs(**logprobs)
        else:
            self.logprobs = logprobs  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
    def __init__(self, **kwargs):
        new_choices = []
        for choice in kwargs["choices"]:
            new_choice = StreamingChoices(**choice).model_dump()
            new_choices.append(new_choice)
        kwargs["choices"] = new_choices
        super().__init__(**kwargs)


from openai.types.chat import ChatCompletionChunk
class ModelResponseBase(OpenAIObject):
    id: str
    """A unique identifier for the completion."""

    created: int
    """The Unix timestamp (in seconds) of when the completion was created."""

    model: Optional[str] = None
    """The model used for completion."""

    object: str
    """The object type, e.g. "chat.completion" or "chat.completion.chunk"."""

    system_fingerprint: Optional[str] = None
    """This fingerprint represents the backend configuration that the model runs with.

    Can be used in conjunction with the `seed` request parameter to understand when
    backend changes have been made that might impact determinism.
    """

    _hidden_params: dict = {}

    _response_headers: Optional[dict] = None
class ModelResponseStream(ModelResponseBase):
    choices: List[StreamingChoices]
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

    def __init__(
        self,
        choices: Optional[
            Union[List[StreamingChoices], Union[StreamingChoices, dict, BaseModel]]
        ] = None,
        id: Optional[str] = None,
        created: Optional[int] = None,
        provider_specific_fields: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        if choices is not None and isinstance(choices, list):
            new_choices = []
            for choice in choices:
                _new_choice = None
                if isinstance(choice, StreamingChoices):
                    _new_choice = choice
                elif isinstance(choice, dict):
                    _new_choice = StreamingChoices(**choice)
                elif isinstance(choice, BaseModel):
                    _new_choice = StreamingChoices(**choice.model_dump())
                new_choices.append(_new_choice)
            kwargs["choices"] = new_choices
        else:
            kwargs["choices"] = [StreamingChoices()]

        if id is None:
            id = _generate_id()

        if created is None:
            created = int(time.time())

        if (
            "usage" in kwargs
            and kwargs["usage"] is not None
            and isinstance(kwargs["usage"], dict)
        ):
            kwargs["usage"] = Usage(**kwargs["usage"])

        kwargs["id"] = id
        kwargs["created"] = created
        kwargs["object"] = "chat.completion.chunk"
        kwargs["provider_specific_fields"] = provider_specific_fields

        super().__init__(**kwargs)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
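# Usage sketch (illustrative): dict choices are coerced to StreamingChoices and
# the chunk object type is set automatically, e.g.:
#
#   >>> chunk = ModelResponseStream(
#   ...     choices=[{"delta": {"content": "Hel"}, "index": 0}]
#   ... )
#   >>> chunk.object
#   'chat.completion.chunk'
#   >>> chunk.choices[0].delta.content
#   'Hel'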
class ModelResponse(ModelResponseBase):
    choices: List[Union[Choices, StreamingChoices]]
    """The list of completion choices the model generated for the input prompt."""

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        object=None,
        system_fingerprint=None,
        usage=None,
        stream=None,
        stream_options=None,
        response_ms=None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ) -> None:
        if stream is not None and stream is True:
            object = "chat.completion.chunk"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, StreamingChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = StreamingChoices(**choice)
                    elif isinstance(choice, BaseModel):
                        _new_choice = StreamingChoices(**choice.model_dump())
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [StreamingChoices()]
        else:
            object = "chat.completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    if isinstance(choice, Choices):
                        _new_choice = choice  # type: ignore
                    elif isinstance(choice, dict):
                        _new_choice = Choices(**choice)  # type: ignore
                    else:
                        _new_choice = choice
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [Choices()]
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())
        if usage is not None:
            if isinstance(usage, dict):
                usage = Usage(**usage)
        elif stream is None or stream is False:
            usage = Usage()
        if hidden_params:
            self._hidden_params = hidden_params

        if _response_headers:
            self._response_headers = _response_headers

        init_values = {
            "id": id,
            "choices": choices,
            "created": created,
            "model": model,
            "object": object,
            "system_fingerprint": system_fingerprint,
        }

        if usage is not None:
            init_values["usage"] = usage

        super().__init__(
            **init_values,
            **params,
        )

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
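# Usage sketch (illustrative): a non-streaming response built from plain dicts,
# showing the defaulted object type and coerced choices, e.g.:
#
#   >>> resp = ModelResponse(
#   ...     model="example-model",
#   ...     choices=[{"message": {"role": "assistant", "content": "hi"}}],
#   ... )
#   >>> resp.object
#   'chat.completion'
#   >>> resp.choices[0].message.content
#   'hi'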
class Embedding(OpenAIObject):
    embedding: Union[list, str] = []
    index: int
    object: Literal["embedding"]

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class EmbeddingResponse(OpenAIObject):
    model: Optional[str] = None
    """The model used for embedding."""

    data: List
    """The actual embedding value"""

    object: Literal["list"]
    """The object type, which is always "list" """

    usage: Optional[Usage] = None
    """Usage statistics for the embedding request."""

    _hidden_params: dict = {}
    _response_headers: Optional[Dict] = None
    _response_ms: Optional[float] = None

    def __init__(
        self,
        model: Optional[str] = None,
        usage: Optional[Usage] = None,
        response_ms=None,
        data: Optional[Union[List, List[Embedding]]] = None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ):
        object = "list"
        if response_ms:
            # persist latency on the instance, mirroring TextCompletionResponse
            self._response_ms = response_ms
        if not data:
            data = []
        if not usage:
            usage = Usage()
        if _response_headers:
            self._response_headers = _response_headers
        super().__init__(model=model, object=object, data=data, usage=usage)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class Logprobs(OpenAIObject):
    text_offset: Optional[List[int]]
    token_logprobs: Optional[List[Union[float, None]]]
    tokens: Optional[List[str]]
    top_logprobs: Optional[List[Union[Dict[str, float], None]]]
class TextChoices(OpenAIObject):
    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
        super(TextChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        self.text = text
        if logprobs is None:
            self.logprobs = None
        elif isinstance(logprobs, dict):
            self.logprobs = Logprobs(**logprobs)
        else:
            self.logprobs = logprobs

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class TextCompletionResponse(OpenAIObject):
    """
    {
        "id": response["id"],
        "object": "text_completion",
        "created": response["created"],
        "model": response["model"],
        "choices": [
            {
                "text": response["choices"][0]["message"]["content"],
                "index": response["choices"][0]["index"],
                "logprobs": transformed_logprobs,
                "finish_reason": response["choices"][0]["finish_reason"]
            }
        ],
        "usage": response["usage"]
    }
    """

    id: str
    object: str
    created: int
    model: Optional[str]
    choices: List[TextChoices]
    usage: Optional[Usage]
    _response_ms: Optional[int] = None
    _hidden_params: HiddenParams

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        usage=None,
        stream=False,
        response_ms=None,
        object=None,
        **params,
    ):
        if stream:
            object = "text_completion.chunk"
            choices = [TextChoices()]
        else:
            object = "text_completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, TextChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = TextChoices(**choice)
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [TextChoices()]
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())
        if not usage:
            usage = Usage()

        super(TextCompletionResponse, self).__init__(
            id=id,  # type: ignore
            object=object,  # type: ignore
            created=created,  # type: ignore
            model=model,  # type: ignore
            choices=choices,  # type: ignore
            usage=usage,  # type: ignore
            **params,
        )

        if response_ms:
            self._response_ms = response_ms
        else:
            self._response_ms = None
        self._hidden_params = HiddenParams()

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
from openai.types.images_response import Image as OpenAIImage


class ImageObject(OpenAIImage):
    """
    Represents the url or the content of an image generated by the OpenAI API.

    Attributes:
        b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
        url: The URL of the generated image, if response_format is url (default).
        revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.

    https://platform.openai.com/docs/api-reference/images/object
    """

    b64_json: Optional[str] = None
    url: Optional[str] = None
    revised_prompt: Optional[str] = None

    def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs):
        super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
from openai.types.images_response import ImagesResponse as OpenAIImageResponse


class ImageResponse(OpenAIImageResponse):
    _hidden_params: dict = {}
    usage: Usage

    def __init__(
        self,
        created: Optional[int] = None,
        data: Optional[List[ImageObject]] = None,
        response_ms=None,  # accepted for interface parity; not persisted
        usage: Optional[Usage] = None,
        hidden_params: Optional[dict] = None,
    ):
        if not data:
            data = []
        if not created:
            created = int(time.time())

        _data: List[OpenAIImage] = []
        for d in data:
            if isinstance(d, dict):
                _data.append(ImageObject(**d))
            elif isinstance(d, BaseModel):
                _data.append(ImageObject(**d.model_dump()))
        _usage = usage or Usage(
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
        )
        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
        self._hidden_params = hidden_params or {}

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
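# Usage sketch (illustrative): image dicts are coerced to ImageObject and
# usage defaults to an all-zero Usage, e.g.:
#
#   >>> img = ImageResponse(data=[{"url": "https://example.com/img.png"}])
#   >>> img.data[0].url
#   'https://example.com/img.png'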
class TranscriptionResponse(OpenAIObject):
    text: Optional[str] = None

    _hidden_params: dict = {}
    _response_headers: Optional[dict] = None

    def __init__(self, text=None):
        super().__init__(text=text)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class GenericImageParsingChunk(TypedDict):
    type: str
    media_type: str
    data: str


class ResponseFormatChunk(TypedDict, total=False):
    type: Required[Literal["json_object", "text"]]
    response_schema: dict


class LoggedLiteLLMParams(TypedDict, total=False):
    force_timeout: Optional[float]
    custom_llm_provider: Optional[str]
    api_base: Optional[str]
    litellm_call_id: Optional[str]
    model_alias_map: Optional[dict]
    metadata: Optional[dict]
    model_info: Optional[dict]
    proxy_server_request: Optional[dict]
    acompletion: Optional[bool]
    preset_cache_key: Optional[str]
    no_log: Optional[bool]
    input_cost_per_second: Optional[float]
    input_cost_per_token: Optional[float]
    output_cost_per_token: Optional[float]
    output_cost_per_second: Optional[float]
    cooldown_time: Optional[float]
class AdapterCompletionStreamWrapper:
    def __init__(self, completion_stream):
        self.completion_stream = completion_stream

    def __iter__(self):
        return self

    def __aiter__(self):
        return self

    def __next__(self):
        try:
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopIteration
        except Exception as e:
            print(f"AdapterCompletionStreamWrapper - {e}")  # noqa
            raise StopIteration  # end iteration instead of silently yielding None

    async def __anext__(self):
        try:
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopAsyncIteration
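# Usage sketch (illustrative): the wrapper adapts a sync or async iterator of
# chunks, ending cleanly when the underlying stream is exhausted, e.g.:
#
#   >>> def gen():
#   ...     yield "a"
#   ...     yield "b"
#   >>> list(AdapterCompletionStreamWrapper(gen()))
#   ['a', 'b']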
class StandardLoggingUserAPIKeyMetadata(TypedDict):
    user_api_key_hash: Optional[str]  # hash of the litellm virtual key used
    user_api_key_alias: Optional[str]
    user_api_key_org_id: Optional[str]
    user_api_key_team_id: Optional[str]
    user_api_key_user_id: Optional[str]
    user_api_key_user_email: Optional[str]
    user_api_key_team_alias: Optional[str]
    user_api_key_end_user_id: Optional[str]
class StandardLoggingMCPToolCall(TypedDict, total=False):
    name: str
    """
    Name of the tool to call
    """
    arguments: dict
    """
    Arguments to pass to the tool
    """
    result: dict
    """
    Result of the tool call
    """
    mcp_server_name: Optional[str]
    """
    Name of the MCP server that the tool call was made to
    """
    mcp_server_logo_url: Optional[str]
    """
    Optional logo URL of the MCP server that the tool call was made to

    (this is to render the logo on the logs page on litellm ui)
    """
class StandardBuiltInToolsParams(TypedDict, total=False):
    """
    Standard built-in OpenAI tools parameters.

    This is used to calculate the cost of built-in tools; insert any standard
    built-in tool parameters here.

    OpenAI charges users based on the `web_search_options` parameter.
    """

    web_search_options: Optional[WebSearchOptions]
    file_search: Optional[FileSearchTool]
class StandardLoggingPromptManagementMetadata(TypedDict):
    prompt_id: str
    prompt_variables: Optional[dict]
    prompt_integration: str


class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
    """
    Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management
    """

    spend_logs_metadata: Optional[
        dict
    ]  # special param to log k,v pairs to spendlogs for a call
    requester_ip_address: Optional[str]
    requester_metadata: Optional[dict]
    prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
    mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
    applied_guardrails: Optional[List[str]]
    usage_object: Optional[dict]
class StandardLoggingAdditionalHeaders(TypedDict, total=False):
    x_ratelimit_limit_requests: int
    x_ratelimit_limit_tokens: int
    x_ratelimit_remaining_requests: int
    x_ratelimit_remaining_tokens: int


class StandardLoggingHiddenParams(TypedDict):
    model_id: Optional[
        str
    ]  # id of the model in the router, separates multiple models with the same name but different credentials
    cache_key: Optional[str]
    api_base: Optional[str]
    response_cost: Optional[str]
    litellm_overhead_time_ms: Optional[float]
    additional_headers: Optional[StandardLoggingAdditionalHeaders]
    batch_models: Optional[List[str]]
    litellm_model_name: Optional[str]  # the model name sent to the provider by litellm
    usage_object: Optional[dict]
class StandardLoggingModelInformation(TypedDict):
    model_map_key: str
    model_map_value: Optional[ModelInfo]


class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False):
    """
    Debug information, if cost tracking fails.

    Avoid logging sensitive information like the response or optional params.
    """

    error_str: Required[str]
    traceback_str: Required[str]
    model: str
    cache_hit: Optional[bool]
    custom_llm_provider: Optional[str]
    base_model: Optional[str]
    call_type: str
    custom_pricing: Optional[bool]


class StandardLoggingPayloadErrorInformation(TypedDict, total=False):
    error_code: Optional[str]
    error_class: Optional[str]
    llm_provider: Optional[str]
    traceback: Optional[str]
    error_message: Optional[str]


class StandardLoggingGuardrailInformation(TypedDict, total=False):
    guardrail_name: Optional[str]
    guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]]
    guardrail_response: Optional[Union[dict, str]]
    guardrail_status: Literal["success", "failure"]


StandardLoggingPayloadStatus = Literal["success", "failure"]
class StandardLoggingPayload(TypedDict):
    id: str
    trace_id: str  # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
    call_type: str
    stream: Optional[bool]
    response_cost: float
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]
    status: StandardLoggingPayloadStatus
    custom_llm_provider: Optional[str]
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
    endTime: float
    completionStartTime: float
    response_time: float
    model_map_information: StandardLoggingModelInformation
    model: str
    model_id: Optional[str]
    model_group: Optional[str]
    api_base: str
    metadata: StandardLoggingMetadata
    cache_hit: Optional[bool]
    cache_key: Optional[str]
    saved_cache_cost: float
    request_tags: list
    end_user: Optional[str]
    requester_ip_address: Optional[str]
    messages: Optional[Union[str, list, dict]]
    response: Optional[Union[str, list, dict]]
    error_str: Optional[str]
    error_information: Optional[StandardLoggingPayloadErrorInformation]
    model_parameters: dict
    hidden_params: StandardLoggingHiddenParams
    guardrail_information: Optional[StandardLoggingGuardrailInformation]
    standard_built_in_tools_params: Optional[StandardBuiltInToolsParams]
| from typing import AsyncIterator, Iterator | |
| class CustomStreamingDecoder: | |
| async def aiter_bytes( | |
| self, iterator: AsyncIterator[bytes] | |
| ) -> AsyncIterator[ | |
| Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]] | |
| ]: | |
| raise NotImplementedError | |
| def iter_bytes( | |
| self, iterator: Iterator[bytes] | |
| ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]: | |
| raise NotImplementedError | |
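| # Sketch of a concrete decoder (illustrative; assumes the provider streams | |
| # newline-delimited JSON whose lines already match GenericStreamingChunk): | |
| #     class NDJSONStreamingDecoder(CustomStreamingDecoder): | |
| #         def iter_bytes(self, iterator): | |
| #             for raw in iterator: | |
| #                 for line in raw.splitlines(): | |
| #                     if line.strip(): | |
| #                         yield json.loads(line) | |
| #         async def aiter_bytes(self, iterator): | |
| #             async for raw in iterator: | |
| #                 for line in raw.splitlines(): | |
| #                     if line.strip(): | |
| #                         yield json.loads(line) | |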
| class StandardPassThroughResponseObject(TypedDict): | |
| response: str | |
| OPENAI_RESPONSE_HEADERS = [ | |
| "x-ratelimit-remaining-requests", | |
| "x-ratelimit-remaining-tokens", | |
| "x-ratelimit-limit-requests", | |
| "x-ratelimit-limit-tokens", | |
| "x-ratelimit-reset-requests", | |
| "x-ratelimit-reset-tokens", | |
| ] | |
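| # Usage sketch: forwarding only the OpenAI rate-limit headers from a provider | |
| # response (`response_headers` is a hypothetical mapping): | |
| #     forwarded = { | |
| #         k: v | |
| #         for k, v in response_headers.items() | |
| #         if k.lower() in OPENAI_RESPONSE_HEADERS | |
| #     } | |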
| class StandardCallbackDynamicParams(TypedDict, total=False): | |
| # Langfuse dynamic params | |
| langfuse_public_key: Optional[str] | |
| langfuse_secret: Optional[str] | |
| langfuse_secret_key: Optional[str] | |
| langfuse_host: Optional[str] | |
| # GCS dynamic params | |
| gcs_bucket_name: Optional[str] | |
| gcs_path_service_account: Optional[str] | |
| # Langsmith dynamic params | |
| langsmith_api_key: Optional[str] | |
| langsmith_project: Optional[str] | |
| langsmith_base_url: Optional[str] | |
| # Humanloop dynamic params | |
| humanloop_api_key: Optional[str] | |
| # Arize dynamic params | |
| arize_api_key: Optional[str] | |
| arize_space_key: Optional[str] | |
| # Logging settings | |
| turn_off_message_logging: Optional[bool] # when True, message content will not be logged | |
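| # Usage sketch (illustrative placeholder keys): these params can be passed | |
| # per-request to override globally configured callback credentials. | |
| #     dynamic_params: StandardCallbackDynamicParams = { | |
| #         "langfuse_public_key": "pk-...", | |
| #         "langfuse_secret_key": "sk-...", | |
| #         "turn_off_message_logging": True, | |
| #     } | |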
| all_litellm_params = [ | |
| "metadata", | |
| "litellm_metadata", | |
| "litellm_trace_id", | |
| "tags", | |
| "acompletion", | |
| "aimg_generation", | |
| "atext_completion", | |
| "text_completion", | |
| "caching", | |
| "mock_response", | |
| "mock_timeout", | |
| "disable_add_transform_inline_image_block", | |
| "api_key", | |
| "api_version", | |
| "prompt_id", | |
| "provider_specific_header", | |
| "prompt_variables", | |
| "api_base", | |
| "force_timeout", | |
| "logger_fn", | |
| "verbose", | |
| "custom_llm_provider", | |
| "model_file_id_mapping", | |
| "litellm_logging_obj", | |
| "litellm_call_id", | |
| "use_client", | |
| "id", | |
| "fallbacks", | |
| "azure", | |
| "headers", | |
| "model_list", | |
| "num_retries", | |
| "context_window_fallback_dict", | |
| "retry_policy", | |
| "retry_strategy", | |
| "roles", | |
| "final_prompt_value", | |
| "bos_token", | |
| "eos_token", | |
| "request_timeout", | |
| "complete_response", | |
| "self", | |
| "client", | |
| "rpm", | |
| "tpm", | |
| "max_parallel_requests", | |
| "input_cost_per_token", | |
| "output_cost_per_token", | |
| "input_cost_per_second", | |
| "output_cost_per_second", | |
| "hf_model_name", | |
| "model_info", | |
| "proxy_server_request", | |
| "preset_cache_key", | |
| "caching_groups", | |
| "ttl", | |
| "cache", | |
| "no-log", | |
| "base_model", | |
| "stream_timeout", | |
| "supports_system_message", | |
| "region_name", | |
| "allowed_model_region", | |
| "model_config", | |
| "fastest_response", | |
| "cooldown_time", | |
| "cache_key", | |
| "max_retries", | |
| "azure_ad_token_provider", | |
| "tenant_id", | |
| "client_id", | |
| "azure_username", | |
| "azure_password", | |
| "client_secret", | |
| "user_continue_message", | |
| "configurable_clientside_auth_params", | |
| "weight", | |
| "ensure_alternating_roles", | |
| "assistant_continue_message", | |
| "user_continue_message", | |
| "fallback_depth", | |
| "max_fallbacks", | |
| "max_budget", | |
| "budget_duration", | |
| "use_in_pass_through", | |
| "merge_reasoning_content_in_choices", | |
| "litellm_credential_name", | |
| "allowed_openai_params", | |
| "litellm_session_id", | |
| ] + list(StandardCallbackDynamicParams.__annotations__.keys()) | |
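| # Usage sketch: splitting litellm-internal params from provider params in a | |
| # hypothetical `kwargs` dict: | |
| #     provider_kwargs = { | |
| #         k: v for k, v in kwargs.items() if k not in all_litellm_params | |
| #     } | |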
| class KeyGenerationConfig(TypedDict, total=False): | |
| required_params: List[ | |
| str | |
| ] # specify params that must be present in the key generation request | |
| class TeamUIKeyGenerationConfig(KeyGenerationConfig): | |
| allowed_team_member_roles: List[str] | |
| class PersonalUIKeyGenerationConfig(KeyGenerationConfig): | |
| allowed_user_roles: List[str] | |
| class StandardKeyGenerationConfig(TypedDict, total=False): | |
| team_key_generation: TeamUIKeyGenerationConfig | |
| personal_key_generation: PersonalUIKeyGenerationConfig | |
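| # Usage sketch (illustrative role names): | |
| #     key_gen_config: StandardKeyGenerationConfig = { | |
| #         "team_key_generation": { | |
| #             "required_params": ["tags"], | |
| #             "allowed_team_member_roles": ["admin"], | |
| #         }, | |
| #         "personal_key_generation": {"allowed_user_roles": ["proxy_admin"]}, | |
| #     } | |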
| class BudgetConfig(BaseModel): | |
| max_budget: Optional[float] = None | |
| budget_duration: Optional[str] = None | |
| tpm_limit: Optional[int] = None | |
| rpm_limit: Optional[int] = None | |
| def __init__(self, **data: Any) -> None: | |
| # Map time_period to budget_duration if present | |
| if "time_period" in data: | |
| data["budget_duration"] = data.pop("time_period") | |
| # Map budget_limit to max_budget if present | |
| if "budget_limit" in data: | |
| data["max_budget"] = data.pop("budget_limit") | |
| super().__init__(**data) | |
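| # Usage sketch: legacy field names are remapped on construction, so both | |
| # spellings produce the same config: | |
| #     cfg = BudgetConfig(budget_limit=10.0, time_period="30d") | |
| #     assert cfg.max_budget == 10.0 and cfg.budget_duration == "30d" | |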
| GenericBudgetConfigType = Dict[str, BudgetConfig] | |
| class LlmProviders(str, Enum): | |
| OPENAI = "openai" | |
| OPENAI_LIKE = "openai_like" # embedding only | |
| JINA_AI = "jina_ai" | |
| XAI = "xai" | |
| CUSTOM_OPENAI = "custom_openai" | |
| TEXT_COMPLETION_OPENAI = "text-completion-openai" | |
| COHERE = "cohere" | |
| COHERE_CHAT = "cohere_chat" | |
| CLARIFAI = "clarifai" | |
| ANTHROPIC = "anthropic" | |
| ANTHROPIC_TEXT = "anthropic_text" | |
| REPLICATE = "replicate" | |
| HUGGINGFACE = "huggingface" | |
| TOGETHER_AI = "together_ai" | |
| OPENROUTER = "openrouter" | |
| VERTEX_AI = "vertex_ai" | |
| VERTEX_AI_BETA = "vertex_ai_beta" | |
| GEMINI = "gemini" | |
| AI21 = "ai21" | |
| BASETEN = "baseten" | |
| AZURE = "azure" | |
| AZURE_TEXT = "azure_text" | |
| AZURE_AI = "azure_ai" | |
| SAGEMAKER = "sagemaker" | |
| SAGEMAKER_CHAT = "sagemaker_chat" | |
| BEDROCK = "bedrock" | |
| VLLM = "vllm" | |
| NLP_CLOUD = "nlp_cloud" | |
| PETALS = "petals" | |
| OOBABOOGA = "oobabooga" | |
| OLLAMA = "ollama" | |
| OLLAMA_CHAT = "ollama_chat" | |
| DEEPINFRA = "deepinfra" | |
| PERPLEXITY = "perplexity" | |
| MISTRAL = "mistral" | |
| GROQ = "groq" | |
| NVIDIA_NIM = "nvidia_nim" | |
| CEREBRAS = "cerebras" | |
| AI21_CHAT = "ai21_chat" | |
| VOLCENGINE = "volcengine" | |
| CODESTRAL = "codestral" | |
| TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" | |
| DEEPSEEK = "deepseek" | |
| SAMBANOVA = "sambanova" | |
| MARITALK = "maritalk" | |
| VOYAGE = "voyage" | |
| CLOUDFLARE = "cloudflare" | |
| XINFERENCE = "xinference" | |
| FIREWORKS_AI = "fireworks_ai" | |
| FRIENDLIAI = "friendliai" | |
| WATSONX = "watsonx" | |
| WATSONX_TEXT = "watsonx_text" | |
| TRITON = "triton" | |
| PREDIBASE = "predibase" | |
| DATABRICKS = "databricks" | |
| EMPOWER = "empower" | |
| GITHUB = "github" | |
| CUSTOM = "custom" | |
| LITELLM_PROXY = "litellm_proxy" | |
| HOSTED_VLLM = "hosted_vllm" | |
| LLAMAFILE = "llamafile" | |
| LM_STUDIO = "lm_studio" | |
| GALADRIEL = "galadriel" | |
| INFINITY = "infinity" | |
| DEEPGRAM = "deepgram" | |
| AIOHTTP_OPENAI = "aiohttp_openai" | |
| LANGFUSE = "langfuse" | |
| HUMANLOOP = "humanloop" | |
| TOPAZ = "topaz" | |
| ASSEMBLYAI = "assemblyai" | |
| SNOWFLAKE = "snowflake" | |
| # Create a set of all provider values for quick lookup | |
| LlmProvidersSet = {provider.value for provider in LlmProviders} | |
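| # Usage sketch: O(1) membership checks on provider strings, e.g. | |
| #     if custom_llm_provider in LlmProvidersSet: | |
| #         ...  # known provider; route accordingly | |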
| class LiteLLMLoggingBaseClass: | |
| """ | |
| Base class for pre- and post-call logging. | |
| Meant to simplify type checking for the logging object. | |
| """ | |
| def pre_call(self, input, api_key, model=None, additional_args={}): | |
| pass | |
| def post_call( | |
| self, original_response, input=None, api_key=None, additional_args={} | |
| ): | |
| pass | |
| class CustomHuggingfaceTokenizer(TypedDict): | |
| identifier: str | |
| revision: str # usually 'main' | |
| auth_token: Optional[str] | |
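| # Usage sketch (illustrative identifier): | |
| #     tokenizer_cfg: CustomHuggingfaceTokenizer = { | |
| #         "identifier": "org/model", | |
| #         "revision": "main", | |
| #         "auth_token": None, | |
| #     } | |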
| class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum): | |
| """ | |
| Providers supported for the image-variation endpoint. Using an enum per endpoint makes it easier to track which providers support which endpoints. | |
| """ | |
| OPENAI = LlmProviders.OPENAI.value | |
| TOPAZ = LlmProviders.TOPAZ.value | |
| class HttpHandlerRequestFields(TypedDict, total=False): | |
| data: dict # request body | |
| params: dict # query params | |
| files: dict # file uploads | |
| content: Any # raw content | |
| class ProviderSpecificHeader(TypedDict): | |
| custom_llm_provider: str | |
| extra_headers: dict | |
| class SelectTokenizerResponse(TypedDict): | |
| type: Literal["openai_tokenizer", "huggingface_tokenizer"] | |
| tokenizer: Any | |
| class LiteLLMBatch(Batch): | |
| _hidden_params: dict = {} | |
| usage: Optional[Usage] = None | |
| def __contains__(self, key): | |
| # Define custom behavior for the 'in' operator | |
| return hasattr(self, key) | |
| def get(self, key, default=None): | |
| # Custom .get() method to access attributes with a default value if the attribute doesn't exist | |
| return getattr(self, key, default) | |
| def __getitem__(self, key): | |
| # Allow dictionary-style access to attributes | |
| return getattr(self, key) | |
| def json(self, **kwargs): # type: ignore | |
| try: | |
| return self.model_dump() # noqa | |
| except Exception: | |
| # if using pydantic v1 | |
| return self.dict() | |
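| # Usage sketch: LiteLLMBatch supports both attribute and dict-style access, | |
| # so callers written against plain dicts keep working: | |
| #     if "usage" in batch:  # __contains__ -> hasattr | |
| #         usage = batch["usage"]  # __getitem__ -> getattr | |
| #     status = batch.get("status", "unknown") | |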
| class LiteLLMRealtimeStreamLoggingObject(LiteLLMPydanticObjectBase): | |
| results: OpenAIRealtimeStreamList | |
| usage: Usage | |
| _hidden_params: dict = {} | |
| def __contains__(self, key): | |
| # Define custom behavior for the 'in' operator | |
| return hasattr(self, key) | |
| def get(self, key, default=None): | |
| # Custom .get() method to access attributes with a default value if the attribute doesn't exist | |
| return getattr(self, key, default) | |
| def __getitem__(self, key): | |
| # Allow dictionary-style access to attributes | |
| return getattr(self, key) | |
| def json(self, **kwargs): # type: ignore | |
| try: | |
| return self.model_dump() # noqa | |
| except Exception: | |
| # if using pydantic v1 | |
| return self.dict() | |
| class RawRequestTypedDict(TypedDict, total=False): | |
| raw_request_api_base: Optional[str] | |
| raw_request_body: Optional[dict] | |
| raw_request_headers: Optional[dict] | |
| error: Optional[str] | |
| class CredentialBase(BaseModel): | |
| credential_name: str | |
| credential_info: dict | |
| class CredentialItem(CredentialBase): | |
| credential_values: dict | |
| class CreateCredentialItem(CredentialBase): | |
| credential_values: Optional[dict] = None | |
| model_id: Optional[str] = None | |
| @model_validator(mode="before") | |
| @classmethod | |
| def check_credential_params(cls, values): | |
| # pydantic validator: runs before model construction to enforce the invariant below | |
| if not values.get("credential_values") and not values.get("model_id"): | |
| raise ValueError("Either credential_values or model_id must be set") | |
| return values | |
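| # Usage sketch: the validator rejects items that set neither source of values. | |
| #     CreateCredentialItem(credential_name="n", credential_info={}) | |
| #     # -> ValueError: Either credential_values or model_id must be set | |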
| class ExtractedFileData(TypedDict): | |
| """ | |
| TypedDict for storing processed file data | |
| Attributes: | |
| filename: Name of the file if provided | |
| content: The file content in bytes | |
| content_type: MIME type of the file | |
| headers: Any additional headers for the file | |
| """ | |
| filename: Optional[str] | |
| content: bytes | |
| content_type: Optional[str] | |
| headers: Mapping[str, str] | |
| class SpecialEnums(Enum): | |
| LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy" | |
| LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}" | |
| LITELLM_MANAGED_RESPONSE_COMPLETE_STR = ( | |
| "litellm:custom_llm_provider:{};model_id:{};response_id:{}" | |
| ) | |
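| # Usage sketch (illustrative arguments; the semantics of each template slot | |
| # are defined by the calling code): | |
| #     file_id = SpecialEnums.LITELLM_MANAGED_FILE_COMPLETE_STR.value.format( | |
| #         "application/pdf", "unified-id-123" | |
| #     ) | |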
| LLMResponseTypes = Union[ | |
| ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject | |
| ] | |
| class DynamicPromptManagementParamLiteral(str, Enum): | |
| """ | |
| If any of these params are passed, the user is trying to use dynamic prompt management | |
| """ | |
| CACHE_CONTROL_INJECTION_POINTS = "cache_control_injection_points" | |
| KNOWLEDGE_BASES = "knowledge_bases" | |
| VECTOR_STORE_IDS = "vector_store_ids" | |
| @classmethod | |
| def list_all_params(cls): | |
| return [param.value for param in cls] | |
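| # Usage sketch: detecting dynamic prompt management params in request kwargs. | |
| #     if any( | |
| #         k in DynamicPromptManagementParamLiteral.list_all_params() | |
| #         for k in kwargs | |
| #     ): | |
| #         ...  # route through prompt-management handling | |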