import json
import time
import uuid
from enum import Enum
from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Union

from aiohttp import FormData
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.completion_usage import (
    CompletionTokensDetails,
    CompletionUsage,
    PromptTokensDetails,
)
from openai.types.moderation import (
    Categories,
    CategoryAppliedInputTypes,
    CategoryScores,
)
from openai.types.moderation_create_response import Moderation, ModerationCreateResponse
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
from typing_extensions import Callable, Required, TypedDict, override

import litellm

from ..litellm_core_utils.core_helpers import map_finish_reason
from .guardrails import GuardrailEventHooks
from .llms.openai import (
    Batch,
    ChatCompletionAnnotation,
    ChatCompletionRedactedThinkingBlock,
    ChatCompletionThinkingBlock,
    ChatCompletionToolCallChunk,
    ChatCompletionUsageBlock,
    FileSearchTool,
    OpenAIChatCompletionChunk,
    OpenAIFileObject,
    OpenAIRealtimeStreamList,
    WebSearchOptions,
)
from .rerank import RerankResponse
def _generate_id():  # private helper function
    return "chatcmpl-" + str(uuid.uuid4())
class LiteLLMPydanticObjectBase(BaseModel):
    """
    Implements default functions that all pydantic objects should have.
    """

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump(**kwargs)  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict(**kwargs)

    def fields_set(self):
        try:
            return self.model_fields_set  # noqa
        except Exception:
            # if using pydantic v1
            return self.__fields_set__

    model_config = ConfigDict(protected_namespaces=())
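# Example (illustrative; the subclass name below is hypothetical): any
# subclass gets the pydantic v1/v2 compatible helpers for free.
#     >>> class MyConfig(LiteLLMPydanticObjectBase):
#     ...     name: str = "default"
#     >>> MyConfig().json()
#     {'name': 'default'}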
class LiteLLMCommonStrings(Enum):
    redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
    llm_provider_not_provided = "Unmapped LLM provider for this endpoint. You passed model={model}, custom_llm_provider={custom_llm_provider}. Check supported providers and routes: https://docs.litellm.ai/docs/providers"


SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
class CostPerToken(TypedDict):
    input_cost_per_token: float
    output_cost_per_token: float


class ProviderField(TypedDict):
    field_name: str
    field_type: Literal["string"]
    field_description: str
    field_value: str


class ProviderSpecificModelInfo(TypedDict, total=False):
    supports_system_messages: Optional[bool]
    supports_response_schema: Optional[bool]
    supports_vision: Optional[bool]
    supports_function_calling: Optional[bool]
    supports_tool_choice: Optional[bool]
    supports_assistant_prefill: Optional[bool]
    supports_prompt_caching: Optional[bool]
    supports_audio_input: Optional[bool]
    supports_embedding_image_input: Optional[bool]
    supports_audio_output: Optional[bool]
    supports_pdf_input: Optional[bool]
    supports_native_streaming: Optional[bool]
    supports_parallel_function_calling: Optional[bool]
    supports_web_search: Optional[bool]
    supports_reasoning: Optional[bool]


class SearchContextCostPerQuery(TypedDict, total=False):
    search_context_size_low: float
    search_context_size_medium: float
    search_context_size_high: float
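# Example (illustrative; prices are hypothetical): a per-token pricing
# override for a custom deployment, using the CostPerToken shape above.
#     >>> custom_pricing: CostPerToken = {
#     ...     "input_cost_per_token": 0.25e-6,
#     ...     "output_cost_per_token": 1.0e-6,
#     ... }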
class ModelInfoBase(ProviderSpecificModelInfo, total=False):
    key: Required[str]  # the key in litellm.model_cost which is returned
    max_tokens: Required[Optional[int]]
    max_input_tokens: Required[Optional[int]]
    max_output_tokens: Required[Optional[int]]
    input_cost_per_token: Required[float]
    cache_creation_input_token_cost: Optional[float]
    cache_read_input_token_cost: Optional[float]
    input_cost_per_character: Optional[float]  # only for vertex ai models
    input_cost_per_audio_token: Optional[float]
    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
    input_cost_per_token_above_200k_tokens: Optional[float]  # only for vertex ai gemini-2.5-pro models
    input_cost_per_character_above_128k_tokens: Optional[float]  # only for vertex ai models
    input_cost_per_query: Optional[float]  # only for rerank models
    input_cost_per_image: Optional[float]  # only for vertex ai models
    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_second: Optional[float]  # for OpenAI Speech models
    input_cost_per_token_batches: Optional[float]
    output_cost_per_token_batches: Optional[float]
    output_cost_per_token: Required[float]
    output_cost_per_character: Optional[float]  # only for vertex ai models
    output_cost_per_audio_token: Optional[float]
    output_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
    output_cost_per_token_above_200k_tokens: Optional[float]  # only for vertex ai gemini-2.5-pro models
    output_cost_per_character_above_128k_tokens: Optional[float]  # only for vertex ai models
    output_cost_per_image: Optional[float]
    output_vector_size: Optional[int]
    output_cost_per_reasoning_token: Optional[float]
    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_second: Optional[float]  # for OpenAI Speech models
    search_context_cost_per_query: Optional[SearchContextCostPerQuery]  # cost for using web search tool
    litellm_provider: Required[str]
    mode: Required[
        Literal[
            "completion", "embedding", "image_generation", "chat", "audio_transcription"
        ]
    ]
    tpm: Optional[int]
    rpm: Optional[int]
class ModelInfo(ModelInfoBase, total=False):
    """
    Model info for a given model; this is information found in litellm.model_prices_and_context_window.json
    """

    supported_openai_params: Required[Optional[List[str]]]
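# Example (illustrative; values are hypothetical): a minimal entry mirroring
# the shape of litellm.model_prices_and_context_window.json.
#     >>> info: ModelInfo = {
#     ...     "key": "my-model",
#     ...     "max_tokens": 4096,
#     ...     "max_input_tokens": 4096,
#     ...     "max_output_tokens": 4096,
#     ...     "input_cost_per_token": 1e-6,
#     ...     "output_cost_per_token": 2e-6,
#     ...     "litellm_provider": "openai",
#     ...     "mode": "chat",
#     ...     "supported_openai_params": ["temperature", "max_tokens"],
#     ... }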
class GenericStreamingChunk(TypedDict, total=False):
    text: Required[str]
    tool_use: Optional[ChatCompletionToolCallChunk]
    is_finished: Required[bool]
    finish_reason: Required[str]
    usage: Required[Optional[ChatCompletionUsageBlock]]
    index: int

    # use this dict if you want to return any provider specific fields in the response
    provider_specific_fields: Optional[Dict[str, Any]]
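# Example (illustrative): a terminal chunk as a provider integration might
# emit it.
#     >>> chunk: GenericStreamingChunk = {
#     ...     "text": "Hello",
#     ...     "is_finished": True,
#     ...     "finish_reason": "stop",
#     ...     "usage": None,
#     ... }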
class CallTypes(Enum):
    embedding = "embedding"
    aembedding = "aembedding"
    completion = "completion"
    acompletion = "acompletion"
    atext_completion = "atext_completion"
    text_completion = "text_completion"
    image_generation = "image_generation"
    aimage_generation = "aimage_generation"
    moderation = "moderation"
    amoderation = "amoderation"
    atranscription = "atranscription"
    transcription = "transcription"
    aspeech = "aspeech"
    speech = "speech"
    rerank = "rerank"
    arerank = "arerank"
    arealtime = "_arealtime"
    create_batch = "create_batch"
    acreate_batch = "acreate_batch"
    aretrieve_batch = "aretrieve_batch"
    retrieve_batch = "retrieve_batch"
    pass_through = "pass_through_endpoint"
    anthropic_messages = "anthropic_messages"
    get_assistants = "get_assistants"
    aget_assistants = "aget_assistants"
    create_assistants = "create_assistants"
    acreate_assistants = "acreate_assistants"
    delete_assistant = "delete_assistant"
    adelete_assistant = "adelete_assistant"
    acreate_thread = "acreate_thread"
    create_thread = "create_thread"
    aget_thread = "aget_thread"
    get_thread = "get_thread"
    a_add_message = "a_add_message"
    add_message = "add_message"
    aget_messages = "aget_messages"
    get_messages = "get_messages"
    arun_thread = "arun_thread"
    run_thread = "run_thread"
    arun_thread_stream = "arun_thread_stream"
    run_thread_stream = "run_thread_stream"
    afile_retrieve = "afile_retrieve"
    file_retrieve = "file_retrieve"
    afile_delete = "afile_delete"
    file_delete = "file_delete"
    afile_list = "afile_list"
    file_list = "file_list"
    acreate_file = "acreate_file"
    create_file = "create_file"
    afile_content = "afile_content"
    file_content = "file_content"
    create_fine_tuning_job = "create_fine_tuning_job"
    acreate_fine_tuning_job = "acreate_fine_tuning_job"
    acancel_fine_tuning_job = "acancel_fine_tuning_job"
    cancel_fine_tuning_job = "cancel_fine_tuning_job"
    alist_fine_tuning_jobs = "alist_fine_tuning_jobs"
    list_fine_tuning_jobs = "list_fine_tuning_jobs"
    aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
    retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
    responses = "responses"
    aresponses = "aresponses"
CallTypesLiteral = Literal[
    "embedding",
    "aembedding",
    "completion",
    "acompletion",
    "atext_completion",
    "text_completion",
    "image_generation",
    "aimage_generation",
    "moderation",
    "amoderation",
    "atranscription",
    "transcription",
    "aspeech",
    "speech",
    "rerank",
    "arerank",
    "_arealtime",
    "create_batch",
    "acreate_batch",
    "pass_through_endpoint",
    "anthropic_messages",
    "aretrieve_batch",
    "retrieve_batch",
]
class PassthroughCallTypes(Enum):
    passthrough_image_generation = "passthrough-image-generation"
class TopLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """


class ChatCompletionTokenLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """

    top_logprobs: List[TopLogprob]
    """List of the most likely tokens and their log probability, at this token
    position.

    In rare cases, there may be fewer than the number of requested `top_logprobs`
    returned.
    """

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)


class ChoiceLogprobs(OpenAIObject):
    content: Optional[List[ChatCompletionTokenLogprob]] = None
    """A list of message content tokens with log probability information."""

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)
class FunctionCall(OpenAIObject):
    arguments: str
    name: Optional[str] = None


class Function(OpenAIObject):
    arguments: str
    name: Optional[str]  # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None)

    def __init__(
        self,
        arguments: Optional[Union[Dict, str]] = None,
        name: Optional[str] = None,
        **params,
    ):
        if arguments is None:
            if params.get("parameters", None) is not None and isinstance(
                params["parameters"], dict
            ):
                arguments = json.dumps(params["parameters"])
                params.pop("parameters")
            else:
                arguments = ""
        elif isinstance(arguments, dict):
            arguments = json.dumps(arguments)

        # Build a dictionary with the structure your BaseModel expects
        data = {"arguments": arguments, "name": name}

        super(Function, self).__init__(**data)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
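# Example (illustrative): dict arguments are serialized to a JSON string on
# construction.
#     >>> Function(arguments={"city": "Paris"}, name="get_weather").arguments
#     '{"city": "Paris"}'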
class ChatCompletionDeltaToolCall(OpenAIObject):
    id: Optional[str] = None
    function: Function
    type: Optional[str] = None
    index: int
class HiddenParams(OpenAIObject):
    original_response: Optional[Union[str, Any]] = None
    model_id: Optional[str] = None  # used in Router for individual deployments
    api_base: Optional[str] = None  # returns api base used for making completion call

    model_config = ConfigDict(extra="allow", protected_namespaces=())

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class ChatCompletionMessageToolCall(OpenAIObject):
    def __init__(
        self,
        function: Union[Dict, Function],
        id: Optional[str] = None,
        type: Optional[str] = None,
        **params,
    ):
        super(ChatCompletionMessageToolCall, self).__init__(**params)
        if isinstance(function, Dict):
            self.function = Function(**function)
        else:
            self.function = function

        self.id = id if id is not None else f"{uuid.uuid4()}"
        self.type = type if type is not None else "function"

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
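# Example (illustrative): a dict `function` payload is coerced to a Function,
# and missing id/type fields get OpenAI-compatible defaults.
#     >>> tc = ChatCompletionMessageToolCall(
#     ...     function={"name": "get_weather", "arguments": "{}"}
#     ... )
#     >>> tc.type
#     'function'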
from openai.types.chat.chat_completion_audio import ChatCompletionAudio


class ChatCompletionAudioResponse(ChatCompletionAudio):
    def __init__(
        self,
        data: str,
        expires_at: int,
        transcript: str,
        id: Optional[str] = None,
        **params,
    ):
        if id is None:
            id = f"{uuid.uuid4()}"
        super(ChatCompletionAudioResponse, self).__init__(
            data=data, expires_at=expires_at, transcript=transcript, id=id, **params
        )

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
""" | |
Reference: | |
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)) | |
""" | |
def add_provider_specific_fields( | |
object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]] | |
): | |
if not provider_specific_fields: # set if provider_specific_fields is not empty | |
return | |
setattr(object, "provider_specific_fields", provider_specific_fields) | |
class Message(OpenAIObject):
    content: Optional[str]
    role: Literal["assistant", "user", "system", "tool", "function"]
    tool_calls: Optional[List[ChatCompletionMessageToolCall]]
    function_call: Optional[FunctionCall]
    audio: Optional[ChatCompletionAudioResponse] = None
    reasoning_content: Optional[str] = None
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(
        default=None, exclude=True
    )
    annotations: Optional[List[ChatCompletionAnnotation]] = None

    def __init__(
        self,
        content: Optional[str] = None,
        role: Literal["assistant"] = "assistant",
        function_call=None,
        tool_calls: Optional[list] = None,
        audio: Optional[ChatCompletionAudioResponse] = None,
        provider_specific_fields: Optional[Dict[str, Any]] = None,
        reasoning_content: Optional[str] = None,
        thinking_blocks: Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
        init_values: Dict[str, Any] = {
            "content": content,
            "role": role or "assistant",  # handle null input
            "function_call": (
                FunctionCall(**function_call) if function_call is not None else None
            ),
            "tool_calls": (
                [
                    (
                        ChatCompletionMessageToolCall(**tool_call)
                        if isinstance(tool_call, dict)
                        else tool_call
                    )
                    for tool_call in tool_calls
                ]
                if tool_calls is not None and len(tool_calls) > 0
                else None
            ),
        }

        if audio is not None:
            init_values["audio"] = audio

        if thinking_blocks is not None:
            init_values["thinking_blocks"] = thinking_blocks

        if annotations is not None:
            init_values["annotations"] = annotations

        if reasoning_content is not None:
            init_values["reasoning_content"] = reasoning_content

        super(Message, self).__init__(
            **init_values,  # type: ignore
            **params,
        )

        if audio is None:
            # delete audio from self
            # OpenAI compatible APIs like mistral API will raise an error if audio is passed in
            del self.audio

        if annotations is None:
            # ensure default response matches OpenAI spec
            # Some OpenAI compatible APIs raise an error if annotations are passed in
            del self.annotations

        if reasoning_content is None:
            # ensure default response matches OpenAI spec
            del self.reasoning_content

        if thinking_blocks is None:
            # ensure default response matches OpenAI spec
            del self.thinking_blocks

        add_provider_specific_fields(self, provider_specific_fields)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
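# Example (illustrative): role defaults to "assistant" and unset optional
# fields read back as None via the dict-style helpers.
#     >>> m = Message(content="It is sunny.")
#     >>> m.role
#     'assistant'
#     >>> m.get("tool_calls") is None
#     True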
class Delta(OpenAIObject):
    reasoning_content: Optional[str] = None
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

    def __init__(
        self,
        content=None,
        role=None,
        function_call=None,
        tool_calls=None,
        audio: Optional[ChatCompletionAudioResponse] = None,
        reasoning_content: Optional[str] = None,
        thinking_blocks: Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
        super(Delta, self).__init__(**params)
        add_provider_specific_fields(self, params.get("provider_specific_fields", {}))
        self.content = content
        self.role = role

        # Set default values and correct types
        self.function_call: Optional[Union[FunctionCall, Any]] = None
        self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
        self.audio: Optional[ChatCompletionAudioResponse] = None
        self.annotations: Optional[List[ChatCompletionAnnotation]] = None

        if reasoning_content is not None:
            self.reasoning_content = reasoning_content
        else:
            # ensure default response matches OpenAI spec
            del self.reasoning_content

        if thinking_blocks is not None:
            self.thinking_blocks = thinking_blocks
        else:
            # ensure default response matches OpenAI spec
            del self.thinking_blocks

        # Add annotations to the delta; ensure they are only on Delta if they exist (match OpenAI spec)
        if annotations is not None:
            self.annotations = annotations
        else:
            del self.annotations

        if function_call is not None and isinstance(function_call, dict):
            self.function_call = FunctionCall(**function_call)
        else:
            self.function_call = function_call

        if tool_calls is not None and isinstance(tool_calls, list):
            self.tool_calls = []
            for tool_call in tool_calls:
                if isinstance(tool_call, dict):
                    if tool_call.get("index", None) is None:
                        tool_call["index"] = 0
                    self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
                elif isinstance(tool_call, ChatCompletionDeltaToolCall):
                    self.tool_calls.append(tool_call)
        else:
            self.tool_calls = tool_calls

        self.audio = audio

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class Choices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        message: Optional[Union[Message, dict]] = None,
        logprobs: Optional[Union[ChoiceLogprobs, dict, Any]] = None,
        enhancements=None,
        **params,
    ):
        super(Choices, self).__init__(**params)
        if finish_reason is not None:
            self.finish_reason = map_finish_reason(
                finish_reason
            )  # set finish_reason for all responses
        else:
            self.finish_reason = "stop"
        self.index = index
        if message is None:
            self.message = Message()
        elif isinstance(message, Message):
            self.message = message
        elif isinstance(message, dict):
            self.message = Message(**message)
        if logprobs is not None:
            if isinstance(logprobs, dict):
                self.logprobs = ChoiceLogprobs(**logprobs)
            else:
                self.logprobs = logprobs
        if enhancements is not None:
            self.enhancements = enhancements

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class CompletionTokensDetailsWrapper(
    CompletionTokensDetails
):  # wrapper for older openai versions
    text_tokens: Optional[int] = None
    """Text tokens generated by the model."""


class PromptTokensDetailsWrapper(
    PromptTokensDetails
):  # wrapper for older openai versions
    text_tokens: Optional[int] = None
    """Text tokens sent to the model."""

    image_tokens: Optional[int] = None
    """Image tokens sent to the model."""

    character_count: Optional[int] = None
    """Character count sent to the model. Used for Vertex AI multimodal embeddings."""

    image_count: Optional[int] = None
    """Number of images sent to the model. Used for Vertex AI multimodal embeddings."""

    video_length_seconds: Optional[float] = None
    """Length of videos sent to the model. Used for Vertex AI multimodal embeddings."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.character_count is None:
            del self.character_count
        if self.image_count is None:
            del self.image_count
        if self.video_length_seconds is None:
            del self.video_length_seconds
class Usage(CompletionUsage):
    _cache_creation_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.
    _cache_read_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.

    def __init__(
        self,
        prompt_tokens: Optional[int] = None,
        completion_tokens: Optional[int] = None,
        total_tokens: Optional[int] = None,
        reasoning_tokens: Optional[int] = None,
        prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None,
        completion_tokens_details: Optional[
            Union[CompletionTokensDetailsWrapper, dict]
        ] = None,
        **params,
    ):
        # handle reasoning_tokens
        _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
        if reasoning_tokens:
            text_tokens = (
                completion_tokens - reasoning_tokens if completion_tokens else None
            )
            completion_tokens_details = CompletionTokensDetailsWrapper(
                reasoning_tokens=reasoning_tokens, text_tokens=text_tokens
            )

        # Ensure completion_tokens_details is properly handled
        if completion_tokens_details:
            if isinstance(completion_tokens_details, dict):
                _completion_tokens_details = CompletionTokensDetailsWrapper(
                    **completion_tokens_details
                )
            elif isinstance(completion_tokens_details, CompletionTokensDetails):
                _completion_tokens_details = completion_tokens_details

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetailsWrapper(
                    cached_tokens=params["prompt_cache_hit_tokens"]
                )

        ## ANTHROPIC MAPPING ##
        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetailsWrapper(
                    cached_tokens=params["cache_read_input_tokens"]
                )

        # handle prompt_tokens_details
        _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
        if prompt_tokens_details:
            if isinstance(prompt_tokens_details, dict):
                _prompt_tokens_details = PromptTokensDetailsWrapper(
                    **prompt_tokens_details
                )
            elif isinstance(prompt_tokens_details, PromptTokensDetails):
                _prompt_tokens_details = prompt_tokens_details

        super().__init__(
            prompt_tokens=prompt_tokens or 0,
            completion_tokens=completion_tokens or 0,
            total_tokens=total_tokens or 0,
            completion_tokens_details=_completion_tokens_details or None,
            prompt_tokens_details=_prompt_tokens_details or None,
        )

        ## ANTHROPIC MAPPING ##
        if "cache_creation_input_tokens" in params and isinstance(
            params["cache_creation_input_tokens"], int
        ):
            self._cache_creation_input_tokens = params["cache_creation_input_tokens"]

        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            self._cache_read_input_tokens = params["cache_read_input_tokens"]

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]

        for k, v in params.items():
            setattr(self, k, v)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
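# Example (illustrative): provider-specific cache fields (here, the Anthropic
# naming) are mapped onto the OpenAI-style prompt_tokens_details shape.
#     >>> u = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15,
#     ...           cache_read_input_tokens=4)
#     >>> u.prompt_tokens_details.cached_tokens
#     4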
class StreamingChoices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        delta: Optional[Delta] = None,
        logprobs=None,
        enhancements=None,
        **params,
    ):
        # Fix: Perplexity returns both `delta` and `message`, which causes
        # OpenWebUI to render the message text instead of the streamed delta.
        # https://github.com/BerriAI/litellm/issues/8455
        params.pop("message", None)
        super(StreamingChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        if delta is not None:
            if isinstance(delta, Delta):
                self.delta = delta
            elif isinstance(delta, dict):
                self.delta = Delta(**delta)
        else:
            self.delta = Delta()
        if enhancements is not None:
            self.enhancements = enhancements

        if logprobs is not None and isinstance(logprobs, dict):
            self.logprobs = ChoiceLogprobs(**logprobs)
        else:
            self.logprobs = logprobs  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
    def __init__(self, **kwargs):
        new_choices = []
        for choice in kwargs["choices"]:
            new_choice = StreamingChoices(**choice).model_dump()
            new_choices.append(new_choice)
        kwargs["choices"] = new_choices
        super().__init__(**kwargs)
from openai.types.chat import ChatCompletionChunk


class ModelResponseBase(OpenAIObject):
    id: str
    """A unique identifier for the completion."""

    created: int
    """The Unix timestamp (in seconds) of when the completion was created."""

    model: Optional[str] = None
    """The model used for completion."""

    object: str
    """The object type (e.g. "chat.completion" or "chat.completion.chunk")."""

    system_fingerprint: Optional[str] = None
    """This fingerprint represents the backend configuration that the model runs with.

    Can be used in conjunction with the `seed` request parameter to understand when
    backend changes have been made that might impact determinism.
    """

    _hidden_params: dict = {}
    _response_headers: Optional[dict] = None
class ModelResponseStream(ModelResponseBase):
    choices: List[StreamingChoices]
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

    def __init__(
        self,
        choices: Optional[
            Union[List[StreamingChoices], Union[StreamingChoices, dict, BaseModel]]
        ] = None,
        id: Optional[str] = None,
        created: Optional[int] = None,
        provider_specific_fields: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        if choices is not None and isinstance(choices, list):
            new_choices = []
            for choice in choices:
                _new_choice = None
                if isinstance(choice, StreamingChoices):
                    _new_choice = choice
                elif isinstance(choice, dict):
                    _new_choice = StreamingChoices(**choice)
                elif isinstance(choice, BaseModel):
                    _new_choice = StreamingChoices(**choice.model_dump())
                new_choices.append(_new_choice)
            kwargs["choices"] = new_choices
        else:
            kwargs["choices"] = [StreamingChoices()]

        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())

        if (
            "usage" in kwargs
            and kwargs["usage"] is not None
            and isinstance(kwargs["usage"], dict)
        ):
            kwargs["usage"] = Usage(**kwargs["usage"])

        kwargs["id"] = id
        kwargs["created"] = created
        kwargs["object"] = "chat.completion.chunk"
        kwargs["provider_specific_fields"] = provider_specific_fields

        super().__init__(**kwargs)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class ModelResponse(ModelResponseBase):
    choices: List[Union[Choices, StreamingChoices]]
    """The list of completion choices the model generated for the input prompt."""

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        object=None,
        system_fingerprint=None,
        usage=None,
        stream=None,
        stream_options=None,
        response_ms=None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ) -> None:
        if stream is not None and stream is True:
            object = "chat.completion.chunk"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, StreamingChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = StreamingChoices(**choice)
                    elif isinstance(choice, BaseModel):
                        _new_choice = StreamingChoices(**choice.model_dump())
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [StreamingChoices()]
        else:
            object = "chat.completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    if isinstance(choice, Choices):
                        _new_choice = choice  # type: ignore
                    elif isinstance(choice, dict):
                        _new_choice = Choices(**choice)  # type: ignore
                    else:
                        _new_choice = choice
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [Choices()]

        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())

        if usage is not None:
            if isinstance(usage, dict):
                usage = Usage(**usage)
        elif stream is None or stream is False:
            usage = Usage()

        if hidden_params:
            self._hidden_params = hidden_params

        if _response_headers:
            self._response_headers = _response_headers

        init_values = {
            "id": id,
            "choices": choices,
            "created": created,
            "model": model,
            "object": object,
            "system_fingerprint": system_fingerprint,
        }

        if usage is not None:
            init_values["usage"] = usage

        super().__init__(
            **init_values,
            **params,
        )

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
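# Example (illustrative): dict choices are coerced, and the object type is
# derived from the `stream` flag.
#     >>> resp = ModelResponse(
#     ...     model="gpt-3.5-turbo",
#     ...     choices=[{"message": {"role": "assistant", "content": "hi"}}],
#     ... )
#     >>> resp.object
#     'chat.completion'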
class Embedding(OpenAIObject):
    embedding: Union[list, str] = []
    index: int
    object: Literal["embedding"]

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
class EmbeddingResponse(OpenAIObject):
    model: Optional[str] = None
    """The model used for embedding."""

    data: List
    """The actual embedding value"""

    object: Literal["list"]
    """The object type, which is always "list" """

    usage: Optional[Usage] = None
    """Usage statistics for the embedding request."""

    _hidden_params: dict = {}
    _response_headers: Optional[Dict] = None
    _response_ms: Optional[float] = None

    def __init__(
        self,
        model: Optional[str] = None,
        usage: Optional[Usage] = None,
        response_ms=None,
        data: Optional[Union[List, List[Embedding]]] = None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ):
        object = "list"
        if not data:
            data = []
        if not usage:
            usage = Usage()
        if _response_headers:
            self._response_headers = _response_headers

        super().__init__(model=model, object=object, data=data, usage=usage)  # type: ignore
        # store latency on the instance (previously assigned to an unused local)
        self._response_ms = response_ms if response_ms else None

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class Logprobs(OpenAIObject):
    text_offset: Optional[List[int]]
    token_logprobs: Optional[List[Union[float, None]]]
    tokens: Optional[List[str]]
    top_logprobs: Optional[List[Union[Dict[str, float], None]]]
class TextChoices(OpenAIObject):
    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
        super(TextChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        self.text = text
        if logprobs is None:
            self.logprobs = None
        elif isinstance(logprobs, dict):
            self.logprobs = Logprobs(**logprobs)
        else:
            self.logprobs = logprobs

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class TextCompletionResponse(OpenAIObject):
    """
    {
        "id": response["id"],
        "object": "text_completion",
        "created": response["created"],
        "model": response["model"],
        "choices": [
            {
                "text": response["choices"][0]["message"]["content"],
                "index": response["choices"][0]["index"],
                "logprobs": transformed_logprobs,
                "finish_reason": response["choices"][0]["finish_reason"]
            }
        ],
        "usage": response["usage"]
    }
    """

    id: str
    object: str
    created: int
    model: Optional[str]
    choices: List[TextChoices]
    usage: Optional[Usage]
    _response_ms: Optional[int] = None
    _hidden_params: HiddenParams

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        usage=None,
        stream=False,
        response_ms=None,
        object=None,
        **params,
    ):
        if stream:
            object = "text_completion.chunk"
            choices = [TextChoices()]
        else:
            object = "text_completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, TextChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = TextChoices(**choice)
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [TextChoices()]

        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())

        usage = usage or Usage()

        super(TextCompletionResponse, self).__init__(
            id=id,  # type: ignore
            object=object,  # type: ignore
            created=created,  # type: ignore
            model=model,  # type: ignore
            choices=choices,  # type: ignore
            usage=usage,  # type: ignore
            **params,
        )

        self._response_ms = response_ms if response_ms else None
        self._hidden_params = HiddenParams()

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
from openai.types.images_response import Image as OpenAIImage


class ImageObject(OpenAIImage):
    """
    Represents the url or the content of an image generated by the OpenAI API.

    Attributes:
        b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
        url: The URL of the generated image, if response_format is url (default).
        revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.

    https://platform.openai.com/docs/api-reference/images/object
    """

    b64_json: Optional[str] = None
    url: Optional[str] = None
    revised_prompt: Optional[str] = None

    def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs):
        super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
from openai.types.images_response import ImagesResponse as OpenAIImageResponse


class ImageResponse(OpenAIImageResponse):
    _hidden_params: dict = {}
    usage: Usage

    def __init__(
        self,
        created: Optional[int] = None,
        data: Optional[List[ImageObject]] = None,
        response_ms=None,
        usage: Optional[Usage] = None,
        hidden_params: Optional[dict] = None,
    ):
        if not data:
            data = []
        if not created:
            created = int(time.time())

        _data: List[OpenAIImage] = []
        for d in data:
            if isinstance(d, dict):
                _data.append(ImageObject(**d))
            elif isinstance(d, BaseModel):
                _data.append(ImageObject(**d.model_dump()))
        _usage = usage or Usage(
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
        )
        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
        self._hidden_params = hidden_params or {}

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
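# Example (illustrative; the URL is hypothetical): dict entries are coerced
# to ImageObject instances.
#     >>> img_resp = ImageResponse(data=[{"url": "https://example.com/image.png"}])
#     >>> img_resp.data[0].url
#     'https://example.com/image.png'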
class TranscriptionResponse(OpenAIObject):
    text: Optional[str] = None

    _hidden_params: dict = {}
    _response_headers: Optional[dict] = None

    def __init__(self, text=None):
        super().__init__(text=text)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()
class GenericImageParsingChunk(TypedDict):
    type: str
    media_type: str
    data: str


class ResponseFormatChunk(TypedDict, total=False):
    type: Required[Literal["json_object", "text"]]
    response_schema: dict


class LoggedLiteLLMParams(TypedDict, total=False):
    force_timeout: Optional[float]
    custom_llm_provider: Optional[str]
    api_base: Optional[str]
    litellm_call_id: Optional[str]
    model_alias_map: Optional[dict]
    metadata: Optional[dict]
    model_info: Optional[dict]
    proxy_server_request: Optional[dict]
    acompletion: Optional[bool]
    preset_cache_key: Optional[str]
    no_log: Optional[bool]
    input_cost_per_second: Optional[float]
    input_cost_per_token: Optional[float]
    output_cost_per_token: Optional[float]
    output_cost_per_second: Optional[float]
    cooldown_time: Optional[float]
class AdapterCompletionStreamWrapper:
    def __init__(self, completion_stream):
        self.completion_stream = completion_stream

    def __iter__(self):
        return self

    def __aiter__(self):
        return self

    def __next__(self):
        try:
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopIteration
        except Exception as e:
            print(f"AdapterCompletionStreamWrapper - {e}")  # noqa

    async def __anext__(self):
        try:
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopAsyncIteration
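# Example (illustrative): wrapping a plain generator so the same object can
# be consumed with either `for` or `async for`.
#     >>> def gen():
#     ...     yield "chunk-1"
#     ...     yield "chunk-2"
#     >>> list(AdapterCompletionStreamWrapper(gen()))
#     ['chunk-1', 'chunk-2']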
class StandardLoggingUserAPIKeyMetadata(TypedDict):
    user_api_key_hash: Optional[str]  # hash of the litellm virtual key used
    user_api_key_alias: Optional[str]
    user_api_key_org_id: Optional[str]
    user_api_key_team_id: Optional[str]
    user_api_key_user_id: Optional[str]
    user_api_key_user_email: Optional[str]
    user_api_key_team_alias: Optional[str]
    user_api_key_end_user_id: Optional[str]
class StandardLoggingMCPToolCall(TypedDict, total=False):
    name: str
    """
    Name of the tool to call
    """

    arguments: dict
    """
    Arguments to pass to the tool
    """

    result: dict
    """
    Result of the tool call
    """

    mcp_server_name: Optional[str]
    """
    Name of the MCP server that the tool call was made to
    """

    mcp_server_logo_url: Optional[str]
    """
    Optional logo URL of the MCP server that the tool call was made to
    (this is to render the logo on the logs page on the litellm UI)
    """
class StandardBuiltInToolsParams(TypedDict, total=False):
    """
    Standard built-in OpenAI tools parameters.

    This is used to calculate the cost of built-in tools; insert any standard built-in tool parameters here.

    OpenAI charges users based on the `web_search_options` parameter.
    """

    web_search_options: Optional[WebSearchOptions]
    file_search: Optional[FileSearchTool]
class StandardLoggingPromptManagementMetadata(TypedDict):
    prompt_id: str
    prompt_variables: Optional[dict]
    prompt_integration: str


class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
    """
    Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management
    """

    spend_logs_metadata: Optional[dict]  # special param to log k,v pairs to spendlogs for a call
    requester_ip_address: Optional[str]
    requester_metadata: Optional[dict]
    prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
    mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
    applied_guardrails: Optional[List[str]]
    usage_object: Optional[dict]
class StandardLoggingAdditionalHeaders(TypedDict, total=False):
    x_ratelimit_limit_requests: int
    x_ratelimit_limit_tokens: int
    x_ratelimit_remaining_requests: int
    x_ratelimit_remaining_tokens: int


class StandardLoggingHiddenParams(TypedDict):
    model_id: Optional[str]  # id of the model in the router; separates multiple models with the same name but different credentials
    cache_key: Optional[str]
    api_base: Optional[str]
    response_cost: Optional[str]
    litellm_overhead_time_ms: Optional[float]
    additional_headers: Optional[StandardLoggingAdditionalHeaders]
    batch_models: Optional[List[str]]
    litellm_model_name: Optional[str]  # the model name sent to the provider by litellm
    usage_object: Optional[dict]


class StandardLoggingModelInformation(TypedDict):
    model_map_key: str
    model_map_value: Optional[ModelInfo]
class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False):
    """
    Debug information for when cost tracking fails.

    Avoid logging sensitive information like the response or optional params.
    """

    error_str: Required[str]
    traceback_str: Required[str]
    model: str
    cache_hit: Optional[bool]
    custom_llm_provider: Optional[str]
    base_model: Optional[str]
    call_type: str
    custom_pricing: Optional[bool]


class StandardLoggingPayloadErrorInformation(TypedDict, total=False):
    error_code: Optional[str]
    error_class: Optional[str]
    llm_provider: Optional[str]
    traceback: Optional[str]
    error_message: Optional[str]


class StandardLoggingGuardrailInformation(TypedDict, total=False):
    guardrail_name: Optional[str]
    guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]]
    guardrail_response: Optional[Union[dict, str]]
    guardrail_status: Literal["success", "failure"]


StandardLoggingPayloadStatus = Literal["success", "failure"]
class StandardLoggingPayload(TypedDict):
    id: str
    trace_id: str  # trace multiple LLM calls belonging to the same overall request (e.g. fallbacks/retries)
    call_type: str
    stream: Optional[bool]
    response_cost: float
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]
    status: StandardLoggingPayloadStatus
    custom_llm_provider: Optional[str]
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
    endTime: float
    completionStartTime: float
    response_time: float
    model_map_information: StandardLoggingModelInformation
    model: str
    model_id: Optional[str]
    model_group: Optional[str]
    api_base: str
    metadata: StandardLoggingMetadata
    cache_hit: Optional[bool]
    cache_key: Optional[str]
    saved_cache_cost: float
    request_tags: list
    end_user: Optional[str]
    requester_ip_address: Optional[str]
    messages: Optional[Union[str, list, dict]]
    response: Optional[Union[str, list, dict]]
    error_str: Optional[str]
    error_information: Optional[StandardLoggingPayloadErrorInformation]
    model_parameters: dict
    hidden_params: StandardLoggingHiddenParams
    guardrail_information: Optional[StandardLoggingGuardrailInformation]
    standard_built_in_tools_params: Optional[StandardBuiltInToolsParams]
from typing import AsyncIterator, Iterator


class CustomStreamingDecoder:
    async def aiter_bytes(
        self, iterator: AsyncIterator[bytes]
    ) -> AsyncIterator[
        Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]
    ]:
        raise NotImplementedError

    def iter_bytes(
        self, iterator: Iterator[bytes]
    ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]:
        raise NotImplementedError
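# Example (illustrative; the line-based decoding scheme is hypothetical): a
# minimal subclass that turns raw byte lines into GenericStreamingChunk dicts.
#     >>> class LineDecoder(CustomStreamingDecoder):
#     ...     def iter_bytes(self, iterator):
#     ...         for raw in iterator:
#     ...             yield {
#     ...                 "text": raw.decode("utf-8").strip(),
#     ...                 "is_finished": False,
#     ...                 "finish_reason": "",
#     ...                 "usage": None,
#     ...             }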
class StandardPassThroughResponseObject(TypedDict):
    response: str


OPENAI_RESPONSE_HEADERS = [
    "x-ratelimit-remaining-requests",
    "x-ratelimit-remaining-tokens",
    "x-ratelimit-limit-requests",
    "x-ratelimit-limit-tokens",
    "x-ratelimit-reset-requests",
    "x-ratelimit-reset-tokens",
]
class StandardCallbackDynamicParams(TypedDict, total=False):
    # Langfuse dynamic params
    langfuse_public_key: Optional[str]
    langfuse_secret: Optional[str]
    langfuse_secret_key: Optional[str]
    langfuse_host: Optional[str]

    # GCS dynamic params
    gcs_bucket_name: Optional[str]
    gcs_path_service_account: Optional[str]

    # Langsmith dynamic params
    langsmith_api_key: Optional[str]
    langsmith_project: Optional[str]
    langsmith_base_url: Optional[str]

    # Humanloop dynamic params
    humanloop_api_key: Optional[str]

    # Arize dynamic params
    arize_api_key: Optional[str]
    arize_space_key: Optional[str]

    # Logging settings
    turn_off_message_logging: Optional[bool]  # when true will not log messages
all_litellm_params = [ | |
"metadata", | |
"litellm_metadata", | |
"litellm_trace_id", | |
"tags", | |
"acompletion", | |
"aimg_generation", | |
"atext_completion", | |
"text_completion", | |
"caching", | |
"mock_response", | |
"mock_timeout", | |
"disable_add_transform_inline_image_block", | |
"api_key", | |
"api_version", | |
"prompt_id", | |
"provider_specific_header", | |
"prompt_variables", | |
"api_base", | |
"force_timeout", | |
"logger_fn", | |
"verbose", | |
"custom_llm_provider", | |
"model_file_id_mapping", | |
"litellm_logging_obj", | |
"litellm_call_id", | |
"use_client", | |
"id", | |
"fallbacks", | |
"azure", | |
"headers", | |
"model_list", | |
"num_retries", | |
"context_window_fallback_dict", | |
"retry_policy", | |
"retry_strategy", | |
"roles", | |
"final_prompt_value", | |
"bos_token", | |
"eos_token", | |
"request_timeout", | |
"complete_response", | |
"self", | |
"client", | |
"rpm", | |
"tpm", | |
"max_parallel_requests", | |
"input_cost_per_token", | |
"output_cost_per_token", | |
"input_cost_per_second", | |
"output_cost_per_second", | |
"hf_model_name", | |
"model_info", | |
"proxy_server_request", | |
"preset_cache_key", | |
"caching_groups", | |
"ttl", | |
"cache", | |
"no-log", | |
"base_model", | |
"stream_timeout", | |
"supports_system_message", | |
"region_name", | |
"allowed_model_region", | |
"model_config", | |
"fastest_response", | |
"cooldown_time", | |
"cache_key", | |
"max_retries", | |
"azure_ad_token_provider", | |
"tenant_id", | |
"client_id", | |
"azure_username", | |
"azure_password", | |
"client_secret", | |
"user_continue_message", | |
"configurable_clientside_auth_params", | |
"weight", | |
"ensure_alternating_roles", | |
"assistant_continue_message", | |
"user_continue_message", | |
"fallback_depth", | |
"max_fallbacks", | |
"max_budget", | |
"budget_duration", | |
"use_in_pass_through", | |
"merge_reasoning_content_in_choices", | |
"litellm_credential_name", | |
"allowed_openai_params", | |
"litellm_session_id", | |
] + list(StandardCallbackDynamicParams.__annotations__.keys()) | |
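# Illustrative only: a sketch of how all_litellm_params is typically consumed --
# separating litellm-internal kwargs from kwargs meant for the provider. The
# helper name and the exact split policy are assumptions.
def _example_split_kwargs(
    kwargs: Dict[str, Any]
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    litellm_kwargs = {k: v for k, v in kwargs.items() if k in all_litellm_params}
    provider_kwargs = {k: v for k, v in kwargs.items() if k not in all_litellm_params}
    return litellm_kwargs, provider_kwargs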
class KeyGenerationConfig(TypedDict, total=False):
    required_params: List[str]  # params that must be present in the key generation request
class TeamUIKeyGenerationConfig(KeyGenerationConfig): | |
allowed_team_member_roles: List[str] | |
class PersonalUIKeyGenerationConfig(KeyGenerationConfig): | |
allowed_user_roles: List[str] | |
class StandardKeyGenerationConfig(TypedDict, total=False): | |
team_key_generation: TeamUIKeyGenerationConfig | |
personal_key_generation: PersonalUIKeyGenerationConfig | |
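# Illustrative only: a sample StandardKeyGenerationConfig. The keys come from
# the TypedDicts above; the concrete role strings are placeholders.
_example_key_generation_config: StandardKeyGenerationConfig = {
    "team_key_generation": {
        "required_params": ["tags"],
        "allowed_team_member_roles": ["admin"],
    },
    "personal_key_generation": {
        "allowed_user_roles": ["proxy_admin"],
    },
}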
class BudgetConfig(BaseModel): | |
max_budget: Optional[float] = None | |
budget_duration: Optional[str] = None | |
tpm_limit: Optional[int] = None | |
rpm_limit: Optional[int] = None | |
def __init__(self, **data: Any) -> None: | |
# Map time_period to budget_duration if present | |
if "time_period" in data: | |
data["budget_duration"] = data.pop("time_period") | |
# Map budget_limit to max_budget if present | |
if "budget_limit" in data: | |
data["max_budget"] = data.pop("budget_limit") | |
super().__init__(**data) | |
GenericBudgetConfigType = Dict[str, BudgetConfig] | |
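# Illustrative only: a quick demo of the legacy-alias mapping implemented in
# BudgetConfig.__init__ above (time_period -> budget_duration, budget_limit ->
# max_budget), wrapped in a function so nothing runs at import time.
def _demo_budget_config_aliases() -> None:
    config = BudgetConfig(time_period="30d", budget_limit=10.0)
    assert config.budget_duration == "30d"
    assert config.max_budget == 10.0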
class LlmProviders(str, Enum): | |
OPENAI = "openai" | |
OPENAI_LIKE = "openai_like" # embedding only | |
JINA_AI = "jina_ai" | |
XAI = "xai" | |
CUSTOM_OPENAI = "custom_openai" | |
TEXT_COMPLETION_OPENAI = "text-completion-openai" | |
COHERE = "cohere" | |
COHERE_CHAT = "cohere_chat" | |
CLARIFAI = "clarifai" | |
ANTHROPIC = "anthropic" | |
ANTHROPIC_TEXT = "anthropic_text" | |
REPLICATE = "replicate" | |
HUGGINGFACE = "huggingface" | |
TOGETHER_AI = "together_ai" | |
OPENROUTER = "openrouter" | |
VERTEX_AI = "vertex_ai" | |
VERTEX_AI_BETA = "vertex_ai_beta" | |
GEMINI = "gemini" | |
AI21 = "ai21" | |
BASETEN = "baseten" | |
AZURE = "azure" | |
AZURE_TEXT = "azure_text" | |
AZURE_AI = "azure_ai" | |
SAGEMAKER = "sagemaker" | |
SAGEMAKER_CHAT = "sagemaker_chat" | |
BEDROCK = "bedrock" | |
VLLM = "vllm" | |
NLP_CLOUD = "nlp_cloud" | |
PETALS = "petals" | |
OOBABOOGA = "oobabooga" | |
OLLAMA = "ollama" | |
OLLAMA_CHAT = "ollama_chat" | |
DEEPINFRA = "deepinfra" | |
PERPLEXITY = "perplexity" | |
MISTRAL = "mistral" | |
GROQ = "groq" | |
NVIDIA_NIM = "nvidia_nim" | |
CEREBRAS = "cerebras" | |
AI21_CHAT = "ai21_chat" | |
VOLCENGINE = "volcengine" | |
CODESTRAL = "codestral" | |
TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" | |
DEEPSEEK = "deepseek" | |
SAMBANOVA = "sambanova" | |
MARITALK = "maritalk" | |
VOYAGE = "voyage" | |
CLOUDFLARE = "cloudflare" | |
XINFERENCE = "xinference" | |
FIREWORKS_AI = "fireworks_ai" | |
FRIENDLIAI = "friendliai" | |
WATSONX = "watsonx" | |
WATSONX_TEXT = "watsonx_text" | |
TRITON = "triton" | |
PREDIBASE = "predibase" | |
DATABRICKS = "databricks" | |
EMPOWER = "empower" | |
GITHUB = "github" | |
CUSTOM = "custom" | |
LITELLM_PROXY = "litellm_proxy" | |
HOSTED_VLLM = "hosted_vllm" | |
LLAMAFILE = "llamafile" | |
LM_STUDIO = "lm_studio" | |
GALADRIEL = "galadriel" | |
INFINITY = "infinity" | |
DEEPGRAM = "deepgram" | |
AIOHTTP_OPENAI = "aiohttp_openai" | |
LANGFUSE = "langfuse" | |
HUMANLOOP = "humanloop" | |
TOPAZ = "topaz" | |
ASSEMBLYAI = "assemblyai" | |
SNOWFLAKE = "snowflake" | |
# Create a set of all provider values for quick lookup | |
LlmProvidersSet = {provider.value for provider in LlmProviders} | |
class LiteLLMLoggingBaseClass:
    """
    Base class for pre- and post-call logging.

    Meant to simplify type checking for the logging object.
    """

    def pre_call(self, input, api_key, model=None, additional_args=None):
        pass

    def post_call(
        self, original_response, input=None, api_key=None, additional_args=None
    ):
        pass
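# Illustrative only: a minimal sketch of a concrete logger built on the base
# class above. The print() calls stand in for a real logging sink; everything
# else is an assumption, not litellm's own logging implementation.
class _ExamplePrintLogger(LiteLLMLoggingBaseClass):
    def pre_call(self, input, api_key, model=None, additional_args=None):
        print(f"pre_call: model={model}")

    def post_call(
        self, original_response, input=None, api_key=None, additional_args=None
    ):
        print("post_call: response received")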
class CustomHuggingfaceTokenizer(TypedDict): | |
identifier: str | |
revision: str # usually 'main' | |
auth_token: Optional[str] | |
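# Illustrative only: a sample CustomHuggingfaceTokenizer. "gpt2" is a
# placeholder Hugging Face repo id; any tokenizer identifier follows the same
# shape.
_example_hf_tokenizer: CustomHuggingfaceTokenizer = {
    "identifier": "gpt2",
    "revision": "main",
    "auth_token": None,
}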
class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum):
    """
    Providers that support the image variation endpoint. Keeping an enum per
    endpoint makes it easier to track which providers are supported where.
    """
OPENAI = LlmProviders.OPENAI.value | |
TOPAZ = LlmProviders.TOPAZ.value | |
class HttpHandlerRequestFields(TypedDict, total=False): | |
data: dict # request body | |
params: dict # query params | |
files: dict # file uploads | |
content: Any # raw content | |
class ProviderSpecificHeader(TypedDict): | |
custom_llm_provider: str | |
extra_headers: dict | |
class SelectTokenizerResponse(TypedDict): | |
type: Literal["openai_tokenizer", "huggingface_tokenizer"] | |
tokenizer: Any | |
class LiteLLMBatch(Batch): | |
_hidden_params: dict = {} | |
usage: Optional[Usage] = None | |
def __contains__(self, key): | |
# Define custom behavior for the 'in' operator | |
return hasattr(self, key) | |
def get(self, key, default=None): | |
# Custom .get() method to access attributes with a default value if the attribute doesn't exist | |
return getattr(self, key, default) | |
def __getitem__(self, key): | |
# Allow dictionary-style access to attributes | |
return getattr(self, key) | |
def json(self, **kwargs):  # type: ignore
    try:
        return self.model_dump(**kwargs)  # noqa
    except Exception:
        # if using pydantic v1
        return self.dict(**kwargs)
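# Illustrative only: the dunder methods above make a LiteLLMBatch probeable
# like a dict. A sketch of the access pattern, given an already-constructed
# batch:
def _example_read_batch(batch: LiteLLMBatch) -> None:
    if "usage" in batch:  # __contains__ delegates to hasattr()
        usage = batch.get("usage")  # .get() delegates to getattr() with a default
        print(usage, batch["id"])  # __getitem__ allows dict-style access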
class LiteLLMRealtimeStreamLoggingObject(LiteLLMPydanticObjectBase): | |
results: OpenAIRealtimeStreamList | |
usage: Usage | |
_hidden_params: dict = {} | |
def __contains__(self, key): | |
# Define custom behavior for the 'in' operator | |
return hasattr(self, key) | |
def get(self, key, default=None): | |
# Custom .get() method to access attributes with a default value if the attribute doesn't exist | |
return getattr(self, key, default) | |
def __getitem__(self, key): | |
# Allow dictionary-style access to attributes | |
return getattr(self, key) | |
def json(self, **kwargs):  # type: ignore
    try:
        return self.model_dump(**kwargs)  # noqa
    except Exception:
        # if using pydantic v1
        return self.dict(**kwargs)
class RawRequestTypedDict(TypedDict, total=False): | |
raw_request_api_base: Optional[str] | |
raw_request_body: Optional[dict] | |
raw_request_headers: Optional[dict] | |
error: Optional[str] | |
class CredentialBase(BaseModel): | |
credential_name: str | |
credential_info: dict | |
class CredentialItem(CredentialBase): | |
credential_values: dict | |
class CreateCredentialItem(CredentialBase): | |
credential_values: Optional[dict] = None | |
model_id: Optional[str] = None | |
@model_validator(mode="before")
@classmethod
def check_credential_params(cls, values):
if not values.get("credential_values") and not values.get("model_id"): | |
raise ValueError("Either credential_values or model_id must be set") | |
return values | |
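# Illustrative only: a quick demo of the validator above. Constructing with a
# model_id (or credential_values) succeeds; passing neither raises a pydantic
# validation error. All field values are placeholders.
def _demo_create_credential_item() -> None:
    item = CreateCredentialItem(
        credential_name="my-azure-credential",
        credential_info={},
        model_id="azure-gpt-4o",
    )
    assert item.credential_values is None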
class ExtractedFileData(TypedDict): | |
""" | |
TypedDict for storing processed file data | |
Attributes: | |
filename: Name of the file if provided | |
content: The file content in bytes | |
content_type: MIME type of the file | |
headers: Any additional headers for the file | |
""" | |
filename: Optional[str] | |
content: bytes | |
content_type: Optional[str] | |
headers: Mapping[str, str] | |
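# Illustrative only: a sample ExtractedFileData value matching the docstring
# above; the filename, bytes, and MIME type are placeholders.
_example_extracted_file: ExtractedFileData = {
    "filename": "report.pdf",
    "content": b"%PDF-1.4 ...",
    "content_type": "application/pdf",
    "headers": {},
}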
class SpecialEnums(Enum): | |
LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy" | |
LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}" | |
LITELLM_MANAGED_RESPONSE_COMPLETE_STR = ( | |
"litellm:custom_llm_provider:{};model_id:{};response_id:{}" | |
) | |
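# Illustrative only: the members above are format-string templates. A sketch of
# filling LITELLM_MANAGED_FILE_COMPLETE_STR; both format arguments here are
# placeholders, since this module does not pin down their semantics.
def _example_managed_file_str() -> str:
    return SpecialEnums.LITELLM_MANAGED_FILE_COMPLETE_STR.value.format(
        "application/pdf", "my-unified-id"
    )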
LLMResponseTypes = Union[ | |
ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject | |
] | |
class DynamicPromptManagementParamLiteral(str, Enum): | |
""" | |
If any of these params are passed, the user is trying to use dynamic prompt management | |
""" | |
CACHE_CONTROL_INJECTION_POINTS = "cache_control_injection_points" | |
KNOWLEDGE_BASES = "knowledge_bases" | |
VECTOR_STORE_IDS = "vector_store_ids" | |
@classmethod
def list_all_params(cls):
    return [param.value for param in cls]
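# Illustrative only: a sketch of how the enum above might be consumed --
# checking whether a set of request kwargs opts into dynamic prompt
# management. The helper name is hypothetical.
def _example_uses_dynamic_prompt_management(kwargs: Dict[str, Any]) -> bool:
    return any(
        param in kwargs
        for param in DynamicPromptManagementParamLiteral.list_all_params()
    )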