from typing import TYPE_CHECKING, Any, Optional, Union

from litellm._logging import verbose_router_logger
from litellm.constants import MAX_EXCEPTION_MESSAGE_LENGTH
from litellm.router_utils.cooldown_handlers import (
    _async_get_cooldown_deployments_with_debug_info,
)
from litellm.types.integrations.slack_alerting import AlertType
from litellm.types.router import RouterRateLimitError

if TYPE_CHECKING:
    # Import the real types only for static type checkers, avoiding a
    # circular import of litellm.router at runtime.
    from opentelemetry.trace import Span as _Span

    from litellm.router import Router as _Router

    LitellmRouter = _Router
    Span = Union[_Span, Any]
else:
    LitellmRouter = Any
    Span = Any


async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
):
    """
    Sends a Slack / MS Teams alert for an LLM API call failure. Only runs if
    router.slack_alerting_logger is set.

    Parameters:
        litellm_router_instance (LitellmRouter): The LitellmRouter instance.
        request_kwargs (dict): The kwargs of the failed request.
        error_traceback_str (str): The formatted traceback of the failure.
        original_exception (Any): The original exception that occurred.

    Returns:
        None
    """
    if litellm_router_instance is None:
        return

    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
        return

    if litellm_router_instance.slack_alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not send an alert for requests that came through the litellm
        # proxy server; the proxy is already instrumented to report LLM API
        # call failures.
        return
    # Build the alert message: exception text, optional provider debug info,
    # and a traceback truncated to MAX_EXCEPTION_MESSAGE_LENGTH characters.
    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        exception_str += litellm_debug_info
    exception_str += f"\n\n{error_traceback_str[:MAX_EXCEPTION_MESSAGE_LENGTH]}"

    await litellm_router_instance.slack_alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type=AlertType.llm_exceptions,
        alerting_metadata={},
    )
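

# A minimal usage sketch (not part of this module's API): how a caller might
# forward an LLM call failure to send_llm_exception_alert. The acompletion
# call, model name, and messages below are illustrative assumptions, not
# taken from this module.
async def _example_alert_on_failure(router: LitellmRouter) -> None:
    import traceback

    try:
        await router.acompletion(  # hypothetical call site for this example
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
        )
    except Exception as e:
        await send_llm_exception_alert(
            litellm_router_instance=router,
            request_kwargs={},
            error_traceback_str=traceback.format_exc(),
            original_exception=e,
        )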


async def async_raise_no_deployment_exception(
    litellm_router_instance: LitellmRouter, model: str, parent_otel_span: Optional[Span]
):
    """
    Builds and returns a RouterRateLimitError when no deployment is available
    for the given model; the caller is expected to raise it.
    """
    verbose_router_logger.info(
        f"get_available_deployment for model: {model}, No deployment available"
    )
    model_ids = litellm_router_instance.get_model_ids(model_name=model)

    # Shortest remaining cooldown across this model's deployments.
    _cooldown_time = litellm_router_instance.cooldown_cache.get_min_cooldown(
        model_ids=model_ids, parent_otel_span=parent_otel_span
    )

    # Deployments currently cooling down, with debug info for the error body.
    _cooldown_list = await _async_get_cooldown_deployments_with_debug_info(
        litellm_router_instance=litellm_router_instance,
        parent_otel_span=parent_otel_span,
    )

    return RouterRateLimitError(
        model=model,
        cooldown_time=_cooldown_time,
        enable_pre_call_checks=litellm_router_instance.enable_pre_call_checks,
        cooldown_list=_cooldown_list,
    )
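

# A minimal usage sketch (not part of this module's API): the helper above
# returns the RouterRateLimitError rather than raising it, so the caller's
# deployment-selection path raises it itself. The get_model_list-based
# emptiness check is an assumption for illustration.
async def _example_raise_when_exhausted(router: LitellmRouter, model: str) -> None:
    healthy_deployments = router.get_model_list(model_name=model) or []
    if len(healthy_deployments) == 0:
        raise await async_raise_no_deployment_exception(
            litellm_router_instance=router,
            model=model,
            parent_otel_span=None,
        )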