# test3/tests/test_litellm/integrations/SlackAlerting/test_hanging_request_check.py
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

# Add the project root (two levels above the current working directory) to
# sys.path so the litellm package resolves when running tests from this folder.
sys.path.insert(0, os.path.abspath("../.."))

from litellm.integrations.SlackAlerting.hanging_request_check import (
AlertingHangingRequestCheck,
)
from litellm.types.integrations.slack_alerting import HangingRequestData
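
# Overview of the lifecycle these tests exercise:
#   1. add_request_to_hanging_request_check() caches HangingRequestData
#      under the request's `litellm_call_id`.
#   2. send_alerts_for_hanging_requests() scans that cache; a hit in
#      proxy_logging_obj.internal_usage_cache means the request completed,
#      while a miss means it is still hanging and a Slack alert is sent.
#
# Below is a minimal wiring sketch, illustrative only and never collected by
# pytest (no `test_` prefix). `slack_alerting` and `request_data` are
# hypothetical stand-ins; only calls exercised by the tests below are used.
async def _example_hanging_request_flow(slack_alerting, request_data: dict) -> None:
    checker = AlertingHangingRequestCheck(slack_alerting_object=slack_alerting)
    # Register the in-flight request (cached under its `litellm_call_id`).
    await checker.add_request_to_hanging_request_check(request_data)
    # A background task would later scan the cache and alert on anything
    # that never reported a completion status.
    await checker.send_alerts_for_hanging_requests()
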
class TestAlertingHangingRequestCheck:
"""Test suite for AlertingHangingRequestCheck class"""
@pytest.fixture
def mock_slack_alerting(self):
"""Create a mock SlackAlerting object for testing"""
mock_slack = MagicMock()
mock_slack.alerting_threshold = 300 # 5 minutes
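        # send_alert is async on the real SlackAlerting object, so it is
        # mocked with AsyncMock to be awaitable.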
mock_slack.send_alert = AsyncMock()
return mock_slack
@pytest.fixture
def hanging_request_checker(self, mock_slack_alerting):
"""Create an AlertingHangingRequestCheck instance for testing"""
return AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)
@pytest.mark.asyncio
async def test_init_creates_cache_with_correct_ttl(self, mock_slack_alerting):
"""
Test that initialization creates a hanging request cache with correct TTL.
The TTL should be alerting_threshold + buffer time.
"""
checker = AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)
        # The cache TTL should be alerting_threshold plus the 60s
        # HANGING_ALERT_BUFFER_TIME_SECONDS buffer.
        expected_ttl = mock_slack_alerting.alerting_threshold + 60
        assert checker.hanging_request_cache.default_ttl == expected_ttl
@pytest.mark.asyncio
async def test_add_request_to_hanging_request_check_success(
self, hanging_request_checker
):
"""
Test successfully adding a request to the hanging request cache.
Should extract metadata and store HangingRequestData in cache.
"""
request_data = {
"litellm_call_id": "test_request_123",
"model": "gpt-4",
"deployment": {"litellm_params": {"api_base": "https://api.openai.com/v1"}},
"metadata": {
"user_api_key_alias": "test_key",
"user_api_key_team_alias": "test_team",
},
}
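        # Patch litellm.get_api_base so api_base resolution is deterministic
        # instead of depending on what the deployment dict resolves to at runtime.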
with patch("litellm.get_api_base", return_value="https://api.openai.com/v1"):
await hanging_request_checker.add_request_to_hanging_request_check(
request_data
)
# Verify the request was added to cache
cached_data = (
await hanging_request_checker.hanging_request_cache.async_get_cache(
key="test_request_123"
)
)
assert cached_data is not None
assert isinstance(cached_data, HangingRequestData)
assert cached_data.request_id == "test_request_123"
assert cached_data.model == "gpt-4"
assert cached_data.api_base == "https://api.openai.com/v1"
@pytest.mark.asyncio
async def test_add_request_to_hanging_request_check_none_request_data(
self, hanging_request_checker
):
"""
Test that passing None request_data returns early without error.
Should handle gracefully when no request data is provided.
"""
result = await hanging_request_checker.add_request_to_hanging_request_check(
None
)
assert result is None
@pytest.mark.asyncio
async def test_add_request_to_hanging_request_check_minimal_data(
self, hanging_request_checker
):
"""
Test adding request with minimal required data.
Should handle cases where optional fields are missing.
"""
request_data = {
"litellm_call_id": "minimal_request_456",
"model": "gpt-3.5-turbo",
}
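        # No deployment or metadata blocks are supplied, so the optional
        # fields should fall back to their defaults (asserted below).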
await hanging_request_checker.add_request_to_hanging_request_check(request_data)
cached_data = (
await hanging_request_checker.hanging_request_cache.async_get_cache(
key="minimal_request_456"
)
)
assert cached_data is not None
assert cached_data.request_id == "minimal_request_456"
assert cached_data.model == "gpt-3.5-turbo"
assert cached_data.api_base is None
assert cached_data.key_alias == ""
assert cached_data.team_alias == ""
@pytest.mark.asyncio
async def test_send_hanging_request_alert(self, hanging_request_checker):
"""
Test sending a hanging request alert.
Should format the alert message correctly and call slack alerting.
"""
hanging_request_data = HangingRequestData(
request_id="test_hanging_request",
model="gpt-4",
api_base="https://api.openai.com/v1",
key_alias="test_key",
team_alias="test_team",
)
await hanging_request_checker.send_hanging_request_alert(hanging_request_data)
# Verify slack alert was called
hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()
# Check the alert message format
call_args = hanging_request_checker.slack_alerting_object.send_alert.call_args
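        # call_args is an (args, kwargs) pair; send_alert receives keyword
        # arguments, so the payload lives in call_args[1].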
message = call_args[1]["message"]
assert "Requests are hanging - 300s+ request time" in message
assert "Request Model: `gpt-4`" in message
assert "API Base: `https://api.openai.com/v1`" in message
assert "Key Alias: `test_key`" in message
assert "Team Alias: `test_team`" in message
assert call_args[1]["level"] == "Medium"
@pytest.mark.asyncio
async def test_send_alerts_for_hanging_requests_no_proxy_logging(
self, hanging_request_checker
):
"""
Test send_alerts_for_hanging_requests when proxy_logging_obj.internal_usage_cache is None.
Should return early without processing when internal usage cache is unavailable.
"""
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
mock_proxy.internal_usage_cache = None
result = await hanging_request_checker.send_alerts_for_hanging_requests()
assert result is None
@pytest.mark.asyncio
async def test_send_alerts_for_hanging_requests_with_completed_request(
self, hanging_request_checker
):
"""
Test send_alerts_for_hanging_requests when request has completed (not hanging).
Should remove completed requests from cache and not send alerts.
"""
# Add a request to the hanging cache
hanging_data = HangingRequestData(
request_id="completed_request_789",
model="gpt-4",
api_base="https://api.openai.com/v1",
)
await hanging_request_checker.hanging_request_cache.async_set_cache(
key="completed_request_789", value=hanging_data, ttl=300
)
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
# Mock internal usage cache to return a request status (meaning request completed)
mock_internal_cache = AsyncMock()
mock_internal_cache.async_get_cache.return_value = {"status": "success"}
mock_proxy.internal_usage_cache = mock_internal_cache
# Mock the cache method to return our test request
hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
AsyncMock(return_value=["completed_request_789"])
)
await hanging_request_checker.send_alerts_for_hanging_requests()
# Verify no alert was sent since request completed
hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()
@pytest.mark.asyncio
async def test_send_alerts_for_hanging_requests_with_actual_hanging_request(
self, hanging_request_checker
):
"""
Test send_alerts_for_hanging_requests when request is actually hanging.
Should send alert for requests that haven't completed within threshold.
"""
# Add a hanging request to the cache
hanging_data = HangingRequestData(
request_id="hanging_request_999",
model="gpt-4",
api_base="https://api.openai.com/v1",
key_alias="test_key",
team_alias="test_team",
)
await hanging_request_checker.hanging_request_cache.async_set_cache(
key="hanging_request_999", value=hanging_data, ttl=300
)
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
# Mock internal usage cache to return None (meaning request is still hanging)
mock_internal_cache = AsyncMock()
mock_internal_cache.async_get_cache.return_value = None
mock_proxy.internal_usage_cache = mock_internal_cache
# Mock the cache method to return our test request
hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
AsyncMock(return_value=["hanging_request_999"])
)
await hanging_request_checker.send_alerts_for_hanging_requests()
# Verify alert was sent for hanging request
hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()
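        # The alert body itself is format-checked in
        # test_send_hanging_request_alert above; here only the call matters.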
@pytest.mark.asyncio
async def test_send_alerts_for_hanging_requests_with_missing_hanging_data(
self, hanging_request_checker
):
"""
Test send_alerts_for_hanging_requests when hanging request data is missing from cache.
Should continue processing other requests when individual request data is missing.
"""
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
mock_internal_cache = AsyncMock()
mock_proxy.internal_usage_cache = mock_internal_cache
# Mock cache to return request ID but no data (simulating expired or missing data)
hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
AsyncMock(return_value=["missing_request_111"])
)
hanging_request_checker.hanging_request_cache.async_get_cache = AsyncMock(
return_value=None
)
await hanging_request_checker.send_alerts_for_hanging_requests()
# Should not crash and should not send any alerts
hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()
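
# To run just this suite (assumes pytest and pytest-asyncio are installed):
#   pytest test_hanging_request_check.py -v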