import copy
from unittest.mock import MagicMock, patch

import pytest
from litellm.exceptions import (
    APIConnectionError,
    InternalServerError,
    RateLimitError,
    ServiceUnavailableError,
)

from openhands.core.config import LLMConfig
from openhands.core.exceptions import OperationCancelled
from openhands.core.message import Message, TextContent
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics


@pytest.fixture(autouse=True)
def mock_logger(monkeypatch):
    # Replace the LLM loggers with mocks so tests don't write prompt/response logs
    mock_logger = MagicMock()
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.llm.logger', mock_logger)
    return mock_logger


@pytest.fixture
def default_config():
    return LLMConfig(
        model='gpt-4o',
        api_key='test_key',
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


def test_llm_init_with_default_config(default_config):
    llm = LLM(default_config)
    assert llm.config.model == 'gpt-4o'
    assert llm.config.api_key.get_secret_value() == 'test_key'
    assert isinstance(llm.metrics, Metrics)
    assert llm.metrics.model_name == 'gpt-4o'


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_model_info(mock_get_model_info, default_config):
    mock_get_model_info.return_value = {
        'max_input_tokens': 8000,
        'max_output_tokens': 2000,
    }
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 8000
    assert llm.config.max_output_tokens == 2000


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_without_model_info(mock_get_model_info, default_config):
    mock_get_model_info.side_effect = Exception('Model info not available')
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 4096
    assert llm.config.max_output_tokens == 4096


def test_llm_init_with_custom_config():
    custom_config = LLMConfig(
        model='custom-model',
        api_key='custom_key',
        max_input_tokens=5000,
        max_output_tokens=1500,
        temperature=0.8,
        top_p=0.9,
    )
    llm = LLM(custom_config)
    assert llm.config.model == 'custom-model'
    assert llm.config.api_key.get_secret_value() == 'custom_key'
    assert llm.config.max_input_tokens == 5000
    assert llm.config.max_output_tokens == 1500
    assert llm.config.temperature == 0.8
    assert llm.config.top_p == 0.9


def test_llm_init_with_metrics():
    config = LLMConfig(model='gpt-4o', api_key='test_key')
    metrics = Metrics()
    llm = LLM(config, metrics=metrics)
    assert llm.metrics is metrics
    assert llm.metrics.model_name == 'default'  # Metrics() was created without a model_name, so it keeps the default


@patch('openhands.llm.llm.litellm_completion')
@patch('time.time')
def test_response_latency_tracking(mock_time, mock_litellm_completion):
    # Mock time.time() to return fixed start and end timestamps
    mock_time.side_effect = [1000.0, 1002.5]

    # Mock the completion response
    mock_response = {
        'id': 'test-response-123',
        'choices': [{'message': {'content': 'Test response'}}],
    }
    mock_litellm_completion.return_value = mock_response

    # Make a completion call
    config = LLMConfig(model='gpt-4o', api_key='test_key')
    llm = LLM(config)
    response = llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])

    # The latency should be recorded against the model and response id
    assert len(llm.metrics.response_latencies) == 1
    latency_record = llm.metrics.response_latencies[0]
    assert latency_record.model == 'gpt-4o'
    assert latency_record.latency == 2.5  # 1002.5 - 1000.0
    assert latency_record.response_id == 'test-response-123'

    # The response itself is returned unchanged
    assert response['id'] == 'test-response-123'
    assert response['choices'][0]['message']['content'] == 'Test response'

    # A negative time difference should be clamped to a latency of 0.0
    mock_time.side_effect = [1000.0, 999.0]
    llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])

    assert len(llm.metrics.response_latencies) == 2
    latency_record = llm.metrics.response_latencies[-1]
    assert latency_record.latency == 0.0


def test_llm_reset():
    llm = LLM(LLMConfig(model='gpt-4o-mini', api_key='test_key'))
    initial_metrics = copy.deepcopy(llm.metrics)
    initial_metrics.add_cost(1.0)
    initial_metrics.add_response_latency(0.5, 'test-id')
    llm.reset()
    assert llm.metrics.accumulated_cost != initial_metrics.accumulated_cost
    assert llm.metrics.costs != initial_metrics.costs
    assert llm.metrics.response_latencies != initial_metrics.response_latencies
    assert isinstance(llm.metrics, Metrics)


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    default_config.model = 'openrouter:gpt-4o-mini'
    mock_get_model_info.return_value = {
        'max_input_tokens': 7000,
        'max_output_tokens': 1500,
    }
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 7000
    assert llm.config.max_output_tokens == 1500
    mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')


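# Tests for the llm.completion() call path: logging, retries, and error handling
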
@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_mocked_logger(
    mock_litellm_completion, default_config, mock_logger
):
    mock_litellm_completion.return_value = {
        'choices': [{'message': {'content': 'Test response'}}]
    }

    llm = LLM(config=default_config)
    response = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert response['choices'][0]['message']['content'] == 'Test response'
    assert mock_litellm_completion.call_count == 1

    # The debug logger should have been used during the completion call
    mock_logger.debug.assert_called()


@pytest.mark.parametrize(
    'exception_class,extra_args,expected_retries',
    [
        (
            APIConnectionError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (
            InternalServerError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (
            ServiceUnavailableError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (RateLimitError, {'llm_provider': 'test_provider', 'model': 'test_model'}, 2),
    ],
)
@patch('openhands.llm.llm.litellm_completion')
def test_completion_retries(
    mock_litellm_completion,
    default_config,
    exception_class,
    extra_args,
    expected_retries,
):
    mock_litellm_completion.side_effect = [
        exception_class('Test error message', **extra_args),
        {'choices': [{'message': {'content': 'Retry successful'}}]},
    ]

    llm = LLM(config=default_config)
    response = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert response['choices'][0]['message']['content'] == 'Retry successful'
    assert mock_litellm_completion.call_count == expected_retries


@patch('openhands.llm.llm.litellm_completion')
def test_completion_rate_limit_wait_time(mock_litellm_completion, default_config):
    with patch('time.sleep') as mock_sleep:
        mock_litellm_completion.side_effect = [
            RateLimitError(
                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
            ),
            {'choices': [{'message': {'content': 'Retry successful'}}]},
        ]

        llm = LLM(config=default_config)
        response = llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

        assert response['choices'][0]['message']['content'] == 'Retry successful'
        assert mock_litellm_completion.call_count == 2

        # The retry wait time should fall within the configured bounds
        mock_sleep.assert_called_once()
        wait_time = mock_sleep.call_args[0][0]
        assert (
            default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
        ), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'


@patch('openhands.llm.llm.litellm_completion')
def test_completion_exhausts_retries(mock_litellm_completion, default_config):
    mock_litellm_completion.side_effect = APIConnectionError(
        'Persistent error', llm_provider='test_provider', model='test_model'
    )

    llm = LLM(config=default_config)
    with pytest.raises(APIConnectionError):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    assert mock_litellm_completion.call_count == llm.config.num_retries


@patch('openhands.llm.llm.litellm_completion')
def test_completion_operation_cancelled(mock_litellm_completion, default_config):
    mock_litellm_completion.side_effect = OperationCancelled('Operation cancelled')

    llm = LLM(config=default_config)
    with pytest.raises(OperationCancelled):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    assert mock_litellm_completion.call_count == 1


@patch('openhands.llm.llm.litellm_completion')
def test_completion_keyboard_interrupt(mock_litellm_completion, default_config):
    def side_effect(*args, **kwargs):
        raise KeyboardInterrupt('Simulated KeyboardInterrupt')

    mock_litellm_completion.side_effect = side_effect

    llm = LLM(config=default_config)
    with pytest.raises(OperationCancelled):
        try:
            llm.completion(
                messages=[{'role': 'user', 'content': 'Hello!'}],
                stream=False,
            )
        except KeyboardInterrupt:
            # Mimic the caller converting KeyboardInterrupt into OperationCancelled
            raise OperationCancelled('Operation cancelled due to KeyboardInterrupt')

    assert mock_litellm_completion.call_count == 1


@patch('openhands.llm.llm.litellm_completion')
def test_completion_keyboard_interrupt_handler(mock_litellm_completion, default_config):
    global _should_exit

    def side_effect(*args, **kwargs):
        global _should_exit
        _should_exit = True
        return {'choices': [{'message': {'content': 'Simulated interrupt response'}}]}

    mock_litellm_completion.side_effect = side_effect

    llm = LLM(config=default_config)
    result = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert mock_litellm_completion.call_count == 1
    assert result['choices'][0]['message']['content'] == 'Simulated interrupt response'
    assert _should_exit

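    # Reset the global flag so it does not leak into other tests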
    _should_exit = False


@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_litellm_mock(mock_litellm_completion, default_config):
    mock_response = {
        'choices': [{'message': {'content': 'This is a mocked response.'}}]
    }
    mock_litellm_completion.return_value = mock_response

    test_llm = LLM(config=default_config)
    response = test_llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
        drop_params=True,
    )

    # The mocked response should be returned unchanged
    assert response['choices'][0]['message']['content'] == 'This is a mocked response.'
    mock_litellm_completion.assert_called_once()

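    # Check that the expected arguments were forwarded to litellm_completion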
    call_args = mock_litellm_completion.call_args[1]
    assert call_args['model'] == default_config.model
    assert call_args['messages'] == [{'role': 'user', 'content': 'Hello!'}]
    assert not call_args['stream']


@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_two_positional_args(mock_litellm_completion, default_config):
    mock_response = {
        'choices': [{'message': {'content': 'Response to positional args.'}}]
    }
    mock_litellm_completion.return_value = mock_response

    test_llm = LLM(config=default_config)
    response = test_llm.completion(
        'some-model-to-be-ignored',
        [{'role': 'user', 'content': 'Hello from positional args!'}],
        stream=False,
    )

    assert (
        response['choices'][0]['message']['content'] == 'Response to positional args.'
    )
    mock_litellm_completion.assert_called_once()

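    # Inspect the call that was forwarded to litellm_completion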
    call_args, call_kwargs = mock_litellm_completion.call_args
    # The model should come from the config, not from the first positional argument
    assert call_kwargs['model'] == default_config.model
    assert call_kwargs['messages'] == [
        {'role': 'user', 'content': 'Hello from positional args!'}
    ]
    assert not call_kwargs['stream']

    # No positional arguments should be forwarded to litellm_completion
    assert len(call_args) == 0


@patch('openhands.llm.llm.litellm_completion')
def test_llm_cloudflare_blockage(mock_litellm_completion, default_config):
    from litellm.exceptions import APIError

    from openhands.core.exceptions import CloudFlareBlockageError

    llm = LLM(default_config)
    mock_litellm_completion.side_effect = APIError(
        message='Attention Required! | Cloudflare',
        llm_provider='test_provider',
        model='test_model',
        status_code=403,
    )

    with pytest.raises(CloudFlareBlockageError, match='Request blocked by CloudFlare'):
        llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])

    # The completion should only have been attempted once
    mock_litellm_completion.assert_called_once()


@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_with_dict_messages(mock_token_counter, default_config):
    mock_token_counter.return_value = 42
    llm = LLM(default_config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 42
    mock_token_counter.assert_called_once_with(
        model=default_config.model, messages=messages, custom_tokenizer=None
    )


@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_with_message_objects(
    mock_token_counter, default_config, mock_logger
):
    llm = LLM(default_config)

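    # Create a Message object and an equivalent plain-dict message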
    message_obj = Message(role='user', content=[TextContent(text='Hello!')])
    message_dict = {'role': 'user', 'content': 'Hello!'}

    # Mock the token counter to return the same count for both calls
    mock_token_counter.side_effect = [42, 42]

    token_count_obj = llm.get_token_count([message_obj])
    token_count_dict = llm.get_token_count([message_dict])

    # Both representations should produce the same token count
    assert token_count_obj == token_count_dict
    assert mock_token_counter.call_count == 2


@patch('openhands.llm.llm.litellm.token_counter')
@patch('openhands.llm.llm.create_pretrained_tokenizer')
def test_get_token_count_with_custom_tokenizer(
    mock_create_tokenizer, mock_token_counter, default_config
):
    mock_tokenizer = MagicMock()
    mock_create_tokenizer.return_value = mock_tokenizer
    mock_token_counter.return_value = 42

    config = copy.deepcopy(default_config)
    config.custom_tokenizer = 'custom/tokenizer'
    llm = LLM(config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 42
    mock_create_tokenizer.assert_called_once_with('custom/tokenizer')
    mock_token_counter.assert_called_once_with(
        model=config.model, messages=messages, custom_tokenizer=mock_tokenizer
    )


@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_error_handling(
    mock_token_counter, default_config, mock_logger
):
    mock_token_counter.side_effect = Exception('Token counting failed')
    llm = LLM(default_config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 0
    mock_token_counter.assert_called_once()
    mock_logger.error.assert_called_once_with(
        'Error getting token count for\n model gpt-4o\nToken counting failed'
    )