File size: 12,625 Bytes
9c31777 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
import io
from textwrap import dedent
from unittest.mock import MagicMock, patch
import docker
import PIL.Image
import pytest
from rich.console import Console
from smolagents.default_tools import FinalAnswerTool, WikipediaSearchTool
from smolagents.monitoring import AgentLogger, LogLevel
from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor
from smolagents.utils import AgentError
from .utils.markers import require_run_all
class TestRemotePythonExecutor:
    """Unit tests for ``RemotePythonExecutor.send_tools`` with the code runner replaced by a mock."""

    def test_send_tools_empty_tools(self):
        """An empty tool mapping triggers exactly one code run and no pip install."""
        run_code = MagicMock()
        executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
        executor.run_code_raise_errors = run_code

        executor.send_tools({})

        assert run_code.call_count == 1
        # No new packages should be installed
        sent_code = run_code.call_args.args[0]
        assert "!pip install" not in sent_code

    @require_run_all
    def test_send_tools_with_default_wikipedia_search_tool(self):
        """The Wikipedia tool triggers a pip install run followed by the tool definition run."""
        tool = WikipediaSearchTool()
        run_code = MagicMock()
        run_code.return_value = (None, "", False)
        executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
        executor.run_code_raise_errors = run_code

        executor.send_tools({"wikipedia_search": tool})

        assert run_code.call_count == 2
        # First run installs the tool's requirement, second run ships the tool source.
        install_code = run_code.call_args_list[0].args[0]
        definition_code = run_code.call_args_list[1].args[0]
        assert install_code == "!pip install wikipedia-api"
        assert "class WikipediaSearchTool(Tool)" in definition_code
class TestE2BExecutorUnit:
    """Unit tests for ``E2BExecutor`` with ``e2b_code_interpreter.Sandbox`` patched out."""

    def test_e2b_executor_instantiation(self):
        """The executor keeps the logger and sandbox, and forwards its kwargs to ``Sandbox``."""
        logger = MagicMock()
        expected_sandbox_kwargs = {
            "api_key": "dummy-api-key",
            "template": "dummy-template-id",
            "timeout": 60,
        }
        with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
            sandbox_instance = mock_sandbox.return_value
            # Make both the shell command and the code run report success.
            sandbox_instance.commands.run.return_value.error = None
            sandbox_instance.run_code.return_value.error = None
            executor = E2BExecutor(
                additional_imports=[],
                logger=logger,
                api_key="dummy-api-key",
                template="dummy-template-id",
                timeout=60,
            )
        assert isinstance(executor, E2BExecutor)
        assert executor.logger == logger
        assert executor.sandbox == mock_sandbox.return_value
        assert mock_sandbox.call_count == 1
        assert mock_sandbox.call_args.kwargs == expected_sandbox_kwargs

    def test_cleanup(self):
        """Test that the cleanup method properly shuts down the sandbox"""
        logger = MagicMock()
        with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
            # Setup mock
            sandbox_instance = mock_sandbox.return_value
            sandbox_instance.kill = MagicMock()

            # Create executor
            executor = E2BExecutor(additional_imports=[], logger=logger, api_key="dummy-api-key")

            # Call cleanup
            executor.cleanup()

            # Verify sandbox was killed
            sandbox_instance.kill.assert_called_once()
            # Should log start and completion messages
            assert logger.log.call_count >= 2
@pytest.fixture
def e2b_executor():
    """Yield an ``E2BExecutor`` that logs to an in-memory console, then clean it up."""
    # Log into a StringIO-backed, non-terminal console rather than the default output.
    silent_console = Console(force_terminal=False, file=io.StringIO())
    agent_logger = AgentLogger(LogLevel.INFO, silent_console)
    sandbox_executor = E2BExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
    yield sandbox_executor
    sandbox_executor.cleanup()
@require_run_all
class TestE2BExecutorIntegration:
    """Integration tests that run ``final_answer`` snippets through the ``e2b_executor`` fixture."""

    @pytest.fixture(autouse=True)
    def set_executor(self, e2b_executor):
        """Expose the ``e2b_executor`` fixture as ``self.executor`` for every test."""
        self.executor = e2b_executor

    @pytest.mark.parametrize(
        "code_action, expected_result",
        [
            (
                dedent('''
                    final_answer("""This is
                    a multiline
                    final answer""")
                '''),
                "This is\na multiline\nfinal answer",
            ),
            (
                dedent("""
                    text = '''Text containing
                    final_answer(5)
                    '''
                    final_answer(text)
                """),
                "Text containing\nfinal_answer(5)\n",
            ),
            (
                dedent("""
                    num = 2
                    if num == 1:
                        final_answer("One")
                    elif num == 2:
                        final_answer("Two")
                """),
                "Two",
            ),
        ],
    )
    def test_final_answer_patterns(self, code_action, expected_result):
        """Each snippet must be flagged as a final answer and return the expected text."""
        tools = {"final_answer": FinalAnswerTool()}
        self.executor.send_tools(tools)

        output, _logs, is_final = self.executor(code_action)

        assert is_final is True
        assert output == expected_result

    def test_custom_final_answer(self):
        """A ``FinalAnswerTool`` subclass overriding ``forward`` shapes the returned result."""

        class CustomFinalAnswerTool(FinalAnswerTool):
            def forward(self, answer: str) -> str:
                return "CUSTOM" + answer

        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
        snippet = dedent("""
            final_answer(answer="_answer")
        """)

        output, _logs, is_final = self.executor(snippet)

        assert is_final is True
        assert output == "CUSTOM_answer"

    def test_custom_final_answer_with_custom_inputs(self):
        """A ``FinalAnswerTool`` subclass may declare its own inputs and combine them."""

        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
        snippet = dedent("""
            final_answer(
                answer1="answer1_",
                answer2="_answer2"
            )
        """)

        output, _logs, is_final = self.executor(snippet)

        assert is_final is True
        assert output == "answer1_CUSTOM_answer2"
@pytest.fixture
def docker_executor():
    """Yield a ``DockerExecutor`` that logs to an in-memory console, then delete it."""
    # Log into a StringIO-backed, non-terminal console rather than the default output.
    silent_console = Console(force_terminal=False, file=io.StringIO())
    agent_logger = AgentLogger(LogLevel.INFO, silent_console)
    container_executor = DockerExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
    yield container_executor
    container_executor.delete()
@require_run_all
class TestDockerExecutorIntegration:
    """Integration tests that run code through the ``docker_executor`` fixture."""

    @pytest.fixture(autouse=True)
    def set_executor(self, docker_executor):
        """Expose the ``docker_executor`` fixture as ``self.executor`` for every test."""
        self.executor = docker_executor

    def test_initialization(self):
        """Check if DockerExecutor initializes without errors"""
        assert self.executor.container is not None, "Container should be initialized"

    def test_state_persistence(self):
        """Test that variables and imports from one snippet persist in the next"""
        code_action = "import numpy as np; a = 2"
        self.executor(code_action)

        # Both `np` and `a` come from the previous call; only the printed logs matter here.
        code_action = "print(np.sqrt(a))"
        _, logs, _ = self.executor(code_action)
        assert "1.41421" in logs

    def test_execute_output(self):
        """Test execution that returns a string"""
        code_action = 'final_answer("This is the final answer")'
        result, logs, final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_multiline_output(self):
        """Test a multi-line snippet that passes a variable to final_answer"""
        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
        result, logs, final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_image_output(self):
        """Test that an image passed to final_answer comes back as a PIL Image"""
        code_action = dedent("""
            import base64
            from PIL import Image
            from io import BytesIO
            image = Image.new("RGB", (10, 10), (255, 0, 0))
            final_answer(image)
        """)
        result, logs, final_answer = self.executor(code_action)
        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"

    def test_syntax_error_handling(self):
        """Test handling of syntax errors"""
        code_action = 'print("Missing Parenthesis'  # Syntax error
        with pytest.raises(AgentError) as exception_info:
            self.executor(code_action)
        # Checked after the `with` block: statements following the raising call inside it never run.
        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"

    def test_cleanup_on_deletion(self):
        """Test if Docker container stops and removes on deletion"""
        container_id = self.executor.container.id
        # NOTE(review): the docker_executor fixture calls delete() again on teardown,
        # so delete() is presumably safe to call twice - confirm.
        self.executor.delete()  # Trigger cleanup

        client = docker.from_env()
        try:
            # all=True also lists stopped containers, so a stopped-but-not-removed one is caught.
            remaining_ids = {c.id for c in client.containers.list(all=True)}
        finally:
            # Release the client's connections even if listing containers fails.
            client.close()
        assert container_id not in remaining_ids, "Container should be removed"

    @pytest.mark.parametrize(
        "code_action, expected_result",
        [
            (
                dedent('''
                    final_answer("""This is
                    a multiline
                    final answer""")
                '''),
                "This is\na multiline\nfinal answer",
            ),
            (
                dedent("""
                    text = '''Text containing
                    final_answer(5)
                    '''
                    final_answer(text)
                """),
                "Text containing\nfinal_answer(5)\n",
            ),
            (
                dedent("""
                    num = 2
                    if num == 1:
                        final_answer("One")
                    elif num == 2:
                        final_answer("Two")
                """),
                "Two",
            ),
        ],
    )
    def test_final_answer_patterns(self, code_action, expected_result):
        """Each snippet must be flagged as a final answer and return the expected text."""
        self.executor.send_tools({"final_answer": FinalAnswerTool()})

        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == expected_result

    def test_custom_final_answer(self):
        """A ``FinalAnswerTool`` subclass overriding ``forward`` shapes the returned result."""

        class CustomFinalAnswerTool(FinalAnswerTool):
            def forward(self, answer: str) -> str:
                return "CUSTOM" + answer

        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
        code_action = dedent("""
            final_answer(answer="_answer")
        """)
        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "CUSTOM_answer"

    def test_custom_final_answer_with_custom_inputs(self):
        """A ``FinalAnswerTool`` subclass may declare its own inputs and combine them."""

        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
        code_action = dedent("""
            final_answer(
                answer1="answer1_",
                answer2="_answer2"
            )
        """)
        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "answer1_CUSTOM_answer2"
class TestDockerExecutorUnit:
    """Unit tests for ``DockerExecutor`` with the Docker client, HTTP and websocket calls patched."""

    def test_cleanup(self):
        """Test that cleanup properly stops and removes the container"""
        logger = MagicMock()
        # Setup mocks: a container that reports itself as running
        fake_container = MagicMock(status="running", short_id="test123")

        with (
            patch("docker.from_env") as mock_from_env,
            patch("requests.post") as mock_post,
            patch("websocket.create_connection"),
        ):
            docker_client = mock_from_env.return_value
            docker_client.containers.run.return_value = fake_container
            docker_client.images.get.return_value = MagicMock()

            # Canned response for the HTTP POST the executor issues
            post_response = mock_post.return_value
            post_response.status_code = 201
            post_response.json.return_value = {"id": "test-kernel-id"}

            # Create executor
            executor = DockerExecutor(additional_imports=[], logger=logger, build_new_image=False)

            # Call cleanup
            executor.cleanup()

            # Verify container was stopped and removed
            fake_container.stop.assert_called_once()
            fake_container.remove.assert_called_once()
|