Spaces:
Sleeping
Sleeping
| """ | |
| Docker Executor — Runs commands safely inside an isolated Docker container. | |
| Provides a sandbox for executing agent-generated shell commands with | |
| timeout enforcement and safety checking. | |
| """ | |
| from __future__ import annotations | |
| import time | |
| import subprocess | |
| from dataclasses import dataclass, field | |
| from typing import Optional, List | |
| from executor.safety import CommandSafetyChecker, SafetyCheckResult | |
@dataclass
class ExecutionResult:
    """Result from executing a command in the Docker sandbox.

    Declared as a dataclass so it can be built with keyword arguments
    (every producer in this module does so) and compared by value.

    Attributes:
        stdout: Standard output from the command.
        stderr: Standard error from the command.
        exit_code: Process exit code (0 = success). Defaults to -1, which
            is also what callers use when the command never ran.
        timed_out: Whether the command exceeded the timeout.
        blocked: Whether the command was blocked by safety checks.
        block_reason: Reason the command was blocked, if applicable.
        execution_time: Time taken to execute in seconds.
    """

    stdout: str = ""
    stderr: str = ""
    exit_code: int = -1
    timed_out: bool = False
    blocked: bool = False
    block_reason: str = ""
    execution_time: float = 0.0
class DockerExecutor:
    """Executes shell commands inside a Docker container sandbox.

    Each episode gets a fresh container. Commands are safety-checked
    before execution and subject to a configurable timeout. When Docker
    cannot be used and ``use_local_fallback`` is True, commands run on the
    host through ``bash -c`` instead (development/testing only).

    Usage:
        executor = DockerExecutor(image="devops-sandbox:latest")
        executor.start_container()
        result = executor.execute("pip install flask")
        executor.stop_container()
    """

    # Sentinel stored in ``_container_id`` while commands run on the host.
    _LOCAL_FALLBACK_ID = "local-fallback"
    # Captured stdout and stderr are each truncated to this many characters.
    MAX_OUTPUT_CHARS = 5000

    def __init__(
        self,
        image: str = "devops-sandbox:latest",
        timeout: int = 30,
        container_name_prefix: str = "devops-sandbox",
        use_local_fallback: bool = True,
    ) -> None:
        """Initialize the Docker executor.

        Args:
            image: Docker image to use for the sandbox.
            timeout: Maximum seconds per command execution.
            container_name_prefix: Prefix for container names.
            use_local_fallback: If True, fall back to local subprocess
                when Docker is not available (for development/testing).
                If False, commands are never run on the host.
        """
        # Set the state read by stop_container()/__del__ first, so cleanup
        # stays safe even if a later line in this constructor raises.
        self._container_id: Optional[str] = None
        self._env_vars: dict[str, str] = {}
        self._docker_available: Optional[bool] = None
        self.image = image
        self.timeout = timeout
        self.container_name_prefix = container_name_prefix
        self.use_local_fallback = use_local_fallback
        self.safety_checker = CommandSafetyChecker()

    @property
    def docker_available(self) -> bool:
        """Whether ``docker info`` succeeds on the host (probed once, cached)."""
        if self._docker_available is None:
            try:
                result = subprocess.run(
                    ["docker", "info"],
                    capture_output=True, timeout=5,
                )
                self._docker_available = result.returncode == 0
            except (OSError, subprocess.TimeoutExpired):
                # OSError covers a missing or non-executable docker binary.
                self._docker_available = False
        return self._docker_available

    def start_container(self, scenario_setup_commands: Optional[List[str]] = None) -> str:
        """Start a fresh Docker container for an episode.

        Any container left over from a previous episode is removed first.

        Args:
            scenario_setup_commands: Commands to run to set up the broken state.
                They are best-effort: failures and timeouts are ignored.

        Returns:
            Container ID or 'local-fallback' if using local mode.

        Raises:
            RuntimeError: If no container could be started and
                ``use_local_fallback`` is False.
        """
        import uuid

        # Drops the previous container (if any) and clears tracked env vars.
        self.stop_container()

        if not self.docker_available:
            return self._start_local_fallback(
                scenario_setup_commands, "Docker is not available on this host"
            )

        # The random suffix keeps names unique when two containers are
        # started within the same second.
        name = f"{self.container_name_prefix}-{int(time.time())}-{uuid.uuid4().hex[:8]}"
        try:
            result = subprocess.run(
                ["docker", "run", "-d", "--name", name,
                 "--memory=512m", "--cpus=1",
                 self.image, "sleep", "3600"],
                capture_output=True, text=True, timeout=10,
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # The daemon may still have created the container; remove it by
            # name so it does not linger.
            self._remove_container(name)
            return self._start_local_fallback(
                scenario_setup_commands, f"Failed to start container: {exc}"
            )
        if result.returncode != 0:
            return self._start_local_fallback(
                scenario_setup_commands, f"Failed to start container: {result.stderr}"
            )

        self._container_id = result.stdout.strip()
        for cmd in scenario_setup_commands or []:
            try:
                subprocess.run(
                    ["docker", "exec", self._container_id, "bash", "-c", cmd],
                    capture_output=True, text=True, timeout=60,
                )
            except (subprocess.TimeoutExpired, OSError):
                # Best-effort, matching the local-fallback setup path.
                continue
        return self._container_id

    def _start_local_fallback(self, commands: Optional[List[str]], reason: str) -> str:
        """Switch to local mode and run setup, or raise if fallback is disabled."""
        if not self.use_local_fallback:
            raise RuntimeError(reason)
        self._container_id = self._LOCAL_FALLBACK_ID
        self._run_setup_commands(commands)
        return self._container_id

    def _run_setup_commands(self, commands: Optional[List[str]]) -> None:
        """Run setup commands in local fallback mode."""
        if not commands:
            return
        for cmd in commands:
            try:
                subprocess.run(
                    ["bash", "-c", cmd],
                    capture_output=True, text=True, timeout=60,
                    cwd="/tmp",
                )
            except (subprocess.SubprocessError, OSError):
                # Setup is best-effort; a failing command must not abort
                # the episode.
                continue

    def execute(self, command: str) -> "ExecutionResult":
        """Execute a command in the sandbox.

        Args:
            command: Shell command to execute.

        Returns:
            ExecutionResult with stdout, stderr, exit_code, etc. Blocked
            commands, timeouts and unexpected errors are reported through
            the result (exit_code -1) rather than raised.
        """
        # Safety check first
        safety = self.safety_checker.check(command)
        if not safety.is_safe:
            return ExecutionResult(
                stdout="",
                stderr=f"BLOCKED: {safety.reason}",
                exit_code=-1,
                blocked=True,
                block_reason=safety.reason,
            )

        # Each command runs in a new shell, so exports are remembered here
        # and replayed for later commands.
        self._track_exports(command)

        start_time = time.time()
        try:
            if self._container_id and self._container_id != self._LOCAL_FALLBACK_ID:
                return self._execute_docker(command, start_time)
            if self.use_local_fallback:
                return self._execute_local(command, start_time)
            # Never run agent-generated commands on the host when the
            # caller disabled the local fallback.
            return ExecutionResult(
                stdout="",
                stderr="No sandbox container is running and local fallback is disabled",
                exit_code=-1,
                execution_time=time.time() - start_time,
            )
        except subprocess.TimeoutExpired:
            # NOTE(review): for Docker this stops the `docker exec` client;
            # the process inside the container may keep running - confirm.
            return ExecutionResult(
                stdout="",
                stderr="Command timed out",
                exit_code=-1,
                timed_out=True,
                execution_time=self.timeout,
            )
        except Exception as e:
            return ExecutionResult(
                stdout="",
                stderr=str(e),
                exit_code=-1,
                execution_time=time.time() - start_time,
            )

    def _track_exports(self, command: str) -> None:
        """Record ``NAME=value`` pairs from a leading ``export`` statement.

        Values are stored with shell quoting removed. Parsing stops at the
        first word that is not an assignment (for example ``;`` or ``&&``),
        so a trailing command is not absorbed into the value.
        """
        import shlex

        stripped = command.strip()
        if not stripped.startswith("export "):
            return
        lexer = shlex.shlex(stripped[len("export "):], posix=True, punctuation_chars=True)
        lexer.whitespace_split = True
        lexer.commenters = ""  # '#' inside a value is not a comment
        try:
            tokens = list(lexer)
        except ValueError:
            # Unbalanced quotes: nothing reliable to record.
            return
        for token in tokens:
            name, sep, value = token.partition("=")
            if not sep or not (name.isascii() and name.isidentifier()):
                break
            if "$" in value or "`" in value:
                # The value needs shell expansion; storing it literally
                # would corrupt the variable (e.g. PATH=$PATH:/x), so it
                # is not tracked.
                continue
            self._env_vars[name] = value

    def _env_export_prefix(self) -> str:
        """Build the ``export NAME=value; `` prefix that replays tracked vars."""
        import shlex

        return "".join(
            f"export {name}={shlex.quote(value)}; "
            for name, value in self._env_vars.items()
        )

    def _to_result(
        self, result: "subprocess.CompletedProcess[str]", start_time: float
    ) -> "ExecutionResult":
        """Convert a finished subprocess into a size-capped ExecutionResult."""
        return ExecutionResult(
            stdout=result.stdout[:self.MAX_OUTPUT_CHARS],
            stderr=result.stderr[:self.MAX_OUTPUT_CHARS],
            exit_code=result.returncode,
            execution_time=time.time() - start_time,
        )

    def _execute_docker(self, command: str, start_time: float) -> "ExecutionResult":
        """Execute command in Docker container."""
        # Inject tracked environment variables ahead of the command.
        full_command = self._env_export_prefix() + command
        result = subprocess.run(
            ["docker", "exec", self._container_id, "bash", "-c", full_command],
            capture_output=True, text=True, timeout=self.timeout,
        )
        return self._to_result(result, start_time)

    def _execute_local(self, command: str, start_time: float) -> "ExecutionResult":
        """Execute command locally (fallback for development)."""
        import os

        env = os.environ.copy()
        env.update(self._env_vars)
        # Handle PEP 668 in local fallback
        if "pip install" in command and "--break-system-packages" not in command:
            command = command.replace("pip install", "pip install --break-system-packages")
        elif "pip3 install" in command and "--break-system-packages" not in command:
            command = command.replace("pip3 install", "pip3 install --break-system-packages")
        result = subprocess.run(
            ["bash", "-c", command],
            capture_output=True, text=True,
            timeout=self.timeout, cwd="/tmp", env=env,
        )
        return self._to_result(result, start_time)

    @staticmethod
    def _remove_container(container_ref: str) -> None:
        """Force-remove a container by ID or name; failures are ignored."""
        try:
            subprocess.run(
                ["docker", "rm", "-f", container_ref],
                capture_output=True, timeout=10,
            )
        except (subprocess.SubprocessError, OSError):
            # Cleanup is best-effort: there is nothing useful to do if the
            # daemon is gone or slow.
            pass

    def stop_container(self) -> None:
        """Stop and remove the current container."""
        # getattr keeps this safe on a partially constructed instance.
        container_id = getattr(self, "_container_id", None)
        if container_id and container_id != self._LOCAL_FALLBACK_ID:
            self._remove_container(container_id)
        self._container_id = None
        self._env_vars = {}

    def __del__(self) -> None:
        """Cleanup on garbage collection."""
        try:
            self.stop_container()
        except Exception:
            # Finalizers can run during interpreter shutdown, when module
            # globals may already be torn down; never raise from here.
            pass