""" Docker Executor — Runs commands safely inside an isolated Docker container. Provides a sandbox for executing agent-generated shell commands with timeout enforcement and safety checking. """ from __future__ import annotations import time import subprocess from dataclasses import dataclass, field from typing import Optional, List from executor.safety import CommandSafetyChecker, SafetyCheckResult @dataclass class ExecutionResult: """Result from executing a command in the Docker sandbox. Attributes: stdout: Standard output from the command. stderr: Standard error from the command. exit_code: Process exit code (0 = success). timed_out: Whether the command exceeded the timeout. blocked: Whether the command was blocked by safety checks. block_reason: Reason the command was blocked, if applicable. execution_time: Time taken to execute in seconds. """ stdout: str = "" stderr: str = "" exit_code: int = -1 timed_out: bool = False blocked: bool = False block_reason: str = "" execution_time: float = 0.0 class DockerExecutor: """Executes shell commands inside a Docker container sandbox. Each episode gets a fresh container. Commands are safety-checked before execution and subject to a configurable timeout. Usage: executor = DockerExecutor(image="devops-sandbox:latest") executor.start_container() result = executor.execute("pip install flask") executor.stop_container() """ def __init__( self, image: str = "devops-sandbox:latest", timeout: int = 30, container_name_prefix: str = "devops-sandbox", use_local_fallback: bool = True, ) -> None: """Initialize the Docker executor. Args: image: Docker image to use for the sandbox. timeout: Maximum seconds per command execution. container_name_prefix: Prefix for container names. use_local_fallback: If True, fall back to local subprocess when Docker is not available (for development/testing). """ self.image = image self.timeout = timeout self.container_name_prefix = container_name_prefix self.use_local_fallback = use_local_fallback self.safety_checker = CommandSafetyChecker() self._container_id: Optional[str] = None self._docker_available: Optional[bool] = None self._env_vars: dict = {} @property def docker_available(self) -> bool: """Check if Docker is available on the host.""" if self._docker_available is None: try: result = subprocess.run( ["docker", "info"], capture_output=True, timeout=5, ) self._docker_available = result.returncode == 0 except (FileNotFoundError, subprocess.TimeoutExpired): self._docker_available = False return self._docker_available def start_container(self, scenario_setup_commands: List[str] | None = None) -> str: """Start a fresh Docker container for an episode. Args: scenario_setup_commands: Commands to run to set up the broken state. Returns: Container ID or 'local-fallback' if using local mode. """ self._env_vars = {} if self.docker_available: name = f"{self.container_name_prefix}-{int(time.time())}" result = subprocess.run( ["docker", "run", "-d", "--name", name, "--memory=512m", "--cpus=1", self.image, "sleep", "3600"], capture_output=True, text=True, timeout=10, ) if result.returncode != 0: if self.use_local_fallback: self._container_id = "local-fallback" self._run_setup_commands(scenario_setup_commands) return self._container_id raise RuntimeError(f"Failed to start container: {result.stderr}") self._container_id = result.stdout.strip() # Run setup commands if scenario_setup_commands: for cmd in scenario_setup_commands: subprocess.run( ["docker", "exec", self._container_id, "bash", "-c", cmd], capture_output=True, text=True, timeout=60, ) return self._container_id else: self._container_id = "local-fallback" self._run_setup_commands(scenario_setup_commands) return self._container_id def _run_setup_commands(self, commands: List[str] | None) -> None: """Run setup commands in local fallback mode.""" if not commands: return for cmd in commands: try: subprocess.run( ["bash", "-c", cmd], capture_output=True, text=True, timeout=60, cwd="/tmp", ) except (subprocess.TimeoutExpired, Exception): pass def execute(self, command: str) -> ExecutionResult: """Execute a command in the sandbox. Args: command: Shell command to execute. Returns: ExecutionResult with stdout, stderr, exit_code, etc. """ # Safety check first safety = self.safety_checker.check(command) if not safety.is_safe: return ExecutionResult( stdout="", stderr=f"BLOCKED: {safety.reason}", exit_code=-1, blocked=True, block_reason=safety.reason, ) # Track env var exports for local fallback if command.strip().startswith("export "): parts = command.strip()[7:].split("=", 1) if len(parts) == 2: self._env_vars[parts[0]] = parts[1] start_time = time.time() try: if self._container_id and self._container_id != "local-fallback": return self._execute_docker(command, start_time) else: return self._execute_local(command, start_time) except subprocess.TimeoutExpired: return ExecutionResult( stdout="", stderr="Command timed out", exit_code=-1, timed_out=True, execution_time=self.timeout, ) except Exception as e: return ExecutionResult( stdout="", stderr=str(e), exit_code=-1, execution_time=time.time() - start_time, ) def _execute_docker(self, command: str, start_time: float) -> ExecutionResult: """Execute command in Docker container.""" # Inject tracked environment variables env_exports = "" for k, v in self._env_vars.items(): env_exports += f"export {k}='{v}'; " full_command = env_exports + command result = subprocess.run( ["docker", "exec", self._container_id, "bash", "-c", full_command], capture_output=True, text=True, timeout=self.timeout, ) return ExecutionResult( stdout=result.stdout[:5000], stderr=result.stderr[:5000], exit_code=result.returncode, execution_time=time.time() - start_time, ) def _execute_local(self, command: str, start_time: float) -> ExecutionResult: """Execute command locally (fallback for development).""" import os env = os.environ.copy() env.update(self._env_vars) # Handle PEP 668 in local fallback if "pip install" in command and "--break-system-packages" not in command: command = command.replace("pip install", "pip install --break-system-packages") elif "pip3 install" in command and "--break-system-packages" not in command: command = command.replace("pip3 install", "pip3 install --break-system-packages") result = subprocess.run( ["bash", "-c", command], capture_output=True, text=True, timeout=self.timeout, cwd="/tmp", env=env, ) return ExecutionResult( stdout=result.stdout[:5000], stderr=result.stderr[:5000], exit_code=result.returncode, execution_time=time.time() - start_time, ) def stop_container(self) -> None: """Stop and remove the current container.""" if self._container_id and self._container_id != "local-fallback": try: subprocess.run( ["docker", "rm", "-f", self._container_id], capture_output=True, timeout=10, ) except Exception: pass self._container_id = None self._env_vars = {} def __del__(self) -> None: """Cleanup on garbage collection.""" self.stop_container()