AdithyaSK HF Staff committed on
Commit
32df48d
·
verified ·
1 Parent(s): c60b0a7

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
FROM ${BASE_IMAGE} AS builder

WORKDIR /app

# Ensure git is available (required for installing dependencies from VCS).
# curl and ca-certificates are installed alongside it because the uv bootstrap
# step further down fetches its installer with curl over HTTPS; without them
# that fallback fails on any base image that ships neither uv nor curl.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates curl git && \
    rm -rf /var/lib/apt/lists/*

# Build argument to control whether we're building standalone or in-repo
ARG BUILD_MODE=in-repo
ARG ENV_NAME=coding_tools_env

# Copy environment code (always at root of build context)
COPY . /app/env

# For in-repo builds, openenv is already vendored in the build context
# For standalone builds, openenv will be installed via pyproject.toml
WORKDIR /app/env

# Ensure uv is available (for local builds where base image lacks it)
RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh | sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi

# Install dependencies using uv sync
# If uv.lock exists, use it; otherwise resolve on the fly
# First pass: third-party dependencies only (--no-install-project).
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-install-project --no-editable; \
    else \
        uv sync --no-install-project --no-editable; \
    fi

# Second pass: install the project itself into the same virtual environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-editable; \
    else \
        uv sync --no-editable; \
    fi

# Final runtime stage
FROM ${BASE_IMAGE}

WORKDIR /app

# Copy the virtual environment from builder
COPY --from=builder /app/env/.venv /app/.venv

# Copy the environment code
COPY --from=builder /app/env /app/env

# Set PATH to use the virtual environment
ENV PATH="/app/.venv/bin:$PATH"

# Set PYTHONPATH so imports work correctly
ENV PYTHONPATH="/app/env:$PYTHONPATH"
ENV ENABLE_WEB_INTERFACE=true

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=2).read()" || exit 1

# Run the FastAPI server
# The module path is constructed to work with the /app/env structure
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -1,10 +1,72 @@
1
  ---
2
- title: Coding Tools Env
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
 
 
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Coding Tools Environment Server
3
+ emoji: 🧰
4
+ colorFrom: green
5
+ colorTo: gray
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ - coding
13
+ - tools
14
+ - e2b
15
+ short_description: SETA-style multi-tool coding environment backed by E2B
16
  ---
17
 
18
+ # Coding Tools Environment
19
+
20
+ `coding_tools_env` is an E2B-backed multi-tool coding environment with explicit
21
+ filesystem and shell tools.
22
+
23
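+ Each episode creates a fresh E2B sandbox. `reset()` accepts optional `setup` and
24
+ `verify` command lists: setup commands run immediately in the new sandbox, and verify commands run when `submit_solution` is called to compute the reward.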
25
+
26
+ ## Tools
27
+
28
+ - `bash(command, timeout=30)`
29
+ - `read(file_path, offset=None, limit=None)`
30
+ - `write(file_path, content)`
31
+ - `edit(file_path, old_string, new_string, replace_all=False)`
32
+ - `multi_edit(file_path, edits)`
33
+ - `glob(pattern, path=None)`
34
+ - `grep(pattern, path=None, include=None)`
35
+ - `ls(path=".", ignore=None)`
36
+ - `todo_write(todos)`
37
+ - `submit_solution()`
38
+
39
+ ## Quick Start
40
+
41
+ ```python
42
+ from coding_tools_env import CodingToolsEnv
43
+
44
+ with CodingToolsEnv(base_url="http://localhost:8000").sync() as env:
45
+     env.reset(
46
+         setup=["mkdir -p /home/user/work"],
47
+         verify=["test -f /home/user/work/answer.txt"],
48
+     )
49
+     print(env.call_tool("write", file_path="/home/user/work/answer.txt", content="done\n"))
50
+     print(env.call_tool("submit_solution"))
51
+ ```
52
+
53
+ ## Local Server
54
+
55
+ ```bash
56
+ cd envs/coding_tools_env
57
+ E2B_API_KEY=e2b_... uv run --project . server
58
+ ```
59
+
60
+ ## Docker
61
+
62
+ ```bash
63
+ cd envs/coding_tools_env
64
+ openenv build -t coding-tools-env
65
+ docker run -p 8000:8000 -e E2B_API_KEY=e2b_... coding-tools-env
66
+ ```
67
+
68
+ ## Configuration
69
+
70
+ - `E2B_API_KEY`: required when resetting an episode.
71
+ - `MAX_CONCURRENT_ENVS`: max concurrent sessions (default `4`).
72
+
__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Coding Tools Environment for OpenEnv."""
8
+
9
+ from openenv.core.env_server.mcp_types import CallToolAction, ListToolsAction
10
+
11
+ from .client import CodingToolsEnv
12
+ from .models import CodingToolsState, CommandResult, EditSpec, TodoItem
13
+
14
+ __all__ = [
15
+ "CodingToolsEnv",
16
+ "CodingToolsState",
17
+ "CommandResult",
18
+ "TodoItem",
19
+ "EditSpec",
20
+ "CallToolAction",
21
+ "ListToolsAction",
22
+ ]
client.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Client for coding_tools_env."""
8
+
9
+ from openenv.core.mcp_client import MCPToolClient
10
+
11
+
12
class CodingToolsEnv(MCPToolClient):
    """Client type for the coding_tools_env server.

    The subclass adds no attributes or methods of its own; all behaviour is
    inherited from ``MCPToolClient``.
    """
models.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Models for coding_tools_env."""
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+ from openenv.core.env_server.types import State
14
+ from pydantic import BaseModel, Field
15
+
16
+
17
class CommandResult(BaseModel):
    """Normalized result for one tool execution."""

    # Name of the tool (or phase, such as "setup" or "verify") that produced the result.
    tool: str
    # True when the operation succeeded.
    ok: bool
    # Text produced by the operation; defaults to an empty string.
    output: str = ""
    # Error description for a failed operation; None when there is no error.
    error: str | None = None
    # Extra details about the call (for example the command that was run);
    # each instance gets its own empty dict by default.
    metadata: dict[str, Any] = Field(default_factory=dict)
25
+
26
+
27
class TodoItem(BaseModel):
    """Todo item tracked by todo_write."""

    # Caller-chosen identifier for the item.
    id: str
    # Text of the todo.
    content: str
    # Free-form string at the model level; any restriction on allowed values
    # is applied by the code that stores todos, not by this class.
    status: str
    # Free-form string at the model level; see the note on ``status``.
    priority: str
34
+
35
+
36
class CodingToolsState(State):
    """Per-session state for coding_tools_env."""

    # Identifier of the sandbox backing the current episode; None until one exists.
    sandbox_id: str | None = None
    # Results of the setup commands, in the order they were run.
    setup_results: list[CommandResult] = Field(default_factory=list)
    # Shell commands to run when the solution is submitted.
    verify_commands: list[str] = Field(default_factory=list)
    # Results of the most recent verification run.
    verify_results: list[CommandResult] = Field(default_factory=list)
    # Current todo list.
    todos: list[TodoItem] = Field(default_factory=list)
    # Chronological record of tool invocations.
    tool_history: list[CommandResult] = Field(default_factory=list)
    # Whether a solution has been submitted in this episode.
    submitted: bool = False
    # Reward from the latest submission; None before any submission.
    last_reward: float | None = None
    # Error from the most recently recorded tool call; None if it had no error.
    last_error: str | None = None
48
+
49
+
50
class EditSpec(BaseModel):
    """One edit operation for multi_edit."""

    # Exact text to look for in the file.
    old_string: str
    # Text that replaces ``old_string``.
    new_string: str
    # When True every occurrence is meant to be replaced instead of a single one.
    replace_all: bool = False
openenv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: coding_tools_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
7
+
pyproject.toml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-coding_tools_env"
13
+ version = "0.1.0"
14
+ description = "SETA-style coding tools environment for OpenEnv backed by E2B"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ "openenv-core[core]>=0.2.2",
18
+ "e2b-code-interpreter>=1.0.0",
19
+ "fastapi>=0.115.0",
20
+ "fastmcp>=3.0.0",
21
+ "gradio>=4.0.0",
22
+ "pydantic>=2.0.0",
23
+ "requests>=2.31.0",
24
+ "uvicorn>=0.24.0",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ dev = [
29
+ "pytest>=8.0.0",
30
+ "pytest-cov>=4.0.0",
31
+ ]
32
+
33
+ [project.scripts]
34
+ server = "coding_tools_env.server.app:main"
35
+
36
+ [tool.setuptools]
37
+ include-package-data = true
38
+ packages = ["coding_tools_env", "coding_tools_env.server"]
39
+ package-dir = { "coding_tools_env" = ".", "coding_tools_env.server" = "server" }
40
+
41
+ [tool.setuptools.package-data]
42
+ coding_tools_env = ["**/*.txt", "**/*.yaml"]
server/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Coding Tools Env environment server components."""
8
+
9
+ from .coding_tools_env_environment import CodingToolsEnvironment
10
+
11
+ __all__ = ["CodingToolsEnvironment"]
server/app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """FastAPI app for coding_tools_env."""
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from pathlib import Path
13
+
14
+ from openenv.core.env_server.http_server import create_app
15
+ from openenv.core.env_server.mcp_types import CallToolAction, CallToolObservation
16
+
17
+ try:
18
+ from .coding_tools_env_environment import CodingToolsEnvironment
19
+ from .gradio_ui import coding_tools_ui_builder
20
+ except ImportError: # pragma: no cover
21
+ from server.coding_tools_env_environment import CodingToolsEnvironment # type: ignore
22
+ from server.gradio_ui import coding_tools_ui_builder # type: ignore
23
+
24
+
25
+ def _load_env_file() -> None:
26
+ candidate = Path(__file__).resolve().parents[1] / ".env"
27
+ if not candidate.exists():
28
+ return
29
+ for raw in candidate.read_text(encoding="utf-8").splitlines():
30
+ line = raw.strip()
31
+ if not line or line.startswith("#") or "=" not in line:
32
+ continue
33
+ key, _, value = line.partition("=")
34
+ key = key.strip()
35
+ value = value.strip().strip('"').strip("'")
36
+ if key and key not in os.environ:
37
+ os.environ[key] = value
38
+
39
+
40
# Import-time setup. The .env file is read before the app is built so that
# MAX_CONCURRENT_ENVS (read below) and any other variables it defines are
# visible to the calls that follow.
_load_env_file()
# Only applies when ENABLE_WEB_INTERFACE is not already set.
os.environ.setdefault("ENABLE_WEB_INTERFACE", "true")

# ASGI application object served by uvicorn.
app = create_app(
    CodingToolsEnvironment,
    CallToolAction,
    CallToolObservation,
    env_name="coding_tools_env",
    # Raises ValueError at import time if the variable is not an integer string.
    max_concurrent_envs=int(os.getenv("MAX_CONCURRENT_ENVS", "4")),
    gradio_builder=coding_tools_ui_builder,
)
51
+
52
+
53
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    # Imported inside the function, so uvicorn is only loaded when the server
    # is started through this entry point.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


if __name__ == "__main__":
    main()
server/coding_tools_env_environment.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """SETA-style multi-tool coding environment backed by E2B."""
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from typing import Any, Optional
13
+ from uuid import uuid4
14
+
15
+ from fastmcp import FastMCP
16
+ from openenv.core.env_server.mcp_environment import MCPEnvironment
17
+ from openenv.core.env_server.types import Action, Observation
18
+
19
+ try:
20
+ from .e2b_sandbox import E2BSandbox
21
+ from ..models import CodingToolsState, CommandResult, EditSpec, TodoItem
22
+ except ImportError: # pragma: no cover
23
+ from models import CodingToolsState, CommandResult, EditSpec, TodoItem
24
+ from server.e2b_sandbox import E2BSandbox
25
+
26
+
27
+ REWARD_FILE = "/home/user/logs/verifier/reward.txt"
28
+
29
+
30
class CodingToolsEnvironment(MCPEnvironment):
    """Tool-centric coding environment with one sandbox per episode.

    ``reset`` creates an ``E2BSandbox`` and runs the setup commands; the MCP
    tools registered in ``__init__`` operate on that sandbox and log every
    call to ``state.tool_history``; ``submit_solution`` runs the verify
    commands and stores the resulting reward in ``state.last_reward``.
    """

    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(self):
        # No sandbox exists until reset() is called; sandbox-backed tools
        # return an error string while this is None.
        self._sandbox: Optional[E2BSandbox] = None
        self._state = CodingToolsState(episode_id=str(uuid4()), step_count=0)

        mcp = FastMCP("coding_tools_env")

        # The tools below are closures over ``self``. Their docstrings are left
        # short on purpose: they belong to the registered tool functions.

        @mcp.tool
        def bash(command: str, timeout: float | None = 30) -> str:
            """Execute bash commands using the computer instance."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            # An explicit None falls back to the same 30 second default.
            timeout_value = 30 if timeout is None else float(timeout)
            result = self._sandbox.run_shell(command, timeout_s=timeout_value)
            self._record("bash", result.ok, result.output, result.error, result.metadata)
            # On failure the error is followed by whatever output was captured.
            return result.output if result.ok else f"ERROR: {result.error}\n{result.output}".strip()

        @mcp.tool
        def read(file_path: str, offset: int | None = None, limit: int | None = None) -> str:
            """Read file contents using computer instance."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            result = self._sandbox.read_file(file_path=file_path, offset=offset, limit=limit)
            self._record("read", result.ok, result.output, result.error, result.metadata)
            return result.output if result.ok else f"ERROR: {result.error}"

        @mcp.tool
        def write(file_path: str, content: str) -> str:
            """Write content to a file using computer instance."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            result = self._sandbox.write_file(file_path=file_path, content=content)
            self._record("write", result.ok, result.output, result.error, result.metadata)
            return result.output if result.ok else f"ERROR: {result.error}"

        @mcp.tool
        def edit(
            file_path: str,
            old_string: str,
            new_string: str,
            replace_all: bool = False,
        ) -> str:
            """Perform exact string replacement in a file."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            # Read-modify-write: the whole file is fetched, edited in memory
            # and written back.
            read_result = self._sandbox.read_file(file_path=file_path)
            if not read_result.ok:
                self._record("edit", False, "", read_result.error, None)
                return f"ERROR: {read_result.error}"
            original = read_result.output
            if old_string not in original:
                self._record("edit", False, "", "old_string not found", None)
                return "ERROR: old_string not found"
            if replace_all:
                updated = original.replace(old_string, new_string)
            else:
                # Only the first occurrence is replaced.
                updated = original.replace(old_string, new_string, 1)
            write_result = self._sandbox.write_file(file_path=file_path, content=updated)
            ok = write_result.ok
            msg = "edit ok" if ok else ""
            self._record("edit", ok, msg, write_result.error, {"replace_all": replace_all})
            return msg if ok else f"ERROR: {write_result.error}"

        @mcp.tool
        def multi_edit(file_path: str, edits: list[dict[str, Any]]) -> str:
            """Perform multiple edits on a single file."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            read_result = self._sandbox.read_file(file_path=file_path)
            if not read_result.ok:
                self._record("multi_edit", False, "", read_result.error, None)
                return f"ERROR: {read_result.error}"
            text = read_result.output
            applied = 0
            # Edits are applied sequentially to the in-memory text, so each
            # edit sees the result of the previous ones. The file is written
            # only after every edit matched; an unmatched edit returns before
            # the write, leaving the file unchanged.
            for raw in edits:
                spec = EditSpec.model_validate(raw)
                if spec.old_string not in text:
                    self._record(
                        "multi_edit",
                        False,
                        "",
                        f"old_string not found: {spec.old_string[:80]}",
                        {"applied": applied},
                    )
                    return f"ERROR: old_string not found: {spec.old_string[:80]}"
                if spec.replace_all:
                    text = text.replace(spec.old_string, spec.new_string)
                else:
                    text = text.replace(spec.old_string, spec.new_string, 1)
                applied += 1
            write_result = self._sandbox.write_file(file_path=file_path, content=text)
            self._record(
                "multi_edit",
                write_result.ok,
                f"applied {applied} edits" if write_result.ok else "",
                write_result.error,
                {"applied": applied},
            )
            return f"applied {applied} edits" if write_result.ok else f"ERROR: {write_result.error}"

        @mcp.tool
        def glob(pattern: str, path: str | None = None) -> str:
            """Find files matching a glob pattern."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            result = self._sandbox.glob_files(pattern=pattern, path=path)
            self._record("glob", result.ok, result.output, result.error, result.metadata)
            return result.output if result.ok else f"ERROR: {result.error}"

        @mcp.tool
        def grep(pattern: str, path: str | None = None, include: str | None = None) -> str:
            """Search for patterns in files."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            result = self._sandbox.grep(pattern=pattern, path=path, include=include)
            self._record("grep", result.ok, result.output, result.error, result.metadata)
            return result.output if result.ok else f"ERROR: {result.error}\n{result.output}".strip()

        @mcp.tool
        def ls(path: str = ".", ignore: list[str] | None = None) -> str:
            """List files and directories."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            result = self._sandbox.list_dir(path=path, ignore=ignore)
            self._record("ls", result.ok, result.output, result.error, result.metadata)
            return result.output if result.ok else f"ERROR: {result.error}"

        @mcp.tool
        def todo_write(todos: list[dict[str, Any]]) -> str:
            """Manage todo list for planning and progress tracking."""
            # This tool only touches in-memory state, so it does not check for
            # a sandbox.
            validated = [TodoItem.model_validate(todo) for todo in todos]
            in_progress = [item for item in validated if item.status == "in_progress"]
            if len(in_progress) > 1:
                msg = "ERROR: only one todo item can be in_progress"
                self._record("todo_write", False, "", msg, None)
                return msg
            for item in validated:
                if item.status not in {"pending", "in_progress", "completed"}:
                    msg = f"ERROR: invalid status {item.status}"
                    self._record("todo_write", False, "", msg, None)
                    return msg
                if item.priority not in {"high", "medium", "low"}:
                    msg = f"ERROR: invalid priority {item.priority}"
                    self._record("todo_write", False, "", msg, None)
                    return msg
            # The submitted list replaces the stored one wholesale.
            self._state.todos = validated
            self._record("todo_write", True, f"stored {len(validated)} todos", None, None)
            return f"stored {len(validated)} todos"

        @mcp.tool
        def submit_solution() -> str:
            """Submit solution and run test suite via verify commands."""
            if not self._sandbox:
                return "Error: environment not reset. Call reset() first."
            self._state.submitted = True
            # Without verify commands there is nothing to score: reward is 0.0.
            if not self._state.verify_commands:
                self._state.last_reward = 0.0
                self._record(
                    "submit_solution",
                    True,
                    "No verify commands configured. reward=0.0",
                    None,
                    {"reward": 0.0, "finished": True},
                )
                return "No verify commands configured. reward=0.0"
            summary = self._run_verify_commands()
            self._record(
                "submit_solution",
                True,
                (
                    f"Verification: {summary['passed']}/{summary['total']} passed; "
                    f"reward={summary['reward']}"
                ),
                None,
                {"reward": summary["reward"], "finished": True},
            )
            return (
                f"Verification: {summary['passed']}/{summary['total']} passed; "
                f"reward={summary['reward']}"
            )

        super().__init__(mcp)

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> Observation:
        """Start a new episode backed by a fresh sandbox.

        Any existing sandbox is killed and the state is replaced. ``seed`` is
        accepted but not used here. Setup commands are read from the ``setup``
        keyword (falling back to ``setup_scripts``) and verify commands from
        ``verify`` (falling back to ``verify_scripts``).

        Returns:
            An observation whose metadata has ``status`` "ready" on success
            (``done=False``), or ``status`` "error" with ``done=True`` when
            ``E2B_API_KEY`` is missing, the sandbox cannot be created, or a
            setup command fails.
        """
        if self._sandbox:
            self._sandbox.kill()
            self._sandbox = None

        api_key = os.environ.get("E2B_API_KEY")
        self._state = CodingToolsState(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
        )
        if not api_key:
            return Observation(
                done=True,
                reward=None,
                metadata={
                    "status": "error",
                    "error": "E2B_API_KEY is not set. Configure it before reset.",
                },
            )

        try:
            self._sandbox = E2BSandbox(api_key=api_key)
        except Exception as exc:  # noqa: BLE001
            # Any creation failure is reported through the observation
            # instead of being raised to the caller.
            return Observation(
                done=True,
                reward=None,
                metadata={
                    "status": "error",
                    "error": f"failed to create E2B sandbox: {type(exc).__name__}: {exc}",
                },
            )

        self._state.sandbox_id = self._sandbox.sandbox_id
        setup_commands = _coerce_commands(
            kwargs.get("setup", kwargs.get("setup_scripts", []))
        )
        verify_commands = _coerce_commands(
            kwargs.get("verify", kwargs.get("verify_scripts", []))
        )
        self._state.verify_commands = verify_commands

        # Directory that holds REWARD_FILE.
        self._sandbox.run_shell("mkdir -p /home/user/logs/verifier")
        if setup_commands:
            for command in setup_commands:
                result = self._sandbox.run_shell(command, timeout_s=60)
                command_result = CommandResult(
                    tool="setup",
                    ok=result.ok,
                    output=result.output,
                    error=result.error,
                    metadata={"command": command},
                )
                self._state.setup_results.append(command_result)
                # Stop at the first failing setup command; later commands are
                # not run. The sandbox is left in place (not killed) here.
                if not result.ok:
                    return Observation(
                        done=True,
                        reward=None,
                        metadata={
                            "status": "error",
                            "sandbox_id": self._state.sandbox_id,
                            "message": "Setup command failed.",
                            "setup_results": [
                                entry.model_dump() for entry in self._state.setup_results
                            ],
                        },
                    )

        return Observation(
            done=False,
            reward=None,
            metadata={
                "status": "ready",
                "sandbox_id": self._state.sandbox_id,
                "message": "coding_tools_env ready.",
                "verify_commands": verify_commands,
                "setup_results": [
                    entry.model_dump() for entry in self._state.setup_results
                ],
            },
        )

    def _step_impl(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **_: Any,
    ) -> Observation:
        """Return an error observation naming the unsupported action type.

        NOTE(review): presumably the base class calls this for actions that
        are neither ListToolsAction nor CallToolAction; confirm against
        MCPEnvironment.
        """
        return Observation(
            done=False,
            reward=None,
            metadata={
                "error": (
                    f"Unknown action type: {type(action).__name__}. "
                    "Use ListToolsAction or CallToolAction for MCP interactions."
                )
            },
        )

    def step(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Observation:
        """Increment the step counter, then delegate to the base class ``step``."""
        self._state.step_count += 1
        return super().step(action, timeout_s=timeout_s, **kwargs)

    async def step_async(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Observation:
        """Increment the step counter, then delegate to the base class ``step_async``."""
        self._state.step_count += 1
        return await super().step_async(action, timeout_s=timeout_s, **kwargs)

    @property
    def state(self) -> CodingToolsState:
        """Current per-session state object."""
        return self._state

    def close(self) -> None:
        """Kill the sandbox, if one exists, and forget it."""
        if self._sandbox:
            self._sandbox.kill()
            self._sandbox = None

    def _record(
        self,
        tool: str,
        ok: bool,
        output: str,
        error: str | None,
        metadata: dict[str, Any] | None,
    ) -> None:
        """Append one tool call to ``tool_history`` and update ``last_error``."""
        result = CommandResult(
            tool=tool,
            ok=ok,
            output=output,
            error=error,
            metadata=metadata or {},
        )
        self._state.tool_history.append(result)
        # Overwritten on every call, so a successful call clears it to None.
        self._state.last_error = error

    def _run_verify_commands(self) -> dict[str, Any]:
        """Run every verify command and compute the reward.

        All commands are run even if an earlier one fails. The reward is the
        value read by ``_read_reward_override`` when that returns one,
        otherwise the fraction of commands that succeeded.

        Returns:
            A dict with ``passed``, ``total`` and ``reward``.
        """
        self._sandbox.run_shell("mkdir -p /home/user/logs/verifier")
        # Results from any previous submission are discarded.
        self._state.verify_results = []
        passed = 0
        for command in self._state.verify_commands:
            result = self._sandbox.run_shell(command, timeout_s=120)
            record = CommandResult(
                tool="verify",
                ok=result.ok,
                output=result.output,
                error=result.error,
                metadata={"command": command},
            )
            self._state.verify_results.append(record)
            if result.ok:
                passed += 1
        total = len(self._state.verify_commands)
        reward = _read_reward_override(self._sandbox)
        if reward is None:
            reward = (passed / total) if total else 0.0
        self._state.last_reward = reward
        return {"passed": passed, "total": total, "reward": reward}
387
+
388
+
389
+ def _coerce_commands(value: Any) -> list[str]:
390
+ if value is None:
391
+ return []
392
+ if isinstance(value, str):
393
+ return [value] if value.strip() else []
394
+ return [str(item) for item in value if str(item).strip()]
395
+
396
+
397
+ def _read_reward_override(sandbox: E2BSandbox) -> float | None:
398
+ result = sandbox.read_file(REWARD_FILE)
399
+ if not result.ok:
400
+ return None
401
+ raw = (result.output or "").strip()
402
+ if not raw:
403
+ return None
404
+ try:
405
+ return float(raw)
406
+ except ValueError:
407
+ return None
server/e2b_sandbox.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """E2B sandbox wrapper with shell and file-operation helpers."""
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from dataclasses import dataclass
13
+ from typing import Any
14
+
15
+ _E2B_IMPORT_ERROR: ImportError | None = None
16
+
17
+ try:
18
+ from e2b_code_interpreter import Sandbox
19
+ except ImportError as _e2b_import_error: # pragma: no cover
20
+ _E2B_IMPORT_ERROR = _e2b_import_error
21
+ Sandbox = None # type: ignore[assignment]
22
+
23
+
24
@dataclass
class ToolResult:
    """Outcome of a single sandbox helper call."""

    # True when the operation succeeded.
    ok: bool
    # Text produced by the operation; empty string by default.
    output: str = ""
    # Error description for a failed operation; None by default.
    error: str | None = None
    # Optional extra details about the call; None when nothing was attached.
    metadata: dict[str, Any] | None = None
30
+
31
+
32
+ class E2BSandbox:
33
+ """One E2B sandbox per OpenEnv episode."""
34
+
35
def __init__(self, api_key: str):
    """Create the remote sandbox and prepare its work directory.

    Args:
        api_key: E2B API key passed to ``Sandbox.create``.

    Raises:
        ImportError: If ``e2b_code_interpreter`` could not be imported.
        Exception: Whatever ``Sandbox.create`` or the initial ``run_shell``
            call raises; in the latter case the sandbox is killed first.
    """
    if Sandbox is None:
        raise ImportError(
            "e2b-code-interpreter is not installed. Install coding_tools_env "
            f"dependencies. Original import error: {_E2B_IMPORT_ERROR}"
        )
    self._sbx = Sandbox.create(api_key=api_key)
    self.sandbox_id: str = self._sbx.sandbox_id
    try:
        self.run_shell("mkdir -p /home/user/work")
    except Exception:
        # The remote sandbox already exists at this point. If the constructor
        # fails, the caller never receives an object to kill, so release the
        # sandbox here (best effort) before propagating the original error.
        try:
            self._sbx.kill()
        except Exception:  # noqa: BLE001 - cleanup must not mask the real failure
            pass
        raise
44
+
45
def run_shell(self, command: str, timeout_s: float = 30) -> ToolResult:
    """Run a shell command inside the sandbox and report its outcome.

    The command is executed through a small Python script that calls
    ``subprocess.run(..., shell=True)`` with the given timeout and prints the
    return code, stdout and stderr as one JSON object.

    Returns:
        A ``ToolResult`` whose ``output`` is stdout followed by stderr
        (stripped) and whose ``metadata`` carries ``exit_code``. ``ok`` is
        True only for exit code 0. If no JSON payload could be decoded from
        the execution's stdout, a failed result built from the execution
        error is returned instead.
    """
    script = (
        "import subprocess, json\n"
        f"_r = subprocess.run({command!r}, shell=True, capture_output=True, text=True, timeout={float(timeout_s)!r})\n"
        "print(json.dumps({'returncode': _r.returncode, 'stdout': _r.stdout, 'stderr': _r.stderr}))\n"
    )
    execution = self._sbx.run_code(script)
    payload = _decode_json_line(execution.logs.stdout)
    if payload is None:
        return ToolResult(ok=False, error=_format_error(execution))

    exit_code = int(payload.get("returncode", 1))
    captured_out = str(payload.get("stdout", ""))
    captured_err = str(payload.get("stderr", ""))
    combined = (captured_out + captured_err).strip()
    details = {"exit_code": exit_code}
    if exit_code == 0:
        return ToolResult(ok=True, output=combined, error=None, metadata=details)
    return ToolResult(
        ok=False,
        output=combined,
        error=f"exit_code={exit_code}",
        metadata=details,
    )
65
+
66
def read_file(
    self, file_path: str, offset: int | None = None, limit: int | None = None
) -> ToolResult:
    """Read a UTF-8 text file from the sandbox.

    Args:
        file_path: Path of the file inside the sandbox.
        offset: Optional start index into the decoded text (characters, not
            lines); ``None`` reads from the beginning.
        limit: Optional maximum number of characters to return after
            ``offset`` has been applied; ``None`` means no limit.

    Returns:
        ``ToolResult(ok=True, output=<text>)`` on success; a failed result
        with error ``"file not found"`` when the path does not exist, or a
        failed result built from the execution error when the script
        produced no JSON payload.
    """
    # The generated script contains nested blocks, so every body line must be
    # indented deeper than the ``if``/``else`` that owns it; otherwise the
    # sandbox rejects the script with an IndentationError.
    code = (
        "from pathlib import Path\n"
        "import json\n"
        f"_p = Path({file_path!r})\n"
        "if not _p.exists():\n"
        "    print(json.dumps({'ok': False, 'error': 'file not found'}))\n"
        "else:\n"
        "    _t = _p.read_text(encoding='utf-8')\n"
        f"    _o = {offset if offset is not None else 'None'}\n"
        f"    _l = {limit if limit is not None else 'None'}\n"
        "    if _o is not None:\n"
        "        _t = _t[_o:]\n"
        "    if _l is not None:\n"
        "        _t = _t[:_l]\n"
        "    print(json.dumps({'ok': True, 'content': _t}))\n"
    )
    execution = self._sbx.run_code(code)
    result = _decode_json_line(execution.logs.stdout)
    if result is None:
        return ToolResult(ok=False, error=_format_error(execution))
    if not result.get("ok", False):
        return ToolResult(ok=False, error=str(result.get("error", "read failed")))
    return ToolResult(ok=True, output=str(result.get("content", "")))
92
+
93
def write_file(self, file_path: str, content: str) -> ToolResult:
    """Write UTF-8 text to a file in the sandbox, creating parent directories.

    Returns:
        ``ToolResult(ok=True, output="write ok")`` with the resulting file
        size under ``metadata["bytes"]``, or a failed result built from the
        execution error when the script produced no JSON payload.
    """
    script = (
        "from pathlib import Path\n"
        "import json\n"
        f"_p = Path({file_path!r})\n"
        "_p.parent.mkdir(parents=True, exist_ok=True)\n"
        f"_p.write_text({content!r}, encoding='utf-8')\n"
        "print(json.dumps({'ok': True, 'bytes': len(_p.read_bytes())}))\n"
    )
    execution = self._sbx.run_code(script)
    payload = _decode_json_line(execution.logs.stdout)
    if payload is not None:
        size = payload.get("bytes", 0)
        return ToolResult(ok=True, output="write ok", metadata={"bytes": size})
    return ToolResult(ok=False, error=_format_error(execution))
107
+
108
def glob_files(self, pattern: str, path: str | None = None) -> ToolResult:
    """Return the sorted paths under ``path`` that match a glob ``pattern``.

    ``path`` defaults to the sandbox's current directory (``"."``) when it
    is ``None`` or empty. Matches are returned newline-joined in ``output``
    and as a list in ``metadata["matches"]``.
    """
    search_root = path or "."
    script_lines = [
        "from pathlib import Path",
        "import json",
        f"_root = Path({search_root!r})",
        f"_matches = sorted(str(p) for p in _root.glob({pattern!r}))",
        "print(json.dumps({'ok': True, 'matches': _matches}))",
    ]
    execution = self._sbx.run_code("\n".join(script_lines) + "\n")
    payload = _decode_json_line(execution.logs.stdout)
    if payload is None:
        return ToolResult(ok=False, error=_format_error(execution))
    found = payload.get("matches", [])
    return ToolResult(ok=True, output="\n".join(found), metadata={"matches": found})
122
+
123
def list_dir(self, path: str = ".", ignore: list[str] | None = None) -> ToolResult:
    """List the direct children of ``path`` inside the sandbox.

    Entries whose name appears in ``ignore`` are skipped. ``output`` has one
    ``[dir] name`` / ``[file] name`` line per entry, sorted by path, and
    ``metadata["items"]`` holds the raw ``{"name", "is_dir"}`` records. A
    missing path yields a failed result with ``"path not found"``.
    """
    skipped = ignore or []
    script_lines = [
        "from pathlib import Path",
        "import json",
        f"_ignore = set({skipped!r})",
        f"_p = Path({path!r})",
        "if not _p.exists():",
        "    print(json.dumps({'ok': False, 'error': 'path not found'}))",
        "else:",
        "    _items = []",
        "    for _x in sorted(_p.iterdir()):",
        "        if _x.name in _ignore:",
        "            continue",
        "        _items.append({'name': _x.name, 'is_dir': _x.is_dir()})",
        "    print(json.dumps({'ok': True, 'items': _items}))",
    ]
    execution = self._sbx.run_code("\n".join(script_lines) + "\n")
    payload = _decode_json_line(execution.logs.stdout)
    if payload is None:
        return ToolResult(ok=False, error=_format_error(execution))
    if not payload.get("ok", False):
        return ToolResult(ok=False, error=str(payload.get("error", "ls failed")))

    entries = payload.get("items", [])
    rendered = []
    for entry in entries:
        tag = "[dir]" if entry["is_dir"] else "[file]"
        rendered.append(f"{tag} {entry['name']}")
    return ToolResult(ok=True, output="\n".join(rendered), metadata={"items": entries})
149
+
150
def grep(self, pattern: str, path: str | None = None, include: str | None = None) -> ToolResult:
    """Search files under ``path`` for lines matching the regex ``pattern``.

    The generated script walks ``path`` recursively (``"."`` when ``path``
    is ``None`` or empty), optionally keeps only files whose name matches
    the ``include`` fnmatch pattern, and silently skips files that cannot
    be read as UTF-8. Each hit is reported as ``<file>:<line number>:<line>``,
    newline-joined in ``output`` and listed in ``metadata["matches"]``.
    """
    search_root = path or "."
    script_lines = [
        "from pathlib import Path",
        "import fnmatch, json, re",
        f"_root = Path({search_root!r})",
        f"_pat = re.compile({pattern!r})",
        f"_include = {include!r}",
        "_out = []",
        "if not _root.exists():",
        "    print(json.dumps({'ok': False, 'error': 'path not found'}))",
        "else:",
        "    for _p in _root.rglob('*'):",
        "        if not _p.is_file():",
        "            continue",
        "        if _include and not fnmatch.fnmatch(_p.name, _include):",
        "            continue",
        "        try:",
        "            _lines = _p.read_text(encoding='utf-8').splitlines()",
        "        except Exception:",
        "            continue",
        "        for _i, _line in enumerate(_lines, start=1):",
        "            if _pat.search(_line):",
        # Deliberately NOT an f-string on the host: the braces belong to the
        # f-string that runs inside the sandbox.
        "                _out.append(f'{_p}:{_i}:{_line}')",
        "    print(json.dumps({'ok': True, 'matches': _out}))",
    ]
    execution = self._sbx.run_code("\n".join(script_lines) + "\n")
    payload = _decode_json_line(execution.logs.stdout)
    if payload is None:
        return ToolResult(ok=False, error=_format_error(execution))
    if not payload.get("ok", False):
        return ToolResult(ok=False, error=str(payload.get("error", "grep failed")))
    hits = payload.get("matches", [])
    return ToolResult(ok=True, output="\n".join(hits), metadata={"matches": hits})
184
+
185
def kill(self) -> None:
    """Shut the sandbox down on a best-effort basis.

    ``self._sbx.kill()`` is tried first; only if that raises is
    ``self._sbx.close()`` attempted. Failures of either call are swallowed,
    so this method never raises an ``Exception``.
    """
    for teardown in ("kill", "close"):
        try:
            getattr(self._sbx, teardown)()
        except Exception:
            # Fall through to the next teardown method (if any).
            continue
        return
193
+
194
+
195
+ def _decode_json_line(lines: list[str] | None) -> dict[str, Any] | None:
196
+ if not lines:
197
+ return None
198
+ for raw in reversed(lines):
199
+ raw = raw.strip()
200
+ if not raw:
201
+ continue
202
+ try:
203
+ value = json.loads(raw)
204
+ except Exception:
205
+ continue
206
+ if isinstance(value, dict):
207
+ return value
208
+ return None
209
+
210
+
211
+ def _format_error(execution: Any) -> str:
212
+ if execution.error:
213
+ return f"{execution.error.name}: {execution.error.value}"
214
+ stderr = "\n".join(execution.logs.stderr or [])
215
+ return stderr or "sandbox execution failed"
server/gradio_ui.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Custom Gradio UI for coding_tools_env."""
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from typing import Any, Optional
13
+
14
+ import gradio as gr
15
+
16
+
17
+ def _lines(value: str) -> list[str]:
18
+ return [line.strip() for line in value.splitlines() if line.strip()]
19
+
20
+
21
+ def _extract_tool_text(result: dict[str, Any]) -> str:
22
+ obs = result.get("observation", {})
23
+ if not isinstance(obs, dict):
24
+ return ""
25
+ tool_result = obs.get("result", {})
26
+ if not isinstance(tool_result, dict):
27
+ return ""
28
+ content = tool_result.get("content", [])
29
+ if isinstance(content, list) and content:
30
+ first = content[0]
31
+ if isinstance(first, dict):
32
+ return str(first.get("text", ""))
33
+ return str(tool_result.get("data", ""))
34
+
35
+
36
def coding_tools_ui_builder(
    web_manager,
    action_fields: list[dict[str, Any]],
    metadata: Optional[Any],
    is_chat_env: bool,
    title: str,
    quick_start_md: Optional[str],
) -> gr.Blocks:
    """Build the Gradio control panel for the coding tools environment.

    The panel has three columns (session/bash/submit, file and search tools,
    directory listing and todos) plus shared "Tool output" and "Status" boxes.
    Every button forwards one tool call to ``web_manager`` and refreshes the
    session-state view.

    Args:
        web_manager: Object providing ``get_state()``, the coroutines
            ``reset_environment(...)`` and ``step_environment(...)``, and the
            ``env`` / ``_run_sync_in_thread_pool`` members used to close the
            session.
        action_fields: Accepted for signature compatibility; not used here.
        metadata: Accepted for signature compatibility; not used here.
        is_chat_env: Accepted for signature compatibility; not used here.
        title: Heading text, also used in the page title.
        quick_start_md: Accepted for signature compatibility; not used here.

    Returns:
        The assembled ``gr.Blocks`` application.
    """

    def state_payload() -> dict[str, Any]:
        """Return the current session state, or ``{}`` if it is unavailable."""
        try:
            return web_manager.get_state()
        except RuntimeError:
            return {}

    async def run_tool(tool_name: str, arguments: dict[str, Any]):
        """Execute one tool call and return ``(state, output text, status)``."""
        result = await web_manager.step_environment(
            {"tool_name": tool_name, "arguments": arguments}
        )
        return state_payload(), _extract_tool_text(result), f"{tool_name} executed."

    with gr.Blocks(title=f"{title} - Coding Tools") as demo:
        gr.Markdown(f"# {title}")
        gr.Markdown("E2B-backed coding tools environment with SETA-style tool surface.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Session")
                setup_input = gr.Textbox(
                    label="Setup commands",
                    value="mkdir -p /home/user/work",
                    lines=4,
                )
                verify_input = gr.Textbox(
                    label="Verify commands",
                    value="pytest -q",
                    lines=3,
                )
                reset_btn = gr.Button("Reset sandbox", variant="primary")
                close_btn = gr.Button("Stop / Close session", variant="secondary")

                gr.Markdown("### Bash")
                bash_input = gr.Textbox(label="Command", value="pwd && ls -la", lines=3)
                timeout_input = gr.Number(label="Timeout (seconds)", value=30)
                bash_btn = gr.Button("Run bash", variant="primary")

                gr.Markdown("### Submit")
                submit_btn = gr.Button("Submit solution", variant="primary")

            with gr.Column(scale=1):
                gr.Markdown("### File tools")
                file_path = gr.Textbox(label="File path", value="/home/user/work/main.py")
                read_btn = gr.Button("Read")
                write_content = gr.Textbox(label="Write content", lines=5)
                write_btn = gr.Button("Write")
                old_string = gr.Textbox(label="Old string", lines=2)
                new_string = gr.Textbox(label="New string", lines=2)
                replace_all = gr.Checkbox(label="Replace all", value=False)
                edit_btn = gr.Button("Edit")

                gr.Markdown("### Search tools")
                glob_pattern = gr.Textbox(label="Glob pattern", value="**/*.py")
                glob_path = gr.Textbox(label="Glob path", value="/home/user/work")
                glob_btn = gr.Button("Glob")
                grep_pattern = gr.Textbox(label="Grep pattern", value="TODO")
                grep_path = gr.Textbox(label="Grep path", value="/home/user/work")
                grep_include = gr.Textbox(label="Grep include (optional)")
                grep_btn = gr.Button("Grep")

            with gr.Column(scale=1):
                gr.Markdown("### Directory and Todo")
                ls_path = gr.Textbox(label="LS path", value="/home/user/work")
                ls_ignore = gr.Textbox(label="LS ignore (comma-separated)")
                ls_btn = gr.Button("LS")
                todos_json = gr.Code(
                    label="Todos JSON",
                    language="json",
                    value='[{"id":"1","content":"Inspect files","status":"in_progress","priority":"high"}]',
                    lines=8,
                )
                todo_btn = gr.Button("Update todos")
                state_view = gr.JSON(label="Session state", value={})

        output = gr.Textbox(label="Tool output", lines=12, interactive=False)
        status = gr.Textbox(label="Status", value="Ready.", interactive=False)

        async def on_reset(setup_text: str, verify_text: str):
            # One command per non-blank line of each textbox.
            result = await web_manager.reset_environment(
                {"setup": _lines(setup_text), "verify": _lines(verify_text)}
            )
            state = state_payload()
            return state, json.dumps(result, indent=2), "Sandbox reset."

        async def on_close():
            # NOTE(review): relies on web_manager's private thread-pool helper;
            # switch to a public close API if the manager exposes one.
            await web_manager._run_sync_in_thread_pool(web_manager.env.close)
            return {}, "Session closed.", "Session closed."

        async def on_bash(command: str, timeout: float):
            return await run_tool("bash", {"command": command, "timeout": timeout})

        async def on_read(path: str):
            return await run_tool("read", {"file_path": path})

        async def on_write(path: str, content: str):
            return await run_tool("write", {"file_path": path, "content": content})

        async def on_edit(path: str, old: str, new: str, all_matches: bool):
            return await run_tool(
                "edit",
                {
                    "file_path": path,
                    "old_string": old,
                    "new_string": new,
                    "replace_all": all_matches,
                },
            )

        async def on_glob(pattern: str, path: str):
            return await run_tool("glob", {"pattern": pattern, "path": path})

        async def on_grep(pattern: str, path: str, include: str):
            args: dict[str, Any] = {"pattern": pattern, "path": path}
            # Only send "include" when the user actually typed a filter.
            if include.strip():
                args["include"] = include
            return await run_tool("grep", args)

        async def on_ls(path: str, ignore_csv: str):
            ignore = [item.strip() for item in ignore_csv.split(",") if item.strip()]
            return await run_tool("ls", {"path": path, "ignore": ignore or None})

        async def on_todo(todo_raw: str):
            # The editor content is free text: report malformed JSON in the
            # status box instead of letting the handler raise.
            try:
                todos = json.loads(todo_raw)
            except (TypeError, ValueError) as exc:
                return state_payload(), "", f"Invalid todos JSON: {exc}"
            return await run_tool("todo_write", {"todos": todos})

        async def on_submit():
            return await run_tool("submit_solution", {})

        tool_outputs = [state_view, output, status]
        reset_btn.click(on_reset, [setup_input, verify_input], tool_outputs)
        close_btn.click(on_close, outputs=tool_outputs)
        bash_btn.click(on_bash, [bash_input, timeout_input], tool_outputs)
        read_btn.click(on_read, [file_path], tool_outputs)
        write_btn.click(on_write, [file_path, write_content], tool_outputs)
        edit_btn.click(
            on_edit,
            [file_path, old_string, new_string, replace_all],
            tool_outputs,
        )
        glob_btn.click(on_glob, [glob_pattern, glob_path], tool_outputs)
        grep_btn.click(on_grep, [grep_pattern, grep_path, grep_include], tool_outputs)
        ls_btn.click(on_ls, [ls_path, ls_ignore], tool_outputs)
        todo_btn.click(on_todo, [todos_json], tool_outputs)
        submit_btn.click(on_submit, outputs=tool_outputs)

    return demo
server/requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ openenv-core[core]>=0.2.2
2
+ e2b-code-interpreter>=1.0.0
3
+ fastapi>=0.115.0
4
+ fastmcp>=3.0.0
5
+ gradio>=4.0.0
6
+ pydantic>=2.0.0
7
+ requests>=2.31.0
8
+ uvicorn>=0.24.0
9
+
10
+