Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- README.md +2 -1
- server/coding_tools_env_environment.py +12 -2
- server/e2b_sandbox.py +7 -14
README.md
CHANGED
|
@@ -18,7 +18,8 @@ short_description: SETA-style multi-tool coding environment backed by E2B
|
|
| 18 |
# Coding Tools Environment
|
| 19 |
|
| 20 |
`coding_tools_env` is an E2B-backed multi-tool coding environment with explicit
|
| 21 |
-
filesystem and shell tools.
|
|
|
|
| 22 |
|
| 23 |
Each episode creates a fresh E2B sandbox. Reset accepts setup and verify
|
| 24 |
commands. Verify commands are used by `submit_solution`.
|
|
|
|
| 18 |
# Coding Tools Environment
|
| 19 |
|
| 20 |
`coding_tools_env` is an E2B-backed multi-tool coding environment with explicit
|
| 21 |
+
filesystem and shell tools. The tool surface is modeled after
|
| 22 |
+
[SETA](https://github.com/EnvCommons/SETA).
|
| 23 |
|
| 24 |
Each episode creates a fresh E2B sandbox. Reset accepts setup and verify
|
| 25 |
commands. Verify commands are used by `submit_solution`.
|
server/coding_tools_env_environment.py
CHANGED
|
@@ -324,7 +324,11 @@ class CodingToolsEnvironment(MCPEnvironment):
|
|
| 324 |
**kwargs: Any,
|
| 325 |
) -> Observation:
|
| 326 |
self._state.step_count += 1
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
|
| 329 |
async def step_async(
|
| 330 |
self,
|
|
@@ -333,7 +337,11 @@ class CodingToolsEnvironment(MCPEnvironment):
|
|
| 333 |
**kwargs: Any,
|
| 334 |
) -> Observation:
|
| 335 |
self._state.step_count += 1
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
@property
|
| 339 |
def state(self) -> CodingToolsState:
|
|
@@ -363,6 +371,8 @@ class CodingToolsEnvironment(MCPEnvironment):
|
|
| 363 |
self._state.last_error = error
|
| 364 |
|
| 365 |
def _run_verify_commands(self) -> dict[str, Any]:
|
|
|
|
|
|
|
| 366 |
self._sandbox.run_shell("mkdir -p /home/user/logs/verifier")
|
| 367 |
self._state.verify_results = []
|
| 368 |
passed = 0
|
|
|
|
| 324 |
**kwargs: Any,
|
| 325 |
) -> Observation:
|
| 326 |
self._state.step_count += 1
|
| 327 |
+
obs = super().step(action, timeout_s=timeout_s, **kwargs)
|
| 328 |
+
if self._state.submitted and self._state.last_reward is not None:
|
| 329 |
+
obs.done = True
|
| 330 |
+
obs.reward = self._state.last_reward
|
| 331 |
+
return obs
|
| 332 |
|
| 333 |
async def step_async(
|
| 334 |
self,
|
|
|
|
| 337 |
**kwargs: Any,
|
| 338 |
) -> Observation:
|
| 339 |
self._state.step_count += 1
|
| 340 |
+
obs = await super().step_async(action, timeout_s=timeout_s, **kwargs)
|
| 341 |
+
if self._state.submitted and self._state.last_reward is not None:
|
| 342 |
+
obs.done = True
|
| 343 |
+
obs.reward = self._state.last_reward
|
| 344 |
+
return obs
|
| 345 |
|
| 346 |
@property
|
| 347 |
def state(self) -> CodingToolsState:
|
|
|
|
| 371 |
self._state.last_error = error
|
| 372 |
|
| 373 |
def _run_verify_commands(self) -> dict[str, Any]:
|
| 374 |
+
if not self._sandbox:
|
| 375 |
+
return {"passed": 0, "total": 0, "reward": None}
|
| 376 |
self._sandbox.run_shell("mkdir -p /home/user/logs/verifier")
|
| 377 |
self._state.verify_results = []
|
| 378 |
passed = 0
|
server/e2b_sandbox.py
CHANGED
|
@@ -56,9 +56,10 @@ class E2BSandbox:
|
|
| 56 |
stdout = str(result.get("stdout", ""))
|
| 57 |
stderr = str(result.get("stderr", ""))
|
| 58 |
ok = rc == 0
|
|
|
|
| 59 |
return ToolResult(
|
| 60 |
ok=ok,
|
| 61 |
-
output=
|
| 62 |
error=None if ok else f"exit_code={rc}",
|
| 63 |
metadata={"exit_code": rc},
|
| 64 |
)
|
|
@@ -91,19 +92,11 @@ class E2BSandbox:
|
|
| 91 |
return ToolResult(ok=True, output=str(result.get("content", "")))
|
| 92 |
|
| 93 |
def write_file(self, file_path: str, content: str) -> ToolResult:
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
"
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
f"_p.write_text({content!r}, encoding='utf-8')\n"
|
| 100 |
-
"print(json.dumps({'ok': True, 'bytes': len(_p.read_bytes())}))\n"
|
| 101 |
-
)
|
| 102 |
-
execution = self._sbx.run_code(code)
|
| 103 |
-
result = _decode_json_line(execution.logs.stdout)
|
| 104 |
-
if result is None:
|
| 105 |
-
return ToolResult(ok=False, error=_format_error(execution))
|
| 106 |
-
return ToolResult(ok=True, output="write ok", metadata={"bytes": result.get("bytes", 0)})
|
| 107 |
|
| 108 |
def glob_files(self, pattern: str, path: str | None = None) -> ToolResult:
|
| 109 |
code = (
|
|
|
|
| 56 |
stdout = str(result.get("stdout", ""))
|
| 57 |
stderr = str(result.get("stderr", ""))
|
| 58 |
ok = rc == 0
|
| 59 |
+
output = "\n".join(part for part in [stdout.strip(), stderr.strip()] if part)
|
| 60 |
return ToolResult(
|
| 61 |
ok=ok,
|
| 62 |
+
output=output,
|
| 63 |
error=None if ok else f"exit_code={rc}",
|
| 64 |
metadata={"exit_code": rc},
|
| 65 |
)
|
|
|
|
| 92 |
return ToolResult(ok=True, output=str(result.get("content", "")))
|
| 93 |
|
| 94 |
def write_file(self, file_path: str, content: str) -> ToolResult:
|
| 95 |
+
try:
|
| 96 |
+
self._sbx.files.write(file_path, content.encode("utf-8"))
|
| 97 |
+
return ToolResult(ok=True, output="write ok", metadata={"bytes": len(content.encode("utf-8"))})
|
| 98 |
+
except Exception as exc:
|
| 99 |
+
return ToolResult(ok=False, error=f"write failed: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def glob_files(self, pattern: str, path: str | None = None) -> ToolResult:
|
| 102 |
code = (
|