Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .dockerignore +10 -0
- .env.example +11 -0
- .gitignore +9 -0
- README.md +0 -1
- local_ui.py +517 -0
- server/catalog.py +200 -0
- server/gradio_ui.py +441 -138
- server/opencode_environment.py +106 -36
- server/sandbox_smoke.py +320 -0
- server/transcript.py +237 -0
.dockerignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv
|
| 2 |
+
__pycache__
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.egg-info
|
| 6 |
+
.env
|
| 7 |
+
.git
|
| 8 |
+
.gitignore
|
| 9 |
+
README.md
|
| 10 |
+
*.md
|
.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Required
|
| 2 |
+
E2B_API_KEY=e2b_...
|
| 3 |
+
|
| 4 |
+
# Optional - for Mode A rollouts against real OpenAI
|
| 5 |
+
OPENAI_API_KEY=sk-...
|
| 6 |
+
|
| 7 |
+
# Optional - max concurrent sandbox sessions per environment (default: 4)
|
| 8 |
+
MAX_CONCURRENT_ENVS=4
|
| 9 |
+
|
| 10 |
+
# Optional - enable the Gradio UI mounted at /
|
| 11 |
+
ENABLE_WEB_INTERFACE=true
|
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.egg-info/
|
| 6 |
+
.env
|
| 7 |
+
*.egg-info
|
| 8 |
+
.pytest_cache/
|
| 9 |
+
.gradio/
|
README.md
CHANGED
|
@@ -6,7 +6,6 @@ colorTo: pink
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 8000
|
| 8 |
pinned: false
|
| 9 |
-
base_path: /web
|
| 10 |
---
|
| 11 |
|
| 12 |
# opencode-openenv
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 8000
|
| 8 |
pinned: false
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
# opencode-openenv
|
local_ui.py
ADDED
|
@@ -0,0 +1,517 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Chat-style Gradio UI for a locally-running ``opencode serve``.
|
| 2 |
+
|
| 3 |
+
Prereq: ``opencode serve`` on http://127.0.0.1:4096.
|
| 4 |
+
|
| 5 |
+
Run:
|
| 6 |
+
uv run --with gradio --with httpx python local_ui.py
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import html as _html
|
| 12 |
+
import json
|
| 13 |
+
import threading
|
| 14 |
+
import time
|
| 15 |
+
from typing import Any, Generator
|
| 16 |
+
|
| 17 |
+
import gradio as gr
|
| 18 |
+
import httpx
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
BASE = "http://127.0.0.1:4096"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ββ HTTP helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _get(path: str, **kw) -> Any:
    """GET ``BASE + path`` and return the decoded JSON body.

    Raises ``httpx.HTTPStatusError`` on a non-2xx response.
    """
    resp = httpx.get(f"{BASE}{path}", timeout=15, **kw)
    resp.raise_for_status()
    return resp.json()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _create_session() -> str:
    """POST /session with a fixed title and return the new session's id."""
    resp = httpx.post(f"{BASE}/session", json={"title": "gradio"}, timeout=15)
    return resp.json()["id"]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _fire_async(sid: str, prompt: str) -> None:
    """Kick off an asynchronous prompt on session *sid*.

    Raises if the POST itself fails; completion is observed via SSE.
    """
    payload = {"parts": [{"type": "text", "text": prompt}]}
    resp = httpx.post(f"{BASE}/session/{sid}/prompt_async", json=payload, timeout=30)
    resp.raise_for_status()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _abort(sid: str) -> None:
    """Best-effort abort of session *sid*; any network error is swallowed."""
    try:
        httpx.post(f"{BASE}/session/{sid}/abort", timeout=10)
    except Exception:
        # Deliberate: aborting a dead/unreachable session should never raise.
        pass
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _session_diff(sid: str) -> list[dict]:
    """Fetch the session's file diff; returns an empty list on any failure."""
    try:
        result = _get(f"/session/{sid}/diff")
    except Exception:
        return []
    return result or []
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _session_todo(sid: str) -> list[dict]:
    """Fetch the session's todo list; returns an empty list on any failure."""
    try:
        result = _get(f"/session/{sid}/todo")
    except Exception:
        return []
    return result or []
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ββ Server identity ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _banner() -> str:
    """Render the server-identity banner HTML.

    Queries /global/health and /config for version, model, baseURL and token
    limits; falls back to a single error chip when the server is unreachable.
    """
    try:
        health = _get("/global/health")
        cfg = _get("/config")
        provider = (cfg.get("provider") or {}).get("vllm") or {}
        options = provider.get("options") or {}
        model = cfg.get("model") or "?"
        base_url = options.get("baseURL") or "?"
        # Token limits come from the first (and usually only) provider model.
        first_model = next(iter(provider.get("models", {}).values()), {})
        limit = first_model.get("limit") or {}
        try:
            tools = _get("/experimental/tool/ids") or []
        except Exception:
            tools = []
        if tools:
            joined = ", ".join(_esc(t) for t in tools)
            tool_line = f"<div class='tools'>tools: {joined}</div>"
        else:
            tool_line = ""
        chips = [
            f"<span class='chip ok'>opencode v{_esc(health.get('version','?'))}</span>",
            f"<span class='chip'>model: <code>{_esc(model)}</code></span>",
            f"<span class='chip'>baseURL: <code>{_esc(base_url)}</code></span>",
            f"<span class='chip'>ctx: <code>{limit.get('context','?')}</code></span>",
            f"<span class='chip'>out: <code>{limit.get('output','?')}</code></span>",
        ]
        return "<div class='banner'>" + " ".join(chips) + f"</div>{tool_line}"
    except Exception as exc:
        return f"<div class='banner'><span class='chip err'>server unreachable: {_esc(exc)}</span></div>"
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ββ SSE ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _stream(sid_filter: str, events: list, stop: threading.Event) -> None:
    """Tail GET /event, appending every decoded SSE frame to *events*.

    The caller is responsible for filtering by session; setting *stop*
    ends the tail. Any transport or decode error terminates silently.
    """
    try:
        with httpx.stream("GET", f"{BASE}/event", timeout=None) as resp:
            for raw in resp.iter_lines():
                if stop.is_set():
                    return
                if not raw or not raw.startswith("data:"):
                    continue
                try:
                    frame = json.loads(raw[5:].strip())
                except Exception:
                    continue
                events.append(frame)
    except Exception:
        return
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ββ Part + delta assembly ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _assemble(events: list[dict]) -> tuple[list[dict], list[str]]:
|
| 123 |
+
"""Reduce events to ordered parts and collect any error reasons.
|
| 124 |
+
|
| 125 |
+
- ``message.part.updated`` is authoritative per ``part.id``.
|
| 126 |
+
- ``message.part.delta`` frames for a text part whose last snapshot is
|
| 127 |
+
shorter than the accumulated delta are appended live so streaming
|
| 128 |
+
looks smooth.
|
| 129 |
+
"""
|
| 130 |
+
order: list[str] = []
|
| 131 |
+
latest: dict[str, dict] = {}
|
| 132 |
+
deltas: dict[str, str] = {}
|
| 133 |
+
errors: list[str] = []
|
| 134 |
+
for ev in events:
|
| 135 |
+
t = ev.get("type")
|
| 136 |
+
props = ev.get("properties") or {}
|
| 137 |
+
if t == "message.part.updated":
|
| 138 |
+
p = props.get("part") or {}
|
| 139 |
+
pid = p.get("id")
|
| 140 |
+
if not pid:
|
| 141 |
+
continue
|
| 142 |
+
if pid not in latest:
|
| 143 |
+
order.append(pid)
|
| 144 |
+
latest[pid] = p
|
| 145 |
+
if (p.get("state") or {}).get("status") == "error":
|
| 146 |
+
err = (p.get("state") or {}).get("error") or "tool error"
|
| 147 |
+
errors.append(f"{p.get('tool','?')}: {err}")
|
| 148 |
+
elif t == "message.part.delta":
|
| 149 |
+
p = props.get("part") or {}
|
| 150 |
+
pid = p.get("partID") or p.get("id")
|
| 151 |
+
if not pid:
|
| 152 |
+
continue
|
| 153 |
+
delta = p.get("delta") or p.get("text") or ""
|
| 154 |
+
if isinstance(delta, str) and delta:
|
| 155 |
+
deltas[pid] = deltas.get(pid, "") + delta
|
| 156 |
+
elif t in ("error", "client.error"):
|
| 157 |
+
errors.append(_esc(props.get("reason") or ev.get("reason") or "unknown"))
|
| 158 |
+
|
| 159 |
+
# Splice in any deltas that exceed the latest snapshot (live streaming).
|
| 160 |
+
parts: list[dict] = []
|
| 161 |
+
for pid in order:
|
| 162 |
+
p = dict(latest[pid])
|
| 163 |
+
if p.get("type") == "text" and pid in deltas:
|
| 164 |
+
if len(deltas[pid]) > len(p.get("text") or ""):
|
| 165 |
+
p["text"] = deltas[pid]
|
| 166 |
+
parts.append(p)
|
| 167 |
+
return parts, errors
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# ββ Rendering ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _esc(s: Any) -> str:
|
| 174 |
+
return _html.escape("" if s is None else str(s))
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _cap(s: str, n: int = 6000) -> str:
|
| 178 |
+
if len(s) <= n:
|
| 179 |
+
return s
|
| 180 |
+
return s[:n] + f"\n⦠({len(s) - n} chars hidden)"
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _fmt_tool(name: str, state: dict, raw: dict) -> str:
    """Render one tool-call part as a collapsible HTML ``<details>`` block.

    Each known tool gets a tailored summary line plus a body built from the
    tool's input and/or output; unknown tools fall back to a generic
    input/output dump. The status badge reflects the part's state.
    """
    st = state or {}
    status = st.get("status") or "?"
    args = st.get("input") or raw.get("input") or {}
    result = st.get("output") or raw.get("output") or ""
    badge = {"completed": "ok", "error": "err", "running": "run"}.get(status, "")

    if name == "read":
        summary = f"π read <code>{_esc(args.get('filePath') or args.get('path'))}</code>"
        body = f"<pre>{_esc(_cap(str(result)))}</pre>"
    elif name == "write":
        target = args.get("filePath") or args.get("path")
        content = args.get("content") or ""
        summary = f"βοΈ write <code>{_esc(target)}</code> ({len(content)} chars)"
        body = f"<pre>{_esc(_cap(content))}</pre>"
    elif name == "edit":
        target = args.get("filePath") or args.get("path")
        before = args.get("oldString") or ""
        after = args.get("newString") or ""
        summary = f"βοΈ edit <code>{_esc(target)}</code>"
        body = (
            f"<div class='lbl'>- old</div><pre class='del'>{_esc(_cap(before, 3000))}</pre>"
            f"<div class='lbl'>+ new</div><pre class='add'>{_esc(_cap(after, 3000))}</pre>"
        )
        if result:
            body += f"<div class='lbl'>output</div><pre>{_esc(_cap(str(result), 2000))}</pre>"
    elif name == "bash":
        command = args.get("command") or args.get("cmd") or ""
        summary = f"β‘ bash <code>{_esc(command[:160])}</code>"
        body = f"<pre>{_esc(_cap(str(result)))}</pre>"
    elif name in ("glob", "find"):
        pattern = args.get("pattern") or args.get("query") or ""
        summary = f"π {name} <code>{_esc(pattern)}</code>"
        body = f"<pre>{_esc(_cap(str(result), 4000))}</pre>"
    elif name == "grep":
        pattern = args.get("pattern") or ""
        where = args.get("path") or ""
        summary = f"π grep <code>{_esc(pattern)}</code>"
        if where:
            summary += f" in <code>{_esc(where)}</code>"
        body = f"<pre>{_esc(_cap(str(result), 4000))}</pre>"
    elif name == "todowrite":
        todos = args.get("todos") or []
        summary = f"π todowrite ({len(todos)} items)"
        rows = "".join(
            f"<li>{_todo_icon(t.get('status'))} {_esc(t.get('content'))}</li>"
            for t in todos
        )
        body = f"<ul>{rows}</ul>"
    elif name == "task":
        desc = args.get("description") or args.get("prompt") or ""
        summary = f"π§© task β {_esc(desc[:160])}"
        body = f"<pre>{_esc(_cap(str(result), 4000))}</pre>"
    elif name == "webfetch":
        summary = f"π webfetch <code>{_esc(args.get('url'))}</code>"
        body = f"<pre>{_esc(_cap(str(result), 4000))}</pre>"
    else:
        summary = f"π§ {_esc(name)}"
        body = (
            f"<div class='lbl'>input</div><pre>{_esc(_cap(json.dumps(args, indent=2, default=str), 4000))}</pre>"
            f"<div class='lbl'>output</div><pre>{_esc(_cap(str(result), 4000))}</pre>"
        )

    return (
        "<details class='tool' open>"
        f"<summary>{summary} <span class='badge {badge}'>{_esc(status)}</span></summary>"
        f"<div class='tbody'>{body}</div>"
        "</details>"
    )
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def _todo_icon(status: str | None) -> str:
|
| 252 |
+
return {"completed": "β
", "in_progress": "π"}.get(status or "", "β³")
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _render_transcript(parts: list[dict], errors: list[str]) -> str:
    """Render assembled message parts (plus any errors) as transcript HTML.

    Errors (capped at 8) render first; with no parts yet a waiting
    placeholder is shown instead of the chat container.
    """
    chunks: list[str] = []
    if errors:
        items = "".join(f"<li>{_esc(e)}</li>" for e in errors[:8])
        chunks.append(f"<div class='errbox'><b>β οΈ errors</b><ul>{items}</ul></div>")
    if not parts:
        chunks.append("<div class='empty'>waiting for first partβ¦</div>")
        return "".join(chunks)
    chunks.append("<div class='chat'>")
    for part in parts:
        kind = part.get("type")
        if kind == "step-start":
            chunks.append("<div class='step'>ββ new step ββ</div>")
        elif kind == "reasoning":
            text = (part.get("text") or "").strip()
            if text:
                chunks.append(
                    "<details class='reasoning'><summary>π§ reasoning</summary>"
                    f"<pre>{_esc(_cap(text, 4000))}</pre></details>"
                )
        elif kind == "text":
            text = (part.get("text") or "").strip()
            if text:
                chunks.append(f"<div class='assistant'><pre>{_esc(text)}</pre></div>")
        elif kind == "tool":
            chunks.append(_fmt_tool(part.get("tool") or "?", part.get("state") or {}, part))
        elif kind == "step-finish":
            tokens = part.get("tokens") or (part.get("state") or {}).get("tokens") or {}
            if tokens:
                chunks.append(
                    f"<div class='stepfin'>tokens: {_esc(json.dumps(tokens, default=str))}</div>"
                )
    chunks.append("</div>")
    return "".join(chunks)
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _render_todo(todos: list[dict]) -> str:
    """Render the plan strip from todo items; empty string when there are none."""
    if not todos:
        return ""
    rows = []
    for todo in todos:
        label = todo.get("content") or todo.get("text", "")
        rows.append(f"<li>{_todo_icon(todo.get('status'))} {_esc(label)}</li>")
    items = "".join(rows)
    return f"<div class='todostrip'><b>plan</b><ul>{items}</ul></div>"
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def _render_diff(diffs: list[dict]) -> str:
    """Render per-file patches as nested collapsible sections.

    Returns an empty string when there is nothing to show.
    """
    if not diffs:
        return ""
    sections = []
    for entry in diffs:
        path = entry.get("path") or entry.get("file") or "?"
        patch = entry.get("patch") or entry.get("diff") or ""
        sections.append(
            f"<details class='diff'><summary>{_esc(path)}</summary>"
            f"<pre>{_esc(_cap(patch, 6000))}</pre></details>"
        )
    return (
        "<details class='diff-wrap' open>"
        f"<summary>π session diff ({len(diffs)} files)</summary>"
        f"{''.join(sections)}</details>"
    )
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
# ββ State ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
class _State:
|
| 324 |
+
sid: str = "" # empty β next Run creates a new session
|
| 325 |
+
stop: threading.Event | None = None
|
| 326 |
+
events: list[dict] = [] # reset per session
|
| 327 |
+
sse_thread: threading.Thread | None = None
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
_STATE = _State()
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def _ensure_session() -> str:
    """Return the current session id, creating one plus its SSE tail on demand.

    Reused across runs so conversations are multi-turn.
    """
    if _STATE.sid:
        return _STATE.sid
    _STATE.sid = _create_session()
    _STATE.stop = threading.Event()
    _STATE.events = []
    tail = threading.Thread(
        target=_stream, args=(_STATE.sid, _STATE.events, _STATE.stop), daemon=True
    )
    _STATE.sse_thread = tail
    tail.start()
    # Give the SSE connection a moment to establish before the first prompt.
    time.sleep(0.15)
    return _STATE.sid
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def _new_session_cb() -> tuple[str, str, str, str]:
    """Stop the SSE tail, abort any running session, and clear all state.

    The next Run opens a fresh session. Returns the four UI outputs
    (status, transcript, todo, diff), with the panels cleared.
    """
    if _STATE.stop:
        _STATE.stop.set()
    if _STATE.sid:
        _abort(_STATE.sid)
    _STATE.sid = ""
    _STATE.stop = None
    _STATE.events = []
    return ("β¨ new session β Run to start", "", "", "")
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
# ββ Main βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def run(prompt: str) -> Generator[tuple[str, str, str, str], None, None]:
    """Fire *prompt* at the (possibly new) session and stream UI updates.

    Yields ``(status, transcript_html, todo_html, diff_html)`` roughly every
    0.4s until the session reports idle or the 600s safety timeout expires.
    The todo list is polled at most every 3s; the diff only once, on idle.
    """
    try:
        sid = _ensure_session()
    except Exception as exc:
        yield f"❌ session create failed: {exc}", "", "", ""
        return

    # Snapshot the event index BEFORE firing — "idle for THIS turn" must be
    # scoped to events that arrive after the prompt is sent, otherwise the
    # idle frame from the previous turn fires the break immediately.
    turn_start = len(_STATE.events)

    try:
        _fire_async(sid, prompt)
    except Exception as exc:
        yield f"❌ prompt failed: {exc}", "", "", ""
        return

    t0 = time.time()
    last_todo_refresh = 0.0
    todos: list[dict] = []

    while time.time() - t0 < 600:
        new_events = _STATE.events[turn_start:]
        idle = any(e.get("type") in ("session.idle", "idle") for e in new_events)
        parts, errors = _assemble(_STATE.events)

        if time.time() - last_todo_refresh > 3.0:
            todos = _session_todo(sid)
            last_todo_refresh = time.time()

        # NOTE(review): the status-line glyphs and separators below are
        # restored from a mojibake-mangled source (the original f-string was
        # split mid-emoji in the scrape) — confirm against the original file.
        status = (
            f"{'✅ idle' if idle else '⚡ running'} · "
            f"session <code>{sid[:18]}…</code> · "
            f"{time.time()-t0:.1f}s · {len(parts)} parts · {len(_STATE.events)} events"
        )

        diff_html = ""
        if idle:
            diff_html = _render_diff(_session_diff(sid))

        yield status, _render_transcript(parts, errors), _render_todo(todos), diff_html

        if idle:
            break
        time.sleep(0.4)
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def abort_cb() -> str:
    """Abort the running prompt but keep the session alive for inspection."""
    if _STATE.sid:
        _abort(_STATE.sid)
    # SSE stays open so abort-related events remain visible; teardown happens
    # only when the user explicitly starts a new session.
    return "βΉ aborted (session kept β click New session to clear)"
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def refresh_banner() -> str:
    """Re-render the server-identity banner (wired to ``demo.load``)."""
    return _banner()
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
# ββ CSS ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 428 |
+
|
| 429 |
+
_CSS = """
|
| 430 |
+
.banner { margin:4px 0 2px; }
|
| 431 |
+
.tools { font-size:11px; color:#888; margin:2px 0 8px; }
|
| 432 |
+
.chip { display:inline-block; padding:2px 8px; margin:2px; border-radius:10px;
|
| 433 |
+
background:#2b2d31; color:#ddd; font-size:12px; }
|
| 434 |
+
.chip.ok { background:#1f6f43; }
|
| 435 |
+
.chip.err { background:#7a1e1e; }
|
| 436 |
+
.chip code { background:transparent; color:#9ad; }
|
| 437 |
+
.errbox { background:#2a1414; border:1px solid #7a1e1e; border-radius:6px;
|
| 438 |
+
padding:6px 10px; margin:6px 0; color:#f88; font-size:13px; }
|
| 439 |
+
.errbox ul { margin:2px 0 0 18px; }
|
| 440 |
+
.chat { font-size:14px; }
|
| 441 |
+
.assistant pre { background:#0e1013; padding:10px; border-radius:8px;
|
| 442 |
+
white-space:pre-wrap; color:#eee; margin:6px 0; }
|
| 443 |
+
.reasoning { opacity:0.8; margin:4px 0; }
|
| 444 |
+
.reasoning pre { background:#0a0b0d; color:#aab; padding:8px; white-space:pre-wrap; }
|
| 445 |
+
.tool { border:1px solid #2a2f3a; border-radius:8px; padding:6px 10px;
|
| 446 |
+
margin:6px 0; background:#12161c; }
|
| 447 |
+
.tool summary { cursor:pointer; color:#ddd; }
|
| 448 |
+
.tool code { background:#222; color:#9cf; padding:1px 4px; border-radius:3px; }
|
| 449 |
+
.tbody { margin-top:6px; }
|
| 450 |
+
.tbody pre { background:#0a0b0d; padding:8px; border-radius:4px;
|
| 451 |
+
white-space:pre-wrap; max-height:400px; overflow:auto;
|
| 452 |
+
font-size:12px; color:#ddd; margin:2px 0; }
|
| 453 |
+
.tbody pre.add { border-left:3px solid #2e6; }
|
| 454 |
+
.tbody pre.del { border-left:3px solid #e53; }
|
| 455 |
+
.tbody .lbl { color:#888; font-size:11px; margin-top:6px; }
|
| 456 |
+
.badge { padding:1px 6px; border-radius:8px; font-size:11px;
|
| 457 |
+
background:#333; color:#ddd; }
|
| 458 |
+
.badge.ok { background:#1f6f43; color:white; }
|
| 459 |
+
.badge.err { background:#7a1e1e; color:white; }
|
| 460 |
+
.badge.run { background:#7a5c1e; color:white; }
|
| 461 |
+
.step { color:#555; text-align:center; margin:10px 0; font-size:11px; }
|
| 462 |
+
.stepfin { color:#666; font-size:11px; margin:4px 0 12px; }
|
| 463 |
+
.empty { color:#666; font-style:italic; padding:12px; }
|
| 464 |
+
.todostrip { background:#14181e; border:1px solid #2a2f3a; border-radius:6px;
|
| 465 |
+
padding:6px 10px; margin:6px 0; font-size:13px; }
|
| 466 |
+
.todostrip ul { margin:4px 0 0 18px; }
|
| 467 |
+
.diff-wrap { margin:8px 0; }
|
| 468 |
+
.diff summary { cursor:pointer; color:#9ad; font-family:monospace; }
|
| 469 |
+
.diff pre { background:#0a0b0d; padding:8px; border-radius:4px;
|
| 470 |
+
white-space:pre; font-size:12px; color:#ddd; overflow:auto; }
|
| 471 |
+
"""
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
# ββ Layout βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
with gr.Blocks(title="opencode serve", css=_CSS) as demo:
    # Output panels, top to bottom.
    banner_html = gr.HTML(value="_(loadingβ¦)_")
    status_md = gr.Markdown()
    todo_html = gr.HTML()
    transcript_html = gr.HTML(value="<div class='empty'>run a prompt to start</div>")
    diff_html = gr.HTML()

    # Prompt row: textbox, Run button, and a stacked Abort / New-session column.
    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            value="Write fizzbuzz.py that prints FizzBuzz for 1..15 and run it.",
            lines=3,
            scale=5,
        )
        run_btn = gr.Button("βΆ Run", variant="primary", scale=1)
        with gr.Column(scale=1, min_width=120):
            abort_btn = gr.Button("βΉ Abort", variant="stop")
            new_btn = gr.Button("β¨ New session")

    # Wire callbacks: Run and New-session drive all four panels.
    panel_outputs = [status_md, transcript_html, todo_html, diff_html]
    run_btn.click(run, inputs=[prompt], outputs=panel_outputs)
    abort_btn.click(abort_cb, outputs=[status_md])
    new_btn.click(_new_session_cb, outputs=panel_outputs)
    demo.load(refresh_banner, outputs=[banner_html])
|
| 507 |
+
|
| 508 |
+
|
| 509 |
+
if __name__ == "__main__":
    import os

    # Bind on all interfaces; port overridable via GRADIO_PORT.
    port = int(os.environ.get("GRADIO_PORT", "7861"))
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=port,
        share=True,
        show_error=True,
    )
|
server/catalog.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Curated Qwen model catalog for the OpenCode OpenEnv server.
|
| 2 |
+
|
| 3 |
+
Lives in the server (not the primitive) because routing decisions β
|
| 4 |
+
which HF router backend to pick for a given Qwen repo, what counts as
|
| 5 |
+
the "default" model, whether a model supports thinking β are
|
| 6 |
+
deployment concerns, not harness concerns. The primitive remains
|
| 7 |
+
provider-agnostic; this catalog is what the Gradio UI and the MCP
|
| 8 |
+
tools consult to turn a UI selection into a concrete
|
| 9 |
+
``(base_url, api_key, model_string, disable_thinking)`` quadruple.
|
| 10 |
+
|
| 11 |
+
Backends supported:
|
| 12 |
+
|
| 13 |
+
- ``vllm`` β user-supplied OpenAI-compatible endpoint (e.g. cloudflared
|
| 14 |
+
tunnel to ``vllm serve``, or a colocated vLLM server).
|
| 15 |
+
- ``hf_router`` β Hugging Face Inference Providers router at
|
| 16 |
+
``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``.
|
| 17 |
+
Model id carries a ``:provider`` suffix to pick the HF
|
| 18 |
+
backend (``:together``, ``:scaleway``, ``:nscale``, ...).
|
| 19 |
+
|
| 20 |
+
Only HF providers verified to return ``logprobs`` are listed (see
|
| 21 |
+
``DOCS/HF/hf_inference_providers_logprobs.md``).
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
from typing import Literal
|
| 27 |
+
|
| 28 |
+
from pydantic import BaseModel
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
BackendKind = Literal["vllm", "hf_router"]
|
| 32 |
+
|
| 33 |
+
HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class CatalogModel(BaseModel):
    """One model entry in the curated Qwen catalog."""

    #: Canonical HF-Hub repo id (no ``:provider`` suffix).
    repo: str
    #: Backend kind; drives routing and auth shape.
    backend: BackendKind
    #: For ``hf_router`` entries, the ``:<provider>`` suffix HF uses to
    #: force a specific backend inference provider. Empty for ``vllm``.
    hf_route: str = ""
    #: Whether this model supports Qwen-style thinking mode.
    supports_thinking: bool = False
    #: Short human-readable label for UI dropdowns.
    label: str = ""

    @property
    def dropdown_key(self) -> str:
        """Stable unique key for UI selectors."""
        if self.backend == "hf_router":
            scheme, suffix = "hf-router", self.hf_route
        else:
            scheme, suffix = "vllm", ""
        return f"{scheme}://{self.repo}{suffix}"

    @property
    def opencode_model_string(self) -> str:
        """Model id opencode should send to the endpoint.

        For the HF router the ``:provider`` suffix is baked into the model
        string so the router picks the right backend.
        """
        suffix = self.hf_route if self.backend == "hf_router" else ""
        return f"{self.repo}{suffix}"
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# Ordered: self-hosted vLLM first (default), then HF router options.
|
| 71 |
+
CATALOG: list[CatalogModel] = [
|
| 72 |
+
# --- Local vLLM (tunneled or colocated) ---
|
| 73 |
+
CatalogModel(
|
| 74 |
+
repo="Qwen/Qwen3.5-4B",
|
| 75 |
+
backend="vllm",
|
| 76 |
+
supports_thinking=True,
|
| 77 |
+
label="Qwen3.5-4B (self-hosted vLLM)",
|
| 78 |
+
),
|
| 79 |
+
# --- HF Inference Router (Together / Scaleway / Nscale) ---
|
| 80 |
+
CatalogModel(
|
| 81 |
+
repo="Qwen/Qwen3.5-397B-A17B",
|
| 82 |
+
backend="hf_router",
|
| 83 |
+
hf_route=":together",
|
| 84 |
+
supports_thinking=True,
|
| 85 |
+
label="Qwen3.5-397B-A17B β HF/Together",
|
| 86 |
+
),
|
| 87 |
+
CatalogModel(
|
| 88 |
+
repo="Qwen/Qwen3.5-397B-A17B",
|
| 89 |
+
backend="hf_router",
|
| 90 |
+
hf_route=":scaleway",
|
| 91 |
+
supports_thinking=True,
|
| 92 |
+
label="Qwen3.5-397B-A17B β HF/Scaleway",
|
| 93 |
+
),
|
| 94 |
+
CatalogModel(
|
| 95 |
+
repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
| 96 |
+
backend="hf_router",
|
| 97 |
+
hf_route=":together",
|
| 98 |
+
supports_thinking=False,
|
| 99 |
+
label="Qwen3-Coder-480B β HF/Together",
|
| 100 |
+
),
|
| 101 |
+
CatalogModel(
|
| 102 |
+
repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
|
| 103 |
+
backend="hf_router",
|
| 104 |
+
hf_route=":nscale",
|
| 105 |
+
supports_thinking=False,
|
| 106 |
+
label="Qwen3-235B-A22B-2507 β HF/Nscale",
|
| 107 |
+
),
|
| 108 |
+
CatalogModel(
|
| 109 |
+
repo="Qwen/Qwen3-4B-Instruct-2507",
|
| 110 |
+
backend="hf_router",
|
| 111 |
+
hf_route=":nscale",
|
| 112 |
+
supports_thinking=False,
|
| 113 |
+
label="Qwen3-4B-Instruct-2507 β HF/Nscale",
|
| 114 |
+
),
|
| 115 |
+
CatalogModel(
|
| 116 |
+
repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
|
| 117 |
+
backend="hf_router",
|
| 118 |
+
hf_route=":scaleway",
|
| 119 |
+
supports_thinking=False,
|
| 120 |
+
label="Qwen3-Coder-30B-A3B β HF/Scaleway",
|
| 121 |
+
),
|
| 122 |
+
]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def by_key(key: str) -> CatalogModel:
|
| 126 |
+
"""Look up a catalog entry by ``dropdown_key``.
|
| 127 |
+
|
| 128 |
+
Falls back to synthesising an ad-hoc entry from the key's prefix so
|
| 129 |
+
users can enter a custom vLLM model id or a custom HF-router model
|
| 130 |
+
id without editing the catalog:
|
| 131 |
+
|
| 132 |
+
- ``"vllm://<repo>"`` β ad-hoc vllm entry with ``repo`` as the model id.
|
| 133 |
+
- ``"hf-router://<repo>[:<provider>]"`` β ad-hoc hf_router entry; the
|
| 134 |
+
provider suffix (if present) is preserved verbatim in ``hf_route``.
|
| 135 |
+
"""
|
| 136 |
+
for m in CATALOG:
|
| 137 |
+
if m.dropdown_key == key:
|
| 138 |
+
return m
|
| 139 |
+
if key.startswith("vllm://"):
|
| 140 |
+
repo = key[len("vllm://"):].strip()
|
| 141 |
+
if not repo:
|
| 142 |
+
raise KeyError(f"missing model id in key: {key!r}")
|
| 143 |
+
return CatalogModel(
|
| 144 |
+
repo=repo, backend="vllm", supports_thinking=False,
|
| 145 |
+
label=f"{repo} (custom vLLM)",
|
| 146 |
+
)
|
| 147 |
+
if key.startswith("hf-router://"):
|
| 148 |
+
rest = key[len("hf-router://"):].strip()
|
| 149 |
+
if not rest:
|
| 150 |
+
raise KeyError(f"missing model id in key: {key!r}")
|
| 151 |
+
if ":" in rest:
|
| 152 |
+
repo, _, suffix = rest.partition(":")
|
| 153 |
+
hf_route = ":" + suffix
|
| 154 |
+
else:
|
| 155 |
+
repo, hf_route = rest, ""
|
| 156 |
+
return CatalogModel(
|
| 157 |
+
repo=repo, backend="hf_router", hf_route=hf_route,
|
| 158 |
+
supports_thinking=False,
|
| 159 |
+
label=f"{repo}{hf_route} (custom HF Router)",
|
| 160 |
+
)
|
| 161 |
+
raise KeyError(f"unknown model key: {key!r}")
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def default_model() -> CatalogModel:
|
| 165 |
+
"""First entry (self-hosted vLLM 4B)."""
|
| 166 |
+
return CATALOG[0]
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def resolve_endpoint(
|
| 170 |
+
model_key: str,
|
| 171 |
+
*,
|
| 172 |
+
vllm_url: str = "",
|
| 173 |
+
hf_token: str = "",
|
| 174 |
+
) -> tuple[str, str, str, "CatalogModel"]:
|
| 175 |
+
"""Translate a UI selection into ``(base_url, api_key, model_string, entry)``.
|
| 176 |
+
|
| 177 |
+
Raises ``ValueError`` with a clear message when a required secret is
|
| 178 |
+
missing so the UI can render a precise "please fill in X" message.
|
| 179 |
+
"""
|
| 180 |
+
m = by_key(model_key)
|
| 181 |
+
if m.backend == "vllm":
|
| 182 |
+
vllm_url = (vllm_url or "").strip()
|
| 183 |
+
if not vllm_url:
|
| 184 |
+
raise ValueError(
|
| 185 |
+
f"model {m.dropdown_key!r} requires a vLLM base URL "
|
| 186 |
+
"(the tunneled or in-cluster /v1 endpoint)."
|
| 187 |
+
)
|
| 188 |
+
base = vllm_url.rstrip("/")
|
| 189 |
+
if not base.endswith("/v1"):
|
| 190 |
+
base = base + "/v1"
|
| 191 |
+
return base, "anything", m.opencode_model_string, m
|
| 192 |
+
if m.backend == "hf_router":
|
| 193 |
+
hf_token = (hf_token or "").strip()
|
| 194 |
+
if not hf_token:
|
| 195 |
+
raise ValueError(
|
| 196 |
+
f"model {m.dropdown_key!r} requires an HF token "
|
| 197 |
+
"(hf_... from https://huggingface.co/settings/tokens)."
|
| 198 |
+
)
|
| 199 |
+
return HF_ROUTER_BASE_URL, hf_token, m.opencode_model_string, m
|
| 200 |
+
raise ValueError(f"unknown backend: {m.backend}")
|
server/gradio_ui.py
CHANGED
|
@@ -15,11 +15,27 @@ ticker instead of a frozen page.
|
|
| 15 |
from __future__ import annotations
|
| 16 |
|
| 17 |
import json
|
|
|
|
| 18 |
import time
|
| 19 |
from typing import Any
|
| 20 |
|
| 21 |
import gradio as gr
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ββ Preset tasks ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
# Shown in the dropdown. Each has instruction + matching bash verifier.
|
|
@@ -141,12 +157,20 @@ PRESET_TASKS: dict[str, tuple[str, str]] = {
|
|
| 141 |
}
|
| 142 |
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
"Qwen/Qwen3-Coder-Next",
|
| 147 |
-
"openai/gpt-4o-mini",
|
| 148 |
-
"openai/gpt-5.3-chat-latest",
|
| 149 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
|
| 152 |
def opencode_ui_builder(
|
|
@@ -174,7 +198,7 @@ def opencode_ui_builder(
|
|
| 174 |
_env_cache["instance"] = inst
|
| 175 |
return inst
|
| 176 |
|
| 177 |
-
with gr.Blocks(title=title, analytics_enabled=False) as demo:
|
| 178 |
gr.Markdown(
|
| 179 |
f"# {title}\n"
|
| 180 |
"Run one OpenCode rollout against any OpenAI-compatible endpoint. "
|
|
@@ -184,27 +208,56 @@ def opencode_ui_builder(
|
|
| 184 |
)
|
| 185 |
|
| 186 |
# ββ Config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
with gr.Row():
|
| 188 |
-
with gr.Column(scale=
|
| 189 |
-
|
| 190 |
-
label="
|
| 191 |
-
|
| 192 |
-
|
| 193 |
)
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
)
|
| 204 |
-
|
| 205 |
-
label="
|
| 206 |
-
value=
|
| 207 |
-
type="password",
|
| 208 |
)
|
| 209 |
with gr.Column(scale=1):
|
| 210 |
mode = gr.Dropdown(
|
|
@@ -221,82 +274,132 @@ def opencode_ui_builder(
|
|
| 221 |
minimum=60, maximum=1200, value=300, step=30,
|
| 222 |
)
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
)
|
| 231 |
-
task_id = gr.Textbox(
|
| 232 |
-
label="Task id (optional label)",
|
| 233 |
-
value="hello_demo",
|
| 234 |
)
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
value=PRESET_TASKS["hello"][1],
|
| 244 |
-
language="shell",
|
| 245 |
)
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
)
|
| 251 |
|
| 252 |
-
#
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
)
|
| 262 |
-
|
| 263 |
with gr.Row():
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
-
# ββ Output panels βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 269 |
-
status = gr.Markdown()
|
| 270 |
with gr.Row():
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
workdir_md = gr.Markdown()
|
| 278 |
-
with gr.Accordion("Proxy trace (per turn)", open=False):
|
| 279 |
proxy_trace_json = gr.JSON(label=None)
|
| 280 |
-
with gr.Accordion("
|
| 281 |
-
verifier_out = gr.Textbox(label="
|
| 282 |
-
verifier_err = gr.Textbox(label="
|
| 283 |
with gr.Accordion("Raw result JSON", open=False):
|
| 284 |
raw_json = gr.JSON(label=None)
|
| 285 |
|
| 286 |
# ββ Streaming Run handler βββββββββββββββββββββββββββββββββββββββββ
|
| 287 |
def _run_streaming(
|
|
|
|
|
|
|
| 288 |
vllm_url_v: str,
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
| 292 |
mode_v: str,
|
| 293 |
max_tokens_cap_v: int,
|
| 294 |
agent_timeout_s_v: float,
|
| 295 |
task_id_v: str,
|
| 296 |
instruction_v: str,
|
| 297 |
-
test_script_v: str,
|
| 298 |
setup_shell_v: str,
|
| 299 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
"""Gradio generator: yields UI updates as the rollout progresses.
|
| 301 |
|
| 302 |
Uses the non-blocking fine-grained tools:
|
|
@@ -305,29 +408,52 @@ def opencode_ui_builder(
|
|
| 305 |
import httpx
|
| 306 |
from openenv.core.env_server.mcp_types import CallToolAction
|
| 307 |
|
| 308 |
-
# 0)
|
| 309 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
yield (
|
| 311 |
-
"π **
|
| 312 |
None, None, None, 0,
|
| 313 |
-
"", [], "", "", {"stage": "validate"},
|
|
|
|
|
|
|
| 314 |
)
|
| 315 |
-
|
| 316 |
-
if
|
| 317 |
-
|
| 318 |
try:
|
| 319 |
-
r = httpx.get(
|
|
|
|
|
|
|
| 320 |
if r.status_code != 200:
|
| 321 |
yield _error_tuple(
|
| 322 |
-
f"
|
|
|
|
| 323 |
)
|
| 324 |
return
|
| 325 |
except Exception as exc:
|
| 326 |
yield _error_tuple(
|
| 327 |
-
f"
|
| 328 |
)
|
| 329 |
return
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
try:
|
| 332 |
env = _get_env()
|
| 333 |
env.reset()
|
|
@@ -335,27 +461,23 @@ def opencode_ui_builder(
|
|
| 335 |
yield _error_tuple(f"env init failed: {type(exc).__name__}: {exc}")
|
| 336 |
return
|
| 337 |
|
| 338 |
-
#
|
|
|
|
| 339 |
try:
|
| 340 |
start_obs = env.step(
|
| 341 |
CallToolAction(
|
| 342 |
tool_name="start_rollout",
|
| 343 |
arguments={
|
|
|
|
| 344 |
"vllm_url": vllm_url_v,
|
| 345 |
-
"
|
|
|
|
| 346 |
"instruction": instruction_v,
|
| 347 |
"test_script": test_script_v,
|
| 348 |
"task_id": task_id_v,
|
| 349 |
"setup_shell": setup_shell_v,
|
| 350 |
"upload_files": {},
|
| 351 |
-
"provider": provider_v,
|
| 352 |
-
"api_key": api_key_v,
|
| 353 |
"mode": mode_v,
|
| 354 |
-
# chat_template_kwargs.enable_thinking=false is a
|
| 355 |
-
# harmless no-op for non-Qwen models (vLLM silently
|
| 356 |
-
# ignores unknown template kwargs). Keep it on by
|
| 357 |
-
# default so Qwen3/Qwen3.5 don't dump think blocks.
|
| 358 |
-
"disable_thinking": True,
|
| 359 |
"max_tokens_cap": int(max_tokens_cap_v),
|
| 360 |
"agent_timeout_s": float(agent_timeout_s_v),
|
| 361 |
},
|
|
@@ -372,17 +494,21 @@ def opencode_ui_builder(
|
|
| 372 |
yield _error_tuple(f"start_rollout returned no rollout_id: {start_payload}")
|
| 373 |
return
|
| 374 |
|
| 375 |
-
# Initial UI update
|
|
|
|
| 376 |
yield (
|
| 377 |
-
f"
|
| 378 |
None, None, None, 0,
|
| 379 |
"_(no files yet)_", [], "", "", start_payload,
|
|
|
|
|
|
|
| 380 |
)
|
| 381 |
|
| 382 |
-
# 2)
|
|
|
|
| 383 |
deadline = time.time() + float(agent_timeout_s_v) + 120
|
|
|
|
| 384 |
status_str = "running"
|
| 385 |
-
state_payload: dict[str, Any] = {}
|
| 386 |
while time.time() < deadline:
|
| 387 |
try:
|
| 388 |
state_obs = env.step(
|
|
@@ -390,27 +516,59 @@ def opencode_ui_builder(
|
|
| 390 |
tool_name="get_state",
|
| 391 |
arguments={"rollout_id": rollout_id},
|
| 392 |
),
|
| 393 |
-
timeout_s=
|
| 394 |
)
|
| 395 |
state_payload = _parse_result(state_obs)
|
| 396 |
except Exception as exc:
|
| 397 |
state_payload = {"error": f"{type(exc).__name__}: {exc}"}
|
| 398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
status_str = state_payload.get("status", "?")
|
| 400 |
-
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
yield (
|
| 404 |
-
f"
|
| 405 |
-
|
| 406 |
-
None, None, None, turns_so_far,
|
| 407 |
"_(workdir populated on finalize)_",
|
| 408 |
[], "", "", state_payload,
|
|
|
|
|
|
|
| 409 |
)
|
| 410 |
|
| 411 |
if status_str == "done":
|
| 412 |
break
|
| 413 |
-
time.sleep(
|
| 414 |
|
| 415 |
# 3) finalize_rollout β run verifier + collect full result
|
| 416 |
try:
|
|
@@ -429,6 +587,38 @@ def opencode_ui_builder(
|
|
| 429 |
status_md = _summarize_status(result)
|
| 430 |
wd_md = _render_workdir(result.get("workdir_files") or {})
|
| 431 |
turns = result.get("proxy_turns") or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
yield (
|
| 433 |
status_md,
|
| 434 |
result.get("reward"),
|
|
@@ -437,69 +627,156 @@ def opencode_ui_builder(
|
|
| 437 |
len(turns),
|
| 438 |
wd_md,
|
| 439 |
turns,
|
| 440 |
-
|
| 441 |
-
|
| 442 |
result,
|
|
|
|
|
|
|
| 443 |
)
|
| 444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
run_btn.click(
|
| 446 |
_run_streaming,
|
| 447 |
inputs=[
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
| 449 |
max_tokens_cap, agent_timeout_s,
|
| 450 |
-
task_id, instruction,
|
| 451 |
-
],
|
| 452 |
-
outputs=[
|
| 453 |
-
status, reward_out, wall_out, exit_out, turns_out,
|
| 454 |
-
workdir_md, proxy_trace_json,
|
| 455 |
-
verifier_out, verifier_err, raw_json,
|
| 456 |
],
|
|
|
|
| 457 |
)
|
| 458 |
|
| 459 |
-
# Check-
|
| 460 |
-
#
|
| 461 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
import httpx
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
try:
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
except Exception as exc:
|
| 470 |
return f"β `{models_url}` unreachable: `{type(exc).__name__}: {exc}`"
|
| 471 |
if r.status_code != 200:
|
| 472 |
return f"β `{models_url}` β HTTP {r.status_code}\n```\n{r.text[:400]}\n```"
|
| 473 |
try:
|
| 474 |
-
|
| 475 |
-
ids = [m.get("id") for m in body.get("data", []) if m.get("id")]
|
| 476 |
except Exception:
|
| 477 |
ids = []
|
|
|
|
| 478 |
if ids:
|
| 479 |
-
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
|
| 484 |
-
# Reset handler
|
| 485 |
-
#
|
| 486 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
_env_cache["instance"] = None
|
| 488 |
return (
|
| 489 |
-
"π **
|
| 490 |
None, None, None, None,
|
| 491 |
"_(workdir cleared)_",
|
| 492 |
[], "", "", {"reset": True},
|
|
|
|
|
|
|
| 493 |
)
|
| 494 |
|
| 495 |
reset_btn.click(
|
| 496 |
_reset,
|
| 497 |
-
inputs=[],
|
| 498 |
-
outputs=
|
| 499 |
-
status, reward_out, wall_out, exit_out, turns_out,
|
| 500 |
-
workdir_md, proxy_trace_json,
|
| 501 |
-
verifier_out, verifier_err, raw_json,
|
| 502 |
-
],
|
| 503 |
)
|
| 504 |
|
| 505 |
return demo
|
|
@@ -508,11 +785,37 @@ def opencode_ui_builder(
|
|
| 508 |
# ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 509 |
|
| 510 |
|
| 511 |
-
def _error_tuple(msg: str) -> tuple:
|
| 512 |
return (
|
| 513 |
f"β **Error:** `{msg}`",
|
| 514 |
None, None, None, None,
|
| 515 |
"", [], "", "", {"error": msg},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
)
|
| 517 |
|
| 518 |
|
|
|
|
| 15 |
from __future__ import annotations
|
| 16 |
|
| 17 |
import json
|
| 18 |
+
import os
|
| 19 |
import time
|
| 20 |
from typing import Any
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
|
| 24 |
+
try:
|
| 25 |
+
from .catalog import CATALOG, by_key, default_model, resolve_endpoint
|
| 26 |
+
from .transcript import (
|
| 27 |
+
TRANSCRIPT_CSS,
|
| 28 |
+
collect_parts_from_messages,
|
| 29 |
+
render_transcript,
|
| 30 |
+
)
|
| 31 |
+
except ImportError: # pragma: no cover β support running as a script
|
| 32 |
+
from catalog import CATALOG, by_key, default_model, resolve_endpoint # type: ignore
|
| 33 |
+
from transcript import ( # type: ignore
|
| 34 |
+
TRANSCRIPT_CSS,
|
| 35 |
+
collect_parts_from_messages,
|
| 36 |
+
render_transcript,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
|
| 40 |
# ββ Preset tasks ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
# Shown in the dropdown. Each has instruction + matching bash verifier.
|
|
|
|
| 157 |
}
|
| 158 |
|
| 159 |
|
| 160 |
+
_HF_MODEL_CHOICES = [
|
| 161 |
+
(m.label, m.dropdown_key) for m in CATALOG if m.backend == "hf_router"
|
|
|
|
|
|
|
|
|
|
| 162 |
]
|
| 163 |
+
# Sentinel value used for the "type your own HF-router id" dropdown option.
|
| 164 |
+
_CUSTOM_HF_KEY = "__custom_hf__"
|
| 165 |
+
_HF_MODEL_CHOICES.append(("Custom β enter HF Router model id below", _CUSTOM_HF_KEY))
|
| 166 |
+
|
| 167 |
+
_DEFAULT_HF_KEY = _HF_MODEL_CHOICES[0][1]
|
| 168 |
+
_HF_TOKEN_ENV = os.environ.get("HF_TOKEN", "")
|
| 169 |
+
|
| 170 |
+
# Suggested / recent vllm model ids (user can type anything).
|
| 171 |
+
_VLLM_MODEL_SUGGESTIONS = [
|
| 172 |
+
m.repo for m in CATALOG if m.backend == "vllm"
|
| 173 |
+
] + ["Qwen/Qwen3.5-4B", "Qwen/Qwen2.5-7B-Instruct"]
|
| 174 |
|
| 175 |
|
| 176 |
def opencode_ui_builder(
|
|
|
|
| 198 |
_env_cache["instance"] = inst
|
| 199 |
return inst
|
| 200 |
|
| 201 |
+
with gr.Blocks(title=title, analytics_enabled=False, css=TRANSCRIPT_CSS) as demo:
|
| 202 |
gr.Markdown(
|
| 203 |
f"# {title}\n"
|
| 204 |
"Run one OpenCode rollout against any OpenAI-compatible endpoint. "
|
|
|
|
| 208 |
)
|
| 209 |
|
| 210 |
# ββ Config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 211 |
+
# Two backends:
|
| 212 |
+
# 1. Self-hosted vLLM β user supplies model id + base URL.
|
| 213 |
+
# 2. Hosted (HF Router) β user picks from the curated Qwen
|
| 214 |
+
# catalog, or selects "Custom" and types their own HF-router
|
| 215 |
+
# model id (e.g. ``Qwen/Qwen3-8B:together``).
|
| 216 |
with gr.Row():
|
| 217 |
+
with gr.Column(scale=3):
|
| 218 |
+
backend_mode = gr.Radio(
|
| 219 |
+
label="Backend",
|
| 220 |
+
choices=["Self-hosted vLLM", "Hosted (HF Router)"],
|
| 221 |
+
value="Hosted (HF Router)",
|
| 222 |
)
|
| 223 |
+
# --- Self-hosted vLLM fields (shown only when selected) ---
|
| 224 |
+
with gr.Row(visible=False) as vllm_row:
|
| 225 |
+
vllm_model = gr.Textbox(
|
| 226 |
+
label="Model id (as served by your vLLM)",
|
| 227 |
+
value=_VLLM_MODEL_SUGGESTIONS[0],
|
| 228 |
+
placeholder="Qwen/Qwen3.5-4B",
|
| 229 |
+
scale=1,
|
| 230 |
+
)
|
| 231 |
+
vllm_url = gr.Textbox(
|
| 232 |
+
label="vLLM base URL",
|
| 233 |
+
value="",
|
| 234 |
+
placeholder="https://.../v1",
|
| 235 |
+
scale=2,
|
| 236 |
+
)
|
| 237 |
+
# --- Hosted HF Router fields (default visible) ---
|
| 238 |
+
with gr.Row(visible=True) as hf_row:
|
| 239 |
+
hosted_model = gr.Dropdown(
|
| 240 |
+
label="Hosted model",
|
| 241 |
+
choices=_HF_MODEL_CHOICES,
|
| 242 |
+
value=_DEFAULT_HF_KEY,
|
| 243 |
+
scale=2,
|
| 244 |
+
)
|
| 245 |
+
hf_token = gr.Textbox(
|
| 246 |
+
label="HF token",
|
| 247 |
+
value=_HF_TOKEN_ENV,
|
| 248 |
+
type="password",
|
| 249 |
+
placeholder="hf_...",
|
| 250 |
+
scale=2,
|
| 251 |
+
)
|
| 252 |
+
hosted_custom_id = gr.Textbox(
|
| 253 |
+
label="Custom HF-router model id",
|
| 254 |
+
value="",
|
| 255 |
+
placeholder="Qwen/Qwen3-8B:together (org/repo[:provider])",
|
| 256 |
+
visible=False,
|
| 257 |
)
|
| 258 |
+
thinking = gr.Checkbox(
|
| 259 |
+
label="Thinking mode (Qwen3.5 only)",
|
| 260 |
+
value=False,
|
|
|
|
| 261 |
)
|
| 262 |
with gr.Column(scale=1):
|
| 263 |
mode = gr.Dropdown(
|
|
|
|
| 274 |
minimum=60, maximum=1200, value=300, step=30,
|
| 275 |
)
|
| 276 |
|
| 277 |
+
def _on_backend_change(mode_v: str):
|
| 278 |
+
is_vllm = mode_v == "Self-hosted vLLM"
|
| 279 |
+
return (
|
| 280 |
+
gr.update(visible=is_vllm), # vllm_row
|
| 281 |
+
gr.update(visible=not is_vllm), # hf_row
|
| 282 |
+
gr.update(visible=False), # hosted_custom_id reset
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
)
|
| 284 |
|
| 285 |
+
def _on_hosted_change(choice: str):
|
| 286 |
+
return gr.update(visible=(choice == _CUSTOM_HF_KEY))
|
| 287 |
+
|
| 288 |
+
backend_mode.change(
|
| 289 |
+
_on_backend_change,
|
| 290 |
+
inputs=[backend_mode],
|
| 291 |
+
outputs=[vllm_row, hf_row, hosted_custom_id],
|
|
|
|
|
|
|
| 292 |
)
|
| 293 |
+
hosted_model.change(
|
| 294 |
+
_on_hosted_change,
|
| 295 |
+
inputs=[hosted_model],
|
| 296 |
+
outputs=[hosted_custom_id],
|
| 297 |
)
|
| 298 |
|
| 299 |
+
# ββ Task fields ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 300 |
+
# Verifier (test.sh) is intentionally not surfaced here β it's only
|
| 301 |
+
# needed for scored training. For interactive use, leave it empty
|
| 302 |
+
# and just have the agent finish with something observable (e.g.
|
| 303 |
+
# "print DONE at the end"). MCP tools already accept
|
| 304 |
+
# ``test_script=""`` and skip scoring when empty.
|
| 305 |
+
instruction = gr.Textbox(
|
| 306 |
+
label="Instruction",
|
| 307 |
+
value=(
|
| 308 |
+
"Write `hello.py` in the current directory that prints "
|
| 309 |
+
"`hello` (no quotes). Then run it and print `DONE` when "
|
| 310 |
+
"you are finished."
|
| 311 |
+
),
|
| 312 |
+
lines=4,
|
| 313 |
)
|
|
|
|
| 314 |
with gr.Row():
|
| 315 |
+
task_id = gr.Textbox(
|
| 316 |
+
label="Task id (optional label)",
|
| 317 |
+
value="interactive",
|
| 318 |
+
scale=1,
|
| 319 |
+
)
|
| 320 |
+
setup_shell = gr.Textbox(
|
| 321 |
+
label="Setup shell (optional, runs before opencode)",
|
| 322 |
+
value="",
|
| 323 |
+
placeholder="e.g. pip install polars",
|
| 324 |
+
scale=3,
|
| 325 |
+
)
|
| 326 |
|
|
|
|
|
|
|
| 327 |
with gr.Row():
|
| 328 |
+
run_btn = gr.Button("βΆ Run", variant="primary", scale=2)
|
| 329 |
+
abort_btn = gr.Button("βΉ Abort", variant="stop", scale=1)
|
| 330 |
+
reset_btn = gr.Button("οΏ½οΏ½οΏ½ Reset", variant="secondary", scale=1)
|
| 331 |
+
check_btn = gr.Button("π Check endpoint", scale=1)
|
| 332 |
+
|
| 333 |
+
# ββ Output: chat-style single-column ββββββββββββββββββββββββββββββ
|
| 334 |
+
# Transcript is the hero. The status line above it carries a
|
| 335 |
+
# sandbox-boot phase indicator so users know whether we're
|
| 336 |
+
# spawning E2B, installing opencode, or waiting for the agent.
|
| 337 |
+
# Everything else (reward, files, logprob trace, verifier, raw
|
| 338 |
+
# JSON) lives in collapsed accordions below. Matches the chat
|
| 339 |
+
# shape of local_ui.py.
|
| 340 |
+
status = gr.Markdown()
|
| 341 |
+
# Shared state: the active rollout_id so Abort and Reset can find it.
|
| 342 |
+
rollout_state = gr.State("")
|
| 343 |
+
transcript_html = gr.HTML(
|
| 344 |
+
value="<div class='empty'>run a rollout to see the transcript</div>",
|
| 345 |
+
)
|
| 346 |
|
| 347 |
+
# Hidden outputs retained only so the streaming handler's tuple
|
| 348 |
+
# shape doesn't have to change. They never render in the UI.
|
| 349 |
+
reward_out = gr.Number(visible=False)
|
| 350 |
+
wall_out = gr.Number(visible=False)
|
| 351 |
+
exit_out = gr.Number(visible=False)
|
| 352 |
+
turns_out = gr.Number(visible=False)
|
| 353 |
+
with gr.Accordion("Workdir files", open=False):
|
| 354 |
workdir_md = gr.Markdown()
|
| 355 |
+
with gr.Accordion("Proxy trace (per turn β logprobs)", open=False):
|
| 356 |
proxy_trace_json = gr.JSON(label=None)
|
| 357 |
+
with gr.Accordion("Diagnostics (proxy Β· install Β· agent logs)", open=False):
|
| 358 |
+
verifier_out = gr.Textbox(label="proxy/install/agent log tails", lines=12)
|
| 359 |
+
verifier_err = gr.Textbox(label="primitive error (if any)", lines=3)
|
| 360 |
with gr.Accordion("Raw result JSON", open=False):
|
| 361 |
raw_json = gr.JSON(label=None)
|
| 362 |
|
| 363 |
# ββ Streaming Run handler βββββββββββββββββββββββββββββββββββββββββ
|
| 364 |
def _run_streaming(
|
| 365 |
+
backend_mode_v: str,
|
| 366 |
+
vllm_model_v: str,
|
| 367 |
vllm_url_v: str,
|
| 368 |
+
hosted_model_v: str,
|
| 369 |
+
hosted_custom_id_v: str,
|
| 370 |
+
hf_token_v: str,
|
| 371 |
+
thinking_v: bool,
|
| 372 |
mode_v: str,
|
| 373 |
max_tokens_cap_v: int,
|
| 374 |
agent_timeout_s_v: float,
|
| 375 |
task_id_v: str,
|
| 376 |
instruction_v: str,
|
|
|
|
| 377 |
setup_shell_v: str,
|
| 378 |
):
|
| 379 |
+
# Verifier is optional. For interactive use we pass an empty
|
| 380 |
+
# test_script so the finalizer skips scoring.
|
| 381 |
+
test_script_v = ""
|
| 382 |
+
# Assemble the uniform model_key from the UI's two-backend picker.
|
| 383 |
+
if backend_mode_v == "Self-hosted vLLM":
|
| 384 |
+
if not vllm_model_v.strip():
|
| 385 |
+
yield _error_tuple("Self-hosted vLLM requires a model id.")
|
| 386 |
+
return
|
| 387 |
+
model_key_v = f"vllm://{vllm_model_v.strip()}"
|
| 388 |
+
else:
|
| 389 |
+
if hosted_model_v == _CUSTOM_HF_KEY:
|
| 390 |
+
cid = hosted_custom_id_v.strip()
|
| 391 |
+
if not cid:
|
| 392 |
+
yield _error_tuple(
|
| 393 |
+
"Hosted 'Custom' picked but no model id entered."
|
| 394 |
+
)
|
| 395 |
+
return
|
| 396 |
+
if not cid.startswith("hf-router://"):
|
| 397 |
+
# Accept either plain "Org/Repo[:provider]" or a
|
| 398 |
+
# fully-prefixed key.
|
| 399 |
+
cid = f"hf-router://{cid}"
|
| 400 |
+
model_key_v = cid
|
| 401 |
+
else:
|
| 402 |
+
model_key_v = hosted_model_v
|
| 403 |
"""Gradio generator: yields UI updates as the rollout progresses.
|
| 404 |
|
| 405 |
Uses the non-blocking fine-grained tools:
|
|
|
|
| 408 |
import httpx
|
| 409 |
from openenv.core.env_server.mcp_types import CallToolAction
|
| 410 |
|
| 411 |
+
# 0) Resolve the catalog pick into (base_url, api_key, model).
|
| 412 |
+
# This validates the secret matches the selected backend.
|
| 413 |
+
try:
|
| 414 |
+
base_url, _api_key, _model, entry = resolve_endpoint(
|
| 415 |
+
model_key_v,
|
| 416 |
+
vllm_url=vllm_url_v,
|
| 417 |
+
hf_token=hf_token_v,
|
| 418 |
+
)
|
| 419 |
+
except Exception as exc:
|
| 420 |
+
yield _error_tuple(f"config: {exc}")
|
| 421 |
+
return
|
| 422 |
+
|
| 423 |
+
# 1) Pre-flight: verify the endpoint is reachable before burning
|
| 424 |
+
# an E2B sandbox on a URL typo / bad token.
|
| 425 |
yield (
|
| 426 |
+
"π **validating endpointβ¦**",
|
| 427 |
None, None, None, 0,
|
| 428 |
+
"", [], "", "", {"stage": "validate", "backend": entry.backend},
|
| 429 |
+
"<div class='empty'>validating endpointβ¦</div>",
|
| 430 |
+
"",
|
| 431 |
)
|
| 432 |
+
probe_headers: dict[str, str] = {}
|
| 433 |
+
if entry.backend == "hf_router":
|
| 434 |
+
probe_headers["Authorization"] = f"Bearer {hf_token_v}"
|
| 435 |
try:
|
| 436 |
+
r = httpx.get(
|
| 437 |
+
f"{base_url}/models", headers=probe_headers, timeout=15,
|
| 438 |
+
)
|
| 439 |
if r.status_code != 200:
|
| 440 |
yield _error_tuple(
|
| 441 |
+
f"{entry.backend} probe {base_url}/models β HTTP {r.status_code}: "
|
| 442 |
+
f"{r.text[:200]}"
|
| 443 |
)
|
| 444 |
return
|
| 445 |
except Exception as exc:
|
| 446 |
yield _error_tuple(
|
| 447 |
+
f"endpoint unreachable: {type(exc).__name__}: {exc}"
|
| 448 |
)
|
| 449 |
return
|
| 450 |
|
| 451 |
+
yield (
|
| 452 |
+
"π‘ **initialising env (creating MCP registry)β¦**",
|
| 453 |
+
None, None, None, 0, "", [], "", "", {"stage": "env_init"},
|
| 454 |
+
"<div class='empty'>initialising envβ¦</div>",
|
| 455 |
+
"",
|
| 456 |
+
)
|
| 457 |
try:
|
| 458 |
env = _get_env()
|
| 459 |
env.reset()
|
|
|
|
| 461 |
yield _error_tuple(f"env init failed: {type(exc).__name__}: {exc}")
|
| 462 |
return
|
| 463 |
|
| 464 |
+
# 2) start_rollout β uniform args: model_key + vllm_url + hf_token
|
| 465 |
+
# + thinking. The env resolves via the catalog server-side.
|
| 466 |
try:
|
| 467 |
start_obs = env.step(
|
| 468 |
CallToolAction(
|
| 469 |
tool_name="start_rollout",
|
| 470 |
arguments={
|
| 471 |
+
"model_key": model_key_v,
|
| 472 |
"vllm_url": vllm_url_v,
|
| 473 |
+
"hf_token": hf_token_v,
|
| 474 |
+
"thinking": bool(thinking_v),
|
| 475 |
"instruction": instruction_v,
|
| 476 |
"test_script": test_script_v,
|
| 477 |
"task_id": task_id_v,
|
| 478 |
"setup_shell": setup_shell_v,
|
| 479 |
"upload_files": {},
|
|
|
|
|
|
|
| 480 |
"mode": mode_v,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
"max_tokens_cap": int(max_tokens_cap_v),
|
| 482 |
"agent_timeout_s": float(agent_timeout_s_v),
|
| 483 |
},
|
|
|
|
| 494 |
yield _error_tuple(f"start_rollout returned no rollout_id: {start_payload}")
|
| 495 |
return
|
| 496 |
|
| 497 |
+
# Initial UI update β yield the rollout_id into shared state so
|
| 498 |
+
# Abort / Reset can target the right rollout.
|
| 499 |
yield (
|
| 500 |
+
f"π‘ **rollout `{rollout_id}` started β booting sandboxβ¦**",
|
| 501 |
None, None, None, 0,
|
| 502 |
"_(no files yet)_", [], "", "", start_payload,
|
| 503 |
+
"<div class='empty'>booting sandbox β this takes ~20β40s coldβ¦</div>",
|
| 504 |
+
rollout_id,
|
| 505 |
)
|
| 506 |
|
| 507 |
+
# 2) Poll get_state + get_messages at 1s cadence. Show a sandbox
|
| 508 |
+
# boot-phase label so users can tell "booting" from "stuck".
|
| 509 |
deadline = time.time() + float(agent_timeout_s_v) + 120
|
| 510 |
+
t_started = float(start_payload.get("started_at") or time.time())
|
| 511 |
status_str = "running"
|
|
|
|
| 512 |
while time.time() < deadline:
|
| 513 |
try:
|
| 514 |
state_obs = env.step(
|
|
|
|
| 516 |
tool_name="get_state",
|
| 517 |
arguments={"rollout_id": rollout_id},
|
| 518 |
),
|
| 519 |
+
timeout_s=20,
|
| 520 |
)
|
| 521 |
state_payload = _parse_result(state_obs)
|
| 522 |
except Exception as exc:
|
| 523 |
state_payload = {"error": f"{type(exc).__name__}: {exc}"}
|
| 524 |
|
| 525 |
+
# Live transcript β only meaningful once opencode serve has
|
| 526 |
+
# created its session (state_payload carries serve_session_id
|
| 527 |
+
# in that case). Before that, get_messages returns an empty
|
| 528 |
+
# list with a ``note`` field.
|
| 529 |
+
parts_list: list = []
|
| 530 |
+
transcript = "<div class='empty'>waiting for first partβ¦</div>"
|
| 531 |
+
try:
|
| 532 |
+
msg_obs = env.step(
|
| 533 |
+
CallToolAction(
|
| 534 |
+
tool_name="get_messages",
|
| 535 |
+
arguments={"rollout_id": rollout_id},
|
| 536 |
+
),
|
| 537 |
+
timeout_s=20,
|
| 538 |
+
)
|
| 539 |
+
msg_payload = _parse_result(msg_obs)
|
| 540 |
+
parts_list = collect_parts_from_messages(
|
| 541 |
+
msg_payload.get("messages") or []
|
| 542 |
+
)
|
| 543 |
+
if parts_list:
|
| 544 |
+
transcript = render_transcript(parts_list)
|
| 545 |
+
except Exception:
|
| 546 |
+
pass
|
| 547 |
+
|
| 548 |
status_str = state_payload.get("status", "?")
|
| 549 |
+
elapsed = time.time() - t_started
|
| 550 |
+
msg_count = len(
|
| 551 |
+
(state_payload.get("messages") if isinstance(state_payload, dict) else None) or []
|
| 552 |
+
)
|
| 553 |
+
# Prefer message count from the transcript payload.
|
| 554 |
+
try:
|
| 555 |
+
msg_count = len(msg_payload.get("messages") or [])
|
| 556 |
+
except Exception:
|
| 557 |
+
msg_count = 0
|
| 558 |
+
phase = _boot_phase(state_payload, msg_count, len(parts_list))
|
| 559 |
|
| 560 |
yield (
|
| 561 |
+
f"{phase} Β· elapsed `{elapsed:.1f}s` Β· rollout `{rollout_id}`",
|
| 562 |
+
None, None, None, state_payload.get("proxy_turns_so_far", 0),
|
|
|
|
| 563 |
"_(workdir populated on finalize)_",
|
| 564 |
[], "", "", state_payload,
|
| 565 |
+
transcript,
|
| 566 |
+
rollout_id,
|
| 567 |
)
|
| 568 |
|
| 569 |
if status_str == "done":
|
| 570 |
break
|
| 571 |
+
time.sleep(1.0)
|
| 572 |
|
| 573 |
# 3) finalize_rollout β run verifier + collect full result
|
| 574 |
try:
|
|
|
|
| 587 |
status_md = _summarize_status(result)
|
| 588 |
wd_md = _render_workdir(result.get("workdir_files") or {})
|
| 589 |
turns = result.get("proxy_turns") or []
|
| 590 |
+
|
| 591 |
+
# One last transcript fetch β captures any final parts that
|
| 592 |
+
# arrived between the last poll and session.idle.
|
| 593 |
+
final_transcript = "<div class='empty'>(transcript unavailable)</div>"
|
| 594 |
+
try:
|
| 595 |
+
msg_obs = env.step(
|
| 596 |
+
CallToolAction(
|
| 597 |
+
tool_name="get_messages",
|
| 598 |
+
arguments={"rollout_id": rollout_id},
|
| 599 |
+
),
|
| 600 |
+
timeout_s=30,
|
| 601 |
+
)
|
| 602 |
+
msg_payload = _parse_result(msg_obs)
|
| 603 |
+
parts = collect_parts_from_messages(msg_payload.get("messages") or [])
|
| 604 |
+
final_transcript = render_transcript(parts)
|
| 605 |
+
except Exception:
|
| 606 |
+
pass
|
| 607 |
+
|
| 608 |
+
# Diagnostics pane: concat the three log tails so failures
|
| 609 |
+
# are visible without expanding the raw JSON.
|
| 610 |
+
diag_tail = "\n".join([
|
| 611 |
+
"--- PROXY LOG TAIL ---",
|
| 612 |
+
(result.get("proxy_log_tail") or "(empty)")[-2000:],
|
| 613 |
+
"",
|
| 614 |
+
"--- INSTALL LOG TAIL ---",
|
| 615 |
+
(result.get("install_log_tail") or "(empty)")[-1000:],
|
| 616 |
+
"",
|
| 617 |
+
"--- AGENT LOG TAIL ---",
|
| 618 |
+
(result.get("agent_log_tail") or "(empty)")[-2000:],
|
| 619 |
+
])
|
| 620 |
+
err_line = result.get("error") or ""
|
| 621 |
+
|
| 622 |
yield (
|
| 623 |
status_md,
|
| 624 |
result.get("reward"),
|
|
|
|
| 627 |
len(turns),
|
| 628 |
wd_md,
|
| 629 |
turns,
|
| 630 |
+
diag_tail,
|
| 631 |
+
err_line,
|
| 632 |
result,
|
| 633 |
+
final_transcript,
|
| 634 |
+
rollout_id,
|
| 635 |
)
|
| 636 |
|
| 637 |
+
_output_widgets = [
|
| 638 |
+
status, reward_out, wall_out, exit_out, turns_out,
|
| 639 |
+
workdir_md, proxy_trace_json,
|
| 640 |
+
verifier_out, verifier_err, raw_json,
|
| 641 |
+
transcript_html, rollout_state,
|
| 642 |
+
]
|
| 643 |
run_btn.click(
|
| 644 |
_run_streaming,
|
| 645 |
inputs=[
|
| 646 |
+
backend_mode,
|
| 647 |
+
vllm_model, vllm_url,
|
| 648 |
+
hosted_model, hosted_custom_id, hf_token,
|
| 649 |
+
thinking, mode,
|
| 650 |
max_tokens_cap, agent_timeout_s,
|
| 651 |
+
task_id, instruction, setup_shell,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
],
|
| 653 |
+
outputs=_output_widgets,
|
| 654 |
)
|
| 655 |
|
| 656 |
+
# Check-endpoint handler β cheap GET /v1/models probe against the
|
| 657 |
+
# currently-configured backend.
|
| 658 |
+
def _check_endpoint(
|
| 659 |
+
backend_mode_v: str,
|
| 660 |
+
vllm_model_v: str, vllm_url_v: str,
|
| 661 |
+
hosted_model_v: str, hosted_custom_id_v: str, hf_token_v: str,
|
| 662 |
+
) -> str:
|
| 663 |
import httpx
|
| 664 |
+
if backend_mode_v == "Self-hosted vLLM":
|
| 665 |
+
model_key_v = f"vllm://{(vllm_model_v or '').strip()}"
|
| 666 |
+
else:
|
| 667 |
+
if hosted_model_v == _CUSTOM_HF_KEY:
|
| 668 |
+
cid = (hosted_custom_id_v or "").strip()
|
| 669 |
+
if not cid:
|
| 670 |
+
return "β custom HF model id is empty"
|
| 671 |
+
model_key_v = cid if cid.startswith("hf-router://") else f"hf-router://{cid}"
|
| 672 |
+
else:
|
| 673 |
+
model_key_v = hosted_model_v
|
| 674 |
try:
|
| 675 |
+
base_url, _key, _model, entry = resolve_endpoint(
|
| 676 |
+
model_key_v, vllm_url=vllm_url_v, hf_token=hf_token_v,
|
| 677 |
+
)
|
| 678 |
+
except Exception as exc:
|
| 679 |
+
return f"β {exc}"
|
| 680 |
+
headers = {"Authorization": f"Bearer {hf_token_v}"} if entry.backend == "hf_router" else {}
|
| 681 |
+
models_url = f"{base_url}/models"
|
| 682 |
+
try:
|
| 683 |
+
r = httpx.get(models_url, headers=headers, timeout=15)
|
| 684 |
except Exception as exc:
|
| 685 |
return f"β `{models_url}` unreachable: `{type(exc).__name__}: {exc}`"
|
| 686 |
if r.status_code != 200:
|
| 687 |
return f"β `{models_url}` β HTTP {r.status_code}\n```\n{r.text[:400]}\n```"
|
| 688 |
try:
|
| 689 |
+
ids = [m.get("id") for m in r.json().get("data", []) if m.get("id")]
|
|
|
|
| 690 |
except Exception:
|
| 691 |
ids = []
|
| 692 |
+
hint = f" Β· backend=`{entry.backend}` Β· resolved=`{_model}`"
|
| 693 |
if ids:
|
| 694 |
+
shown = ", ".join(ids[:5]) + (f", β¦ (+{len(ids)-5} more)" if len(ids) > 5 else "")
|
| 695 |
+
return f"β
reachable{hint} Β· models: `{shown}`"
|
| 696 |
+
return f"β οΈ reachable (HTTP 200) but no `data[*].id` in response{hint}"
|
| 697 |
+
|
| 698 |
+
check_btn.click(
|
| 699 |
+
_check_endpoint,
|
| 700 |
+
inputs=[backend_mode, vllm_model, vllm_url, hosted_model, hosted_custom_id, hf_token],
|
| 701 |
+
outputs=[status],
|
| 702 |
+
)
|
| 703 |
+
|
| 704 |
+
# ββ Abort handler ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 705 |
+
# Fire-and-forget abort on the active rollout. Keeps the env + UI
|
| 706 |
+
# state so the user can see what the transcript looked like at the
|
| 707 |
+
# moment of abort.
|
| 708 |
+
def _abort(current_rollout_id: str) -> tuple:
|
| 709 |
+
from openenv.core.env_server.mcp_types import CallToolAction
|
| 710 |
+
if not current_rollout_id:
|
| 711 |
+
return (
|
| 712 |
+
"β οΈ nothing to abort (no active rollout).",
|
| 713 |
+
None, None, None, None,
|
| 714 |
+
"", [], "", "", {"abort": "no-op"},
|
| 715 |
+
gr.update(), current_rollout_id,
|
| 716 |
+
)
|
| 717 |
+
try:
|
| 718 |
+
env = _get_env()
|
| 719 |
+
env.step(
|
| 720 |
+
CallToolAction(
|
| 721 |
+
tool_name="abort_rollout",
|
| 722 |
+
arguments={"rollout_id": current_rollout_id},
|
| 723 |
+
),
|
| 724 |
+
timeout_s=30,
|
| 725 |
+
)
|
| 726 |
+
except Exception as exc: # noqa: BLE001
|
| 727 |
+
return (
|
| 728 |
+
f"β οΈ abort failed: `{type(exc).__name__}: {exc}`",
|
| 729 |
+
None, None, None, None,
|
| 730 |
+
"", [], "", "", {"abort": str(exc)},
|
| 731 |
+
gr.update(), current_rollout_id,
|
| 732 |
+
)
|
| 733 |
+
return (
|
| 734 |
+
f"βΉ **aborted** rollout `{current_rollout_id}`",
|
| 735 |
+
None, None, None, None,
|
| 736 |
+
"", [], "", "", {"abort": current_rollout_id},
|
| 737 |
+
gr.update(), current_rollout_id,
|
| 738 |
+
)
|
| 739 |
|
| 740 |
+
abort_btn.click(
|
| 741 |
+
_abort,
|
| 742 |
+
inputs=[rollout_state],
|
| 743 |
+
outputs=_output_widgets,
|
| 744 |
+
)
|
| 745 |
|
| 746 |
+
# ββ Reset handler ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 747 |
+
# Aborts any in-flight rollout, drops the cached env so the next Run
|
| 748 |
+
# creates a fresh :class:`OpenCodeEnvironment` (new MCP registry),
|
| 749 |
+
# and clears all UI panels including the transcript.
|
| 750 |
+
def _reset(current_rollout_id: str) -> tuple:
|
| 751 |
+
from openenv.core.env_server.mcp_types import CallToolAction
|
| 752 |
+
if current_rollout_id:
|
| 753 |
+
try:
|
| 754 |
+
env = _get_env()
|
| 755 |
+
env.step(
|
| 756 |
+
CallToolAction(
|
| 757 |
+
tool_name="abort_rollout",
|
| 758 |
+
arguments={"rollout_id": current_rollout_id},
|
| 759 |
+
),
|
| 760 |
+
timeout_s=30,
|
| 761 |
+
)
|
| 762 |
+
except Exception:
|
| 763 |
+
# Best-effort β if abort fails, still drop the env below
|
| 764 |
+
# so the next Run starts clean.
|
| 765 |
+
pass
|
| 766 |
_env_cache["instance"] = None
|
| 767 |
return (
|
| 768 |
+
"π **reset.** next Run will create a fresh environment.",
|
| 769 |
None, None, None, None,
|
| 770 |
"_(workdir cleared)_",
|
| 771 |
[], "", "", {"reset": True},
|
| 772 |
+
"<div class='empty'>run a rollout to see the transcript</div>",
|
| 773 |
+
"",
|
| 774 |
)
|
| 775 |
|
| 776 |
reset_btn.click(
|
| 777 |
_reset,
|
| 778 |
+
inputs=[rollout_state],
|
| 779 |
+
outputs=_output_widgets,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 780 |
)
|
| 781 |
|
| 782 |
return demo
|
|
|
|
| 785 |
# ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 786 |
|
| 787 |
|
| 788 |
+
def _error_tuple(msg: str, rollout_id: str = "") -> tuple:
|
| 789 |
return (
|
| 790 |
f"β **Error:** `{msg}`",
|
| 791 |
None, None, None, None,
|
| 792 |
"", [], "", "", {"error": msg},
|
| 793 |
+
f"<div class='errbox'>β {msg}</div>",
|
| 794 |
+
rollout_id,
|
| 795 |
+
)
|
| 796 |
+
|
| 797 |
+
|
| 798 |
+
def _boot_phase(state: dict, msg_count: int, parts_count: int) -> str:
|
| 799 |
+
"""Human-readable sandbox + session boot phase label."""
|
| 800 |
+
if state.get("error"):
|
| 801 |
+
return f"β οΈ state error: `{state.get('error')}`"
|
| 802 |
+
status = state.get("status", "?")
|
| 803 |
+
if status == "unknown":
|
| 804 |
+
return "β³ **starting rolloutβ¦**"
|
| 805 |
+
serve_sid = state.get("serve_session_id")
|
| 806 |
+
if not serve_sid:
|
| 807 |
+
return (
|
| 808 |
+
"π‘ **booting sandbox** β spawning E2B, installing opencode, "
|
| 809 |
+
"starting proxy + opencode serve (this takes ~20β40s cold)"
|
| 810 |
+
)
|
| 811 |
+
if msg_count == 0:
|
| 812 |
+
return "π‘ **creating session** β serve is up, prompt about to fire"
|
| 813 |
+
if parts_count == 0:
|
| 814 |
+
return "π **agent thinking** β first LLM call in flight"
|
| 815 |
+
turns = state.get("proxy_turns_so_far", 0)
|
| 816 |
+
return (
|
| 817 |
+
f"β‘ **running** Β· serve session `{serve_sid[:14]}β¦` Β· "
|
| 818 |
+
f"parts `{parts_count}` Β· turns `{turns}`"
|
| 819 |
)
|
| 820 |
|
| 821 |
|
server/opencode_environment.py
CHANGED
|
@@ -31,6 +31,11 @@ from fastmcp import FastMCP
|
|
| 31 |
from openenv.core.env_server.mcp_environment import MCPEnvironment
|
| 32 |
from openenv.core.env_server.types import Action, Observation
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
load_dotenv()
|
| 35 |
|
| 36 |
|
|
@@ -168,56 +173,62 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 168 |
|
| 169 |
@mcp.tool
|
| 170 |
def run_rollout(
|
| 171 |
-
|
| 172 |
-
model: str,
|
| 173 |
instruction: str,
|
| 174 |
test_script: str,
|
|
|
|
|
|
|
|
|
|
| 175 |
task_id: str = "",
|
| 176 |
setup_shell: str = "",
|
| 177 |
upload_files: Optional[dict[str, str]] = None,
|
| 178 |
-
provider: str = "openai_compatible",
|
| 179 |
-
api_key: str = "intercepted",
|
| 180 |
mode: str = "transparent_proxy",
|
| 181 |
-
disable_thinking: bool = False,
|
| 182 |
max_tokens_cap: int = 4096,
|
| 183 |
agent_timeout_s: float = 600.0,
|
| 184 |
) -> str:
|
| 185 |
"""Run one OpenCode rollout end-to-end.
|
| 186 |
|
| 187 |
Args:
|
| 188 |
-
|
| 189 |
-
|
|
|
|
| 190 |
instruction: Prompt passed to ``opencode run``.
|
| 191 |
test_script: Bash verifier. Must write a float reward to
|
| 192 |
``/home/user/logs/verifier/reward.txt``.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
task_id: Optional identifier echoed back for traceability.
|
| 194 |
setup_shell: Optional shell run before opencode starts.
|
| 195 |
-
upload_files: Optional {remote_path: content} staged
|
| 196 |
-
sandbox.
|
| 197 |
-
|
| 198 |
-
``"
|
| 199 |
-
api_key: Provider API key. vLLM ignores this.
|
| 200 |
-
mode: ``"transparent_proxy"`` (captures per-turn logprobs) or
|
| 201 |
-
``"black_box"`` (direct connection, no logprobs).
|
| 202 |
-
disable_thinking: Qwen3/Qwen3.5 proxy-side thinking disable.
|
| 203 |
max_tokens_cap: Clamp forwarded ``max_tokens``.
|
| 204 |
agent_timeout_s: Max opencode runtime in seconds.
|
| 205 |
|
| 206 |
Returns:
|
| 207 |
JSON-serialized :class:`RolloutResult`.
|
| 208 |
"""
|
|
|
|
|
|
|
|
|
|
| 209 |
return self._run_rollout_impl(
|
| 210 |
-
vllm_url=
|
| 211 |
model=model,
|
| 212 |
instruction=instruction,
|
| 213 |
test_script=test_script,
|
| 214 |
task_id=task_id,
|
| 215 |
setup_shell=setup_shell,
|
| 216 |
upload_files=upload_files or {},
|
| 217 |
-
provider=
|
| 218 |
api_key=api_key,
|
| 219 |
mode=mode,
|
| 220 |
-
disable_thinking=
|
| 221 |
max_tokens_cap=max_tokens_cap,
|
| 222 |
agent_timeout_s=agent_timeout_s,
|
| 223 |
)
|
|
@@ -230,41 +241,46 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 230 |
|
| 231 |
@mcp.tool
|
| 232 |
def start_rollout(
|
| 233 |
-
|
| 234 |
-
model: str,
|
| 235 |
instruction: str,
|
| 236 |
test_script: str = "",
|
|
|
|
|
|
|
|
|
|
| 237 |
task_id: str = "",
|
| 238 |
setup_shell: str = "",
|
| 239 |
upload_files: Optional[dict[str, str]] = None,
|
| 240 |
-
provider: str = "openai_compatible",
|
| 241 |
-
api_key: str = "intercepted",
|
| 242 |
mode: str = "transparent_proxy",
|
| 243 |
-
disable_thinking: bool = False,
|
| 244 |
max_tokens_cap: int = 4096,
|
| 245 |
agent_timeout_s: float = 600.0,
|
| 246 |
) -> str:
|
| 247 |
"""Start a rollout asynchronously; return a ``rollout_id`` immediately.
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
| 253 |
"""
|
|
|
|
|
|
|
|
|
|
| 254 |
rid = uuid4().hex[:12]
|
| 255 |
handle = self._spawn_async_rollout(
|
| 256 |
rollout_id=rid,
|
| 257 |
-
vllm_url=
|
| 258 |
model=model,
|
| 259 |
instruction=instruction,
|
| 260 |
test_script=test_script,
|
| 261 |
task_id=task_id,
|
| 262 |
setup_shell=setup_shell,
|
| 263 |
upload_files=upload_files or {},
|
| 264 |
-
provider=
|
| 265 |
api_key=api_key,
|
| 266 |
mode=mode,
|
| 267 |
-
disable_thinking=
|
| 268 |
max_tokens_cap=max_tokens_cap,
|
| 269 |
agent_timeout_s=agent_timeout_s,
|
| 270 |
)
|
|
@@ -305,6 +321,53 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 305 |
"finished_at": handle.finished_at,
|
| 306 |
})
|
| 307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
@mcp.tool
|
| 309 |
def abort_rollout(rollout_id: str) -> str:
|
| 310 |
"""Cancel an in-flight rollout.
|
|
@@ -449,13 +512,15 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 449 |
result = self._result_cls(task_id=task_id, mode=mode)
|
| 450 |
t0 = time.time()
|
| 451 |
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
| 454 |
config = self._OpenCodeConfig(
|
| 455 |
provider=provider,
|
| 456 |
base_url=vllm_url.rstrip("/"),
|
| 457 |
api_key=api_key,
|
| 458 |
-
model=
|
| 459 |
agent_timeout_s=agent_timeout_s,
|
| 460 |
proxy_disable_thinking=disable_thinking,
|
| 461 |
proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
|
|
@@ -574,12 +639,13 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 574 |
metadata={"task_id": task_id},
|
| 575 |
)
|
| 576 |
|
| 577 |
-
|
|
|
|
| 578 |
config = self._OpenCodeConfig(
|
| 579 |
provider=provider,
|
| 580 |
base_url=vllm_url.rstrip("/"),
|
| 581 |
api_key=api_key,
|
| 582 |
-
model=
|
| 583 |
agent_timeout_s=agent_timeout_s,
|
| 584 |
proxy_disable_thinking=disable_thinking,
|
| 585 |
proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
|
|
@@ -597,12 +663,16 @@ class OpenCodeEnvironment(MCPEnvironment):
|
|
| 597 |
|
| 598 |
def worker() -> None:
|
| 599 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
factory = self._OpenCodeSessionFactory(
|
| 601 |
config=config,
|
| 602 |
sandbox_backend=self._E2BSandboxBackend(),
|
| 603 |
mode=mode,
|
| 604 |
verifier=None,
|
| 605 |
-
driver="
|
| 606 |
)
|
| 607 |
handle.session = factory.create(task=task)
|
| 608 |
try:
|
|
|
|
| 31 |
from openenv.core.env_server.mcp_environment import MCPEnvironment
|
| 32 |
from openenv.core.env_server.types import Action, Observation
|
| 33 |
|
| 34 |
+
try:
|
| 35 |
+
from .catalog import resolve_endpoint
|
| 36 |
+
except ImportError: # pragma: no cover
|
| 37 |
+
from catalog import resolve_endpoint # type: ignore
|
| 38 |
+
|
| 39 |
load_dotenv()
|
| 40 |
|
| 41 |
|
|
|
|
| 173 |
|
| 174 |
@mcp.tool
|
| 175 |
def run_rollout(
|
| 176 |
+
model_key: str,
|
|
|
|
| 177 |
instruction: str,
|
| 178 |
test_script: str,
|
| 179 |
+
vllm_url: str = "",
|
| 180 |
+
hf_token: str = "",
|
| 181 |
+
thinking: bool = False,
|
| 182 |
task_id: str = "",
|
| 183 |
setup_shell: str = "",
|
| 184 |
upload_files: Optional[dict[str, str]] = None,
|
|
|
|
|
|
|
| 185 |
mode: str = "transparent_proxy",
|
|
|
|
| 186 |
max_tokens_cap: int = 4096,
|
| 187 |
agent_timeout_s: float = 600.0,
|
| 188 |
) -> str:
|
| 189 |
"""Run one OpenCode rollout end-to-end.
|
| 190 |
|
| 191 |
Args:
|
| 192 |
+
model_key: Catalog key β one of the entries in
|
| 193 |
+
:data:`server.catalog.CATALOG`. Shape is
|
| 194 |
+
``"vllm://<repo>"`` or ``"hf-router://<repo>:<provider>"``.
|
| 195 |
instruction: Prompt passed to ``opencode run``.
|
| 196 |
test_script: Bash verifier. Must write a float reward to
|
| 197 |
``/home/user/logs/verifier/reward.txt``.
|
| 198 |
+
vllm_url: Required when ``model_key`` is a ``vllm://...``
|
| 199 |
+
entry. The tunneled or in-cluster ``/v1`` endpoint.
|
| 200 |
+
hf_token: Required when ``model_key`` is a
|
| 201 |
+
``hf-router://...`` entry. User's HF token.
|
| 202 |
+
thinking: Enable Qwen-style thinking mode. Ignored for
|
| 203 |
+
models where ``supports_thinking`` is False. Passed to
|
| 204 |
+
the proxy as ``chat_template_kwargs.enable_thinking``.
|
| 205 |
task_id: Optional identifier echoed back for traceability.
|
| 206 |
setup_shell: Optional shell run before opencode starts.
|
| 207 |
+
upload_files: Optional ``{remote_path: content}`` staged
|
| 208 |
+
into the sandbox.
|
| 209 |
+
mode: ``"transparent_proxy"`` (captures per-turn logprobs)
|
| 210 |
+
or ``"black_box"`` (direct connection, no logprobs).
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
max_tokens_cap: Clamp forwarded ``max_tokens``.
|
| 212 |
agent_timeout_s: Max opencode runtime in seconds.
|
| 213 |
|
| 214 |
Returns:
|
| 215 |
JSON-serialized :class:`RolloutResult`.
|
| 216 |
"""
|
| 217 |
+
base_url, api_key, model, _entry = resolve_endpoint(
|
| 218 |
+
model_key, vllm_url=vllm_url, hf_token=hf_token
|
| 219 |
+
)
|
| 220 |
return self._run_rollout_impl(
|
| 221 |
+
vllm_url=base_url,
|
| 222 |
model=model,
|
| 223 |
instruction=instruction,
|
| 224 |
test_script=test_script,
|
| 225 |
task_id=task_id,
|
| 226 |
setup_shell=setup_shell,
|
| 227 |
upload_files=upload_files or {},
|
| 228 |
+
provider="openai_compatible",
|
| 229 |
api_key=api_key,
|
| 230 |
mode=mode,
|
| 231 |
+
disable_thinking=not bool(thinking),
|
| 232 |
max_tokens_cap=max_tokens_cap,
|
| 233 |
agent_timeout_s=agent_timeout_s,
|
| 234 |
)
|
|
|
|
| 241 |
|
| 242 |
@mcp.tool
|
| 243 |
def start_rollout(
|
| 244 |
+
model_key: str,
|
|
|
|
| 245 |
instruction: str,
|
| 246 |
test_script: str = "",
|
| 247 |
+
vllm_url: str = "",
|
| 248 |
+
hf_token: str = "",
|
| 249 |
+
thinking: bool = False,
|
| 250 |
task_id: str = "",
|
| 251 |
setup_shell: str = "",
|
| 252 |
upload_files: Optional[dict[str, str]] = None,
|
|
|
|
|
|
|
| 253 |
mode: str = "transparent_proxy",
|
|
|
|
| 254 |
max_tokens_cap: int = 4096,
|
| 255 |
agent_timeout_s: float = 600.0,
|
| 256 |
) -> str:
|
| 257 |
"""Start a rollout asynchronously; return a ``rollout_id`` immediately.
|
| 258 |
|
| 259 |
+
Same uniform args as :func:`run_rollout`: ``model_key``, plus
|
| 260 |
+
``vllm_url`` OR ``hf_token`` (depending on backend), plus
|
| 261 |
+
``thinking``. Spawns a background worker that creates the
|
| 262 |
+
sandbox, installs opencode, boots ``opencode serve``, and
|
| 263 |
+
fires the instruction. The caller then uses
|
| 264 |
+
``subscribe_events`` / ``get_state`` / ``abort_rollout`` /
|
| 265 |
+
``finalize`` with the returned id.
|
| 266 |
"""
|
| 267 |
+
base_url, api_key, model, _entry = resolve_endpoint(
|
| 268 |
+
model_key, vllm_url=vllm_url, hf_token=hf_token
|
| 269 |
+
)
|
| 270 |
rid = uuid4().hex[:12]
|
| 271 |
handle = self._spawn_async_rollout(
|
| 272 |
rollout_id=rid,
|
| 273 |
+
vllm_url=base_url,
|
| 274 |
model=model,
|
| 275 |
instruction=instruction,
|
| 276 |
test_script=test_script,
|
| 277 |
task_id=task_id,
|
| 278 |
setup_shell=setup_shell,
|
| 279 |
upload_files=upload_files or {},
|
| 280 |
+
provider="openai_compatible",
|
| 281 |
api_key=api_key,
|
| 282 |
mode=mode,
|
| 283 |
+
disable_thinking=not bool(thinking),
|
| 284 |
max_tokens_cap=max_tokens_cap,
|
| 285 |
agent_timeout_s=agent_timeout_s,
|
| 286 |
)
|
|
|
|
| 321 |
"finished_at": handle.finished_at,
|
| 322 |
})
|
| 323 |
|
| 324 |
+
@mcp.tool
|
| 325 |
+
def get_messages(rollout_id: str) -> str:
|
| 326 |
+
"""Return the sandbox-side opencode serve transcript for a rollout.
|
| 327 |
+
|
| 328 |
+
Shape matches opencode's ``GET /session/:id/message`` β
|
| 329 |
+
``{"messages": [{info, parts}, ...]}``. Empty ``messages`` list
|
| 330 |
+
if the rollout hasn't created its serve session yet, isn't
|
| 331 |
+
running under the ``serve`` driver, or fetching the transcript
|
| 332 |
+
failed. Designed for UI polling to render a live chat view.
|
| 333 |
+
"""
|
| 334 |
+
handle = self._registry.get(rollout_id)
|
| 335 |
+
if handle is None:
|
| 336 |
+
return json.dumps({"rollout_id": rollout_id, "messages": [], "status": "unknown"})
|
| 337 |
+
session = handle.session
|
| 338 |
+
status = "done" if handle.is_done() else "running"
|
| 339 |
+
if session is None:
|
| 340 |
+
return json.dumps({
|
| 341 |
+
"rollout_id": rollout_id,
|
| 342 |
+
"messages": [],
|
| 343 |
+
"status": status,
|
| 344 |
+
"error": handle.error,
|
| 345 |
+
})
|
| 346 |
+
serve_client = getattr(session, "serve_client", None)
|
| 347 |
+
serve_sid = getattr(session, "serve_session_id", None)
|
| 348 |
+
if serve_client is None or not serve_sid:
|
| 349 |
+
return json.dumps({
|
| 350 |
+
"rollout_id": rollout_id,
|
| 351 |
+
"messages": [],
|
| 352 |
+
"status": status,
|
| 353 |
+
"note": "no serve driver (transcript unavailable)",
|
| 354 |
+
})
|
| 355 |
+
try:
|
| 356 |
+
msgs = serve_client.list_messages(serve_sid) or []
|
| 357 |
+
except Exception as exc: # noqa: BLE001
|
| 358 |
+
return json.dumps({
|
| 359 |
+
"rollout_id": rollout_id,
|
| 360 |
+
"messages": [],
|
| 361 |
+
"status": status,
|
| 362 |
+
"error": f"list_messages failed: {type(exc).__name__}: {exc}",
|
| 363 |
+
})
|
| 364 |
+
return json.dumps({
|
| 365 |
+
"rollout_id": rollout_id,
|
| 366 |
+
"messages": msgs,
|
| 367 |
+
"status": status,
|
| 368 |
+
"serve_session_id": serve_sid,
|
| 369 |
+
})
|
| 370 |
+
|
| 371 |
@mcp.tool
|
| 372 |
def abort_rollout(rollout_id: str) -> str:
|
| 373 |
"""Cancel an in-flight rollout.
|
|
|
|
| 512 |
result = self._result_cls(task_id=task_id, mode=mode)
|
| 513 |
t0 = time.time()
|
| 514 |
|
| 515 |
+
# Pass the resolved model id straight through β the primitive now
|
| 516 |
+
# preserves ``config.model`` verbatim as the upstream model override,
|
| 517 |
+
# so any ``_qualify_model`` wrapping here would double-prefix and
|
| 518 |
+
# cause a 404 (``openai_compatible/Qwen/Qwen3.5-4B does not exist``).
|
| 519 |
config = self._OpenCodeConfig(
|
| 520 |
provider=provider,
|
| 521 |
base_url=vllm_url.rstrip("/"),
|
| 522 |
api_key=api_key,
|
| 523 |
+
model=model,
|
| 524 |
agent_timeout_s=agent_timeout_s,
|
| 525 |
proxy_disable_thinking=disable_thinking,
|
| 526 |
proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
|
|
|
|
| 639 |
metadata={"task_id": task_id},
|
| 640 |
)
|
| 641 |
|
| 642 |
+
# Pass model verbatim (no _qualify_model) β primitive now uses
|
| 643 |
+
# ``config.model`` as the upstream override directly.
|
| 644 |
config = self._OpenCodeConfig(
|
| 645 |
provider=provider,
|
| 646 |
base_url=vllm_url.rstrip("/"),
|
| 647 |
api_key=api_key,
|
| 648 |
+
model=model,
|
| 649 |
agent_timeout_s=agent_timeout_s,
|
| 650 |
proxy_disable_thinking=disable_thinking,
|
| 651 |
proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
|
|
|
|
| 663 |
|
| 664 |
def worker() -> None:
|
| 665 |
try:
|
| 666 |
+
# serve driver: opencode serve runs inside the sandbox, the
|
| 667 |
+
# primitive fires the prompt via POST /session/:id/prompt_async,
|
| 668 |
+
# and ``list_messages(serve_session_id)`` is what powers the
|
| 669 |
+
# live chat transcript exposed via the ``get_messages`` tool.
|
| 670 |
factory = self._OpenCodeSessionFactory(
|
| 671 |
config=config,
|
| 672 |
sandbox_backend=self._E2BSandboxBackend(),
|
| 673 |
mode=mode,
|
| 674 |
verifier=None,
|
| 675 |
+
driver="serve",
|
| 676 |
)
|
| 677 |
handle.session = factory.create(task=task)
|
| 678 |
try:
|
server/sandbox_smoke.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Stand-alone E2B sandbox smoke β boot opencode serve, expose it publicly.
|
| 2 |
+
|
| 3 |
+
This script isolates "can a sandbox even stand up opencode serve?" from
|
| 4 |
+
the rest of the env (no MCP server, no proxy, no primitive, no UI). Good
|
| 5 |
+
for when a full rollout fails and you want to rule out the sandbox path.
|
| 6 |
+
|
| 7 |
+
What it does:
|
| 8 |
+
1. Create a fresh E2B sandbox.
|
| 9 |
+
2. Write ``~/.config/opencode/opencode.json`` pointing at either:
|
| 10 |
+
- the HF Router (default, just needs HF_TOKEN), or
|
| 11 |
+
- a user-provided vLLM URL.
|
| 12 |
+
3. Install opencode via the upstream one-liner.
|
| 13 |
+
4. Start ``opencode serve --port 4096 --hostname 0.0.0.0`` in bg.
|
| 14 |
+
5. ``sandbox.get_host(4096)`` β a public ``https://4096-<sbx>.e2b.app``.
|
| 15 |
+
6. Poll ``{public}/doc`` until it answers 200.
|
| 16 |
+
7. Print the public URL + ``sandbox_id`` and keep the sandbox alive so
|
| 17 |
+
you can hit it manually. Ctrl-C closes the sandbox.
|
| 18 |
+
|
| 19 |
+
Usage:
|
| 20 |
+
# HF Router (default)
|
| 21 |
+
HF_TOKEN=hf_... uv run python server/sandbox_smoke.py
|
| 22 |
+
|
| 23 |
+
# or self-hosted vLLM
|
| 24 |
+
uv run python server/sandbox_smoke.py \\
|
| 25 |
+
--backend vllm \\
|
| 26 |
+
--vllm-url https://my-tunnel.example/v1 \\
|
| 27 |
+
--model Qwen/Qwen3.5-4B
|
| 28 |
+
|
| 29 |
+
Once it prints the URL you can:
|
| 30 |
+
|
| 31 |
+
curl https://4096-<sbx>.e2b.app/global/health
|
| 32 |
+
curl https://4096-<sbx>.e2b.app/config
|
| 33 |
+
# create + send prompt
|
| 34 |
+
SID=$(curl -s -X POST https://4096-<sbx>.e2b.app/session \\
|
| 35 |
+
-H 'content-type: application/json' \\
|
| 36 |
+
-d '{"title":"smoke"}' | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])')
|
| 37 |
+
curl -X POST https://4096-<sbx>.e2b.app/session/$SID/prompt_async \\
|
| 38 |
+
-H 'content-type: application/json' \\
|
| 39 |
+
-d '{"parts":[{"type":"text","text":"write hello.py"}]}'
|
| 40 |
+
curl -N https://4096-<sbx>.e2b.app/event
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
from __future__ import annotations
|
| 44 |
+
|
| 45 |
+
import argparse
|
| 46 |
+
import json
|
| 47 |
+
import os
|
| 48 |
+
import signal
|
| 49 |
+
import sys
|
| 50 |
+
import time
|
| 51 |
+
from pathlib import Path
|
| 52 |
+
from typing import Any
|
| 53 |
+
|
| 54 |
+
# Load the env-server's .env (E2B_API_KEY, HF_TOKEN, etc.) before importing
|
| 55 |
+
# anything that needs them. Walks up from this file to find ``openenv/.env``.
|
| 56 |
+
try:
|
| 57 |
+
from dotenv import load_dotenv
|
| 58 |
+
|
| 59 |
+
_env_path = Path(__file__).resolve().parent.parent / ".env"
|
| 60 |
+
if _env_path.is_file():
|
| 61 |
+
load_dotenv(_env_path, override=False)
|
| 62 |
+
print(f"loaded env from {_env_path}")
|
| 63 |
+
except ImportError:
|
| 64 |
+
pass
|
| 65 |
+
|
| 66 |
+
try:
|
| 67 |
+
from e2b_code_interpreter import Sandbox
|
| 68 |
+
except ImportError:
|
| 69 |
+
from e2b import Sandbox # type: ignore
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
SERVE_PORT = 4096
|
| 73 |
+
CONFIG_DIR = "/home/user/.config/opencode"
|
| 74 |
+
CONFIG_PATH = f"{CONFIG_DIR}/opencode.json"
|
| 75 |
+
LOG_DIR = "/home/user/logs/agent"
|
| 76 |
+
SERVE_LOG = f"{LOG_DIR}/serve.log"
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def build_opencode_json(
|
| 80 |
+
*,
|
| 81 |
+
backend: str,
|
| 82 |
+
model_id: str,
|
| 83 |
+
base_url: str,
|
| 84 |
+
api_key: str,
|
| 85 |
+
context_limit: int = 32768,
|
| 86 |
+
output_limit: int = 16384,
|
| 87 |
+
) -> str:
|
| 88 |
+
"""Emit a minimal, valid opencode.json for the chosen backend."""
|
| 89 |
+
provider_id = "vllm" if backend == "vllm" else "hf-router"
|
| 90 |
+
return json.dumps({
|
| 91 |
+
"$schema": "https://opencode.ai/config.json",
|
| 92 |
+
"model": f"{provider_id}/{model_id}",
|
| 93 |
+
"provider": {
|
| 94 |
+
provider_id: {
|
| 95 |
+
"npm": "@ai-sdk/openai-compatible",
|
| 96 |
+
"name": f"{provider_id} (smoke)",
|
| 97 |
+
"options": {
|
| 98 |
+
"baseURL": base_url,
|
| 99 |
+
"apiKey": api_key,
|
| 100 |
+
"timeout": 600_000,
|
| 101 |
+
},
|
| 102 |
+
"models": {
|
| 103 |
+
model_id: {
|
| 104 |
+
"name": model_id,
|
| 105 |
+
"limit": {"context": context_limit, "output": output_limit},
|
| 106 |
+
},
|
| 107 |
+
},
|
| 108 |
+
},
|
| 109 |
+
},
|
| 110 |
+
"tools": {"webfetch": False, "question": False},
|
| 111 |
+
}, indent=2)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
_START = time.time()
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def log(msg: str) -> None:
|
| 118 |
+
"""Timestamped progress line, flushed so it appears in real time."""
|
| 119 |
+
t = time.time() - _START
|
| 120 |
+
print(f"[{t:6.1f}s] {msg}", flush=True)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def run_shell(sbx: Any, cmd: str, *, timeout_s: int = 120) -> tuple[int, str, str]:
|
| 124 |
+
"""Run a shell command, return (exit_code, stdout, stderr)."""
|
| 125 |
+
out = sbx.commands.run(cmd, timeout=timeout_s)
|
| 126 |
+
return (out.exit_code, out.stdout or "", out.stderr or "")
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def main() -> int:
|
| 130 |
+
ap = argparse.ArgumentParser()
|
| 131 |
+
ap.add_argument("--backend", choices=["hf", "vllm"], default="hf")
|
| 132 |
+
ap.add_argument("--model", default="Qwen/Qwen3.5-397B-A17B:together")
|
| 133 |
+
ap.add_argument("--vllm-url", default="")
|
| 134 |
+
ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
|
| 135 |
+
ap.add_argument("--sandbox-timeout-s", type=int, default=900)
|
| 136 |
+
ap.add_argument("--idle-hold-s", type=int, default=1200,
|
| 137 |
+
help="keep the sandbox alive for this many seconds after boot")
|
| 138 |
+
args = ap.parse_args()
|
| 139 |
+
|
| 140 |
+
if args.backend == "hf":
|
| 141 |
+
if not args.hf_token:
|
| 142 |
+
print("ERROR: --backend hf needs --hf-token or $HF_TOKEN", file=sys.stderr)
|
| 143 |
+
return 2
|
| 144 |
+
base_url = "https://router.huggingface.co/v1"
|
| 145 |
+
api_key = args.hf_token
|
| 146 |
+
else:
|
| 147 |
+
if not args.vllm_url:
|
| 148 |
+
print("ERROR: --backend vllm needs --vllm-url", file=sys.stderr)
|
| 149 |
+
return 2
|
| 150 |
+
base_url = args.vllm_url.rstrip("/")
|
| 151 |
+
if not base_url.endswith("/v1"):
|
| 152 |
+
base_url += "/v1"
|
| 153 |
+
api_key = "anything"
|
| 154 |
+
|
| 155 |
+
if not os.environ.get("E2B_API_KEY"):
|
| 156 |
+
print("ERROR: E2B_API_KEY not set", file=sys.stderr)
|
| 157 |
+
return 2
|
| 158 |
+
|
| 159 |
+
log(f"[1/7] creating sandbox (timeout={args.sandbox_timeout_s}s) β¦")
|
| 160 |
+
sbx = Sandbox.create(timeout=args.sandbox_timeout_s)
|
| 161 |
+
log(f" sandbox_id = {sbx.sandbox_id}")
|
| 162 |
+
|
| 163 |
+
try:
|
| 164 |
+
log("[2/7] mkdir config + logs β¦")
|
| 165 |
+
rc, out, err = run_shell(sbx, f"mkdir -p {CONFIG_DIR} {LOG_DIR}")
|
| 166 |
+
if rc != 0:
|
| 167 |
+
log(f" FAIL rc={rc} stderr={err[:500]}")
|
| 168 |
+
return 1
|
| 169 |
+
|
| 170 |
+
log(f"[3/7] writing {CONFIG_PATH} β¦")
|
| 171 |
+
cfg = build_opencode_json(
|
| 172 |
+
backend=args.backend,
|
| 173 |
+
model_id=args.model,
|
| 174 |
+
base_url=base_url,
|
| 175 |
+
api_key=api_key,
|
| 176 |
+
)
|
| 177 |
+
sbx.files.write(CONFIG_PATH, cfg)
|
| 178 |
+
log(f" backend={args.backend} model={args.model}")
|
| 179 |
+
log(f" baseURL={base_url}")
|
| 180 |
+
|
| 181 |
+
log("[4/7] installing opencode via curl opencode.ai/install β¦ (~10-30s cold)")
|
| 182 |
+
rc, out, err = run_shell(
|
| 183 |
+
sbx,
|
| 184 |
+
"curl -fsSL https://opencode.ai/install | bash 2>&1",
|
| 185 |
+
timeout_s=300,
|
| 186 |
+
)
|
| 187 |
+
log(f" install rc={rc}")
|
| 188 |
+
if out:
|
| 189 |
+
for line in out.strip().splitlines()[-8:]:
|
| 190 |
+
log(f" β {line}")
|
| 191 |
+
if rc != 0:
|
| 192 |
+
log(" stderr tail:")
|
| 193 |
+
for line in (err or "").strip().splitlines()[-10:]:
|
| 194 |
+
log(f" β {line}")
|
| 195 |
+
return 1
|
| 196 |
+
|
| 197 |
+
log("[5/7] verifying opencode binary β¦")
|
| 198 |
+
rc, out, err = run_shell(sbx, '$HOME/.opencode/bin/opencode --version')
|
| 199 |
+
log(f" opencode --version rc={rc} out={(out or '').strip()[:120]}")
|
| 200 |
+
if rc != 0:
|
| 201 |
+
log(f" stderr: {(err or '')[:400]}")
|
| 202 |
+
return 1
|
| 203 |
+
|
| 204 |
+
log(f"[6/7] starting opencode serve in bg on :{SERVE_PORT} β¦")
|
| 205 |
+
serve_cmd = (
|
| 206 |
+
'export PATH="$HOME/.opencode/bin:$PATH" && '
|
| 207 |
+
f"opencode serve --port {SERVE_PORT} --hostname 0.0.0.0 "
|
| 208 |
+
f"> {SERVE_LOG} 2>&1"
|
| 209 |
+
)
|
| 210 |
+
serve_bg = sbx.commands.run(serve_cmd, background=True)
|
| 211 |
+
log(f" serve pid = {getattr(serve_bg, 'pid', '?')}")
|
| 212 |
+
|
| 213 |
+
host = sbx.get_host(SERVE_PORT)
|
| 214 |
+
public_url = f"https://{host}"
|
| 215 |
+
log(f" public URL = {public_url}")
|
| 216 |
+
|
| 217 |
+
log("[7/7] waiting for /doc to answer (polls every 0.5s for 60s) β¦")
|
| 218 |
+
import httpx
|
| 219 |
+
ok = False
|
| 220 |
+
for i in range(120):
|
| 221 |
+
try:
|
| 222 |
+
r = httpx.get(f"{public_url}/doc", timeout=5)
|
| 223 |
+
if r.status_code == 200:
|
| 224 |
+
log(f" /doc ok (poll #{i+1}, {i*0.5:.1f}s)")
|
| 225 |
+
ok = True
|
| 226 |
+
break
|
| 227 |
+
elif i % 6 == 5: # ~every 3s print progress
|
| 228 |
+
log(f" /doc β HTTP {r.status_code} (still trying, {i*0.5:.1f}s)")
|
| 229 |
+
except Exception as exc:
|
| 230 |
+
if i % 6 == 5:
|
| 231 |
+
log(f" /doc unreachable ({type(exc).__name__}, {i*0.5:.1f}s)")
|
| 232 |
+
time.sleep(0.5)
|
| 233 |
+
if not ok:
|
| 234 |
+
log(" /doc never answered β tailing serve log (last 2KB):")
|
| 235 |
+
try:
|
| 236 |
+
tail = sbx.files.read(SERVE_LOG)[-2000:]
|
| 237 |
+
except Exception as exc:
|
| 238 |
+
tail = f"(could not read log: {exc})"
|
| 239 |
+
for line in tail.splitlines()[-40:]:
|
| 240 |
+
log(f" β {line}")
|
| 241 |
+
return 1
|
| 242 |
+
|
| 243 |
+
print("\n" + "=" * 70)
|
| 244 |
+
print("sandbox is up β manual probe recipes:")
|
| 245 |
+
print("=" * 70)
|
| 246 |
+
print(f"curl -s {public_url}/global/health | jq .")
|
| 247 |
+
print(f"curl -s {public_url}/config | jq '.model, .provider'")
|
| 248 |
+
print()
|
| 249 |
+
print(f"SID=$(curl -s -X POST {public_url}/session \\")
|
| 250 |
+
print(" -H 'content-type: application/json' \\")
|
| 251 |
+
print(" -d '{\"title\":\"smoke\"}' | jq -r .id)")
|
| 252 |
+
print(f"curl -X POST {public_url}/session/$SID/prompt_async \\")
|
| 253 |
+
print(" -H 'content-type: application/json' \\")
|
| 254 |
+
print(" -d '{\"parts\":[{\"type\":\"text\",\"text\":\"write hello.py and run it\"}]}'")
|
| 255 |
+
print(f"curl -N {public_url}/event # SSE stream")
|
| 256 |
+
print()
|
| 257 |
+
print(f"serve log: sbx.files.read('{SERVE_LOG}')")
|
| 258 |
+
print(f"sandbox_id: {sbx.sandbox_id}")
|
| 259 |
+
print(f"holding for up to {args.idle_hold_s}s β Ctrl-C to close")
|
| 260 |
+
print("=" * 70 + "\n")
|
| 261 |
+
|
| 262 |
+
stopper = {"stop": False}
|
| 263 |
+
def _sigh(*_a):
|
| 264 |
+
print("\nsignal β closing sandbox")
|
| 265 |
+
stopper["stop"] = True
|
| 266 |
+
signal.signal(signal.SIGINT, _sigh)
|
| 267 |
+
signal.signal(signal.SIGTERM, _sigh)
|
| 268 |
+
|
| 269 |
+
# Periodic /doc ping so we catch opencode-serve crashes in real time.
|
| 270 |
+
# Any non-200 (incl. E2B's 502 "port not open") is a crash signal β
|
| 271 |
+
# dump serve.log and stop the hold.
|
| 272 |
+
import httpx
|
| 273 |
+
last_ok_ts = time.time()
|
| 274 |
+
deadline = time.time() + args.idle_hold_s
|
| 275 |
+
def _dump_serve_log() -> None:
|
| 276 |
+
try:
|
| 277 |
+
tail = sbx.files.read(SERVE_LOG)
|
| 278 |
+
log(" --- serve.log tail (last 4KB) ---")
|
| 279 |
+
for line in tail[-4000:].splitlines()[-60:]:
|
| 280 |
+
log(f" β {line}")
|
| 281 |
+
log(" --- end serve.log ---")
|
| 282 |
+
except Exception as exc2:
|
| 283 |
+
log(f" could not read serve.log: {exc2}")
|
| 284 |
+
# Also list workdir so we can see if the agent did anything.
|
| 285 |
+
try:
|
| 286 |
+
rc, out, err = run_shell(sbx, "ls -la /home/user/workdir 2>&1 | head -40")
|
| 287 |
+
log(" --- workdir ls ---")
|
| 288 |
+
for line in (out or err).splitlines():
|
| 289 |
+
log(f" β {line}")
|
| 290 |
+
except Exception:
|
| 291 |
+
pass
|
| 292 |
+
while time.time() < deadline and not stopper["stop"]:
|
| 293 |
+
try:
|
| 294 |
+
r = httpx.get(f"{public_url}/doc", timeout=5)
|
| 295 |
+
if r.status_code == 200:
|
| 296 |
+
last_ok_ts = time.time()
|
| 297 |
+
else:
|
| 298 |
+
log(f"!!! /doc β HTTP {r.status_code} "
|
| 299 |
+
f"(last ok {time.time()-last_ok_ts:.1f}s ago) β "
|
| 300 |
+
f"opencode serve appears dead, dumping log")
|
| 301 |
+
_dump_serve_log()
|
| 302 |
+
break
|
| 303 |
+
except Exception as exc:
|
| 304 |
+
log(f"!!! /doc probe failed: {type(exc).__name__}: {exc} "
|
| 305 |
+
f"(last ok {time.time()-last_ok_ts:.1f}s ago)")
|
| 306 |
+
_dump_serve_log()
|
| 307 |
+
break
|
| 308 |
+
time.sleep(10.0)
|
| 309 |
+
return 0
|
| 310 |
+
|
| 311 |
+
finally:
|
| 312 |
+
try:
|
| 313 |
+
print("killing sandbox β¦")
|
| 314 |
+
sbx.kill()
|
| 315 |
+
except Exception as exc:
|
| 316 |
+
print(f" kill failed (probably already dead): {exc}")
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
if __name__ == "__main__":
    # Script entry point: exit status is whatever main() returns.
    raise SystemExit(main())
|
server/transcript.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared transcript rendering used by both UIs.
|
| 2 |
+
|
| 3 |
+
Both ``local_ui.py`` (driving a raw ``opencode serve``) and the deployed
|
| 4 |
+
``server/gradio_ui.py`` (driving an in-sandbox ``opencode serve`` through
|
| 5 |
+
the env's MCP tools) consume the same opencode message+parts shape:
|
| 6 |
+
|
| 7 |
+
messages: [
|
| 8 |
+
{
|
| 9 |
+
"info": {id, role, sessionID, time, ...},
|
| 10 |
+
"parts": [
|
| 11 |
+
{"type": "step-start", ...},
|
| 12 |
+
{"type": "reasoning", "text": ..., "id": ...},
|
| 13 |
+
{"type": "text", "text": ..., "id": ...},
|
| 14 |
+
{"type": "tool", "tool": "...", "state": {status, input, output}, ...},
|
| 15 |
+
{"type": "step-finish", "tokens": {...}, ...},
|
| 16 |
+
],
|
| 17 |
+
},
|
| 18 |
+
...
|
| 19 |
+
]
|
| 20 |
+
|
| 21 |
+
or the flat SSE form:
|
| 22 |
+
|
| 23 |
+
events: [{"type": "message.part.updated", "properties": {"part": {...}}}, ...]
|
| 24 |
+
|
| 25 |
+
Both reduce to an ordered list of parts keyed on ``part.id``.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
|
| 30 |
+
import html as _html
|
| 31 |
+
import json
|
| 32 |
+
from typing import Any
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ββ Part collection ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def collect_parts_from_events(events: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Fold SSE ``message.part.updated`` frames into one snapshot per ``part.id``.

    The latest frame for each part id wins; ids are returned in the order
    they first appeared. Used by ``local_ui.py`` (direct SSE consumer).
    """
    # Dict insertion order (guaranteed since Python 3.7) remembers the
    # first-seen position of each id, while re-assignment keeps only the
    # latest snapshot for that id.
    latest: dict[str, dict[str, Any]] = {}
    for event in events:
        if event.get("type") != "message.part.updated":
            continue
        part = (event.get("properties") or {}).get("part") or {}
        part_id = part.get("id")
        if part_id:
            latest[part_id] = part
    return list(latest.values())
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def collect_parts_from_messages(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Flatten the ``GET /session/:id/message`` payload into an ordered parts list.

    Used by the deployed Gradio UI, which polls via the ``get_messages`` MCP
    tool. Message order is preserved and parts keep the server's emission
    order, so no sorting is required. Non-dict messages/parts are dropped.
    """
    return [
        part
        for message in messages or []
        if isinstance(message, dict)
        for part in message.get("parts") or []
        if isinstance(part, dict)
    ]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# ββ Rendering ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _esc(s: Any) -> str:
|
| 81 |
+
return _html.escape("" if s is None else str(s))
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _cap(s: str, n: int = 6000) -> str:
|
| 85 |
+
if len(s) <= n:
|
| 86 |
+
return s
|
| 87 |
+
return s[:n] + f"\n⦠({len(s) - n} chars hidden)"
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _todo_icon(status: str | None) -> str:
|
| 91 |
+
return {"completed": "β
", "in_progress": "π"}.get(status or "", "β³")
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def fmt_tool(name: str, state: dict[str, Any], raw: dict[str, Any]) -> str:
    """Render one tool invocation as a collapsible HTML card.

    Layout mirrors opencode's own UI: a tool-specific one-line summary, a
    status badge, and an expandable body showing the tool's input/output.
    Input/output are read from ``state`` first, falling back to the raw part.

    NOTE(review): emoji glyphs in the summaries were mojibake'd in transit;
    the ones below are best-effort reconstructions — confirm visually.
    """
    st = state or {}
    status = st.get("status") or "?"
    inp = st.get("input") or raw.get("input") or {}
    out = st.get("output") or raw.get("output") or ""
    badge = {"completed": "ok", "error": "err", "running": "run"}.get(status, "")

    def _pre(text: Any, limit: int = 6000, css: str = "") -> str:
        # Shared <pre> builder: stringify, cap length, HTML-escape.
        cls = f" class='{css}'" if css else ""
        return f"<pre{cls}>{_esc(_cap(str(text), limit))}</pre>"

    if name == "read":
        summary = f"📖 read <code>{_esc(inp.get('filePath') or inp.get('path'))}</code>"
        body = _pre(out)
    elif name == "write":
        target = inp.get("filePath") or inp.get("path")
        content = inp.get("content") or ""
        summary = f"✏️ write <code>{_esc(target)}</code> ({len(content)} chars)"
        body = _pre(content)
    elif name == "edit":
        target = inp.get("filePath") or inp.get("path")
        summary = f"✏️ edit <code>{_esc(target)}</code>"
        body = (
            "<div class='lbl'>- old</div>"
            + _pre(inp.get("oldString") or "", 3000, "del")
            + "<div class='lbl'>+ new</div>"
            + _pre(inp.get("newString") or "", 3000, "add")
        )
        if out:
            body += "<div class='lbl'>output</div>" + _pre(out, 2000)
    elif name == "bash":
        cmd = inp.get("command") or inp.get("cmd") or ""
        summary = f"⚡ bash <code>{_esc(cmd[:160])}</code>"
        body = _pre(out)
    elif name in ("glob", "find"):
        pattern = inp.get("pattern") or inp.get("query") or ""
        summary = f"🔍 {name} <code>{_esc(pattern)}</code>"
        body = _pre(out, 4000)
    elif name == "grep":
        where = inp.get("path") or ""
        summary = f"🔍 grep <code>{_esc(inp.get('pattern') or '')}</code>"
        if where:
            summary += f" in <code>{_esc(where)}</code>"
        body = _pre(out, 4000)
    elif name == "todowrite":
        todos = inp.get("todos") or []
        summary = f"📝 todowrite ({len(todos)} items)"
        items = "".join(
            f"<li>{_todo_icon(t.get('status'))} {_esc(t.get('content'))}</li>"
            for t in todos
        )
        body = f"<ul>{items}</ul>"
    elif name == "task":
        desc = inp.get("description") or inp.get("prompt") or ""
        summary = f"🧩 task — {_esc(desc[:160])}"
        body = _pre(out, 4000)
    elif name == "webfetch":
        summary = f"🌐 webfetch <code>{_esc(inp.get('url'))}</code>"
        body = _pre(out, 4000)
    else:
        # Generic fallback: dump JSON input and raw output.
        summary = f"🔧 {_esc(name)}"
        body = (
            "<div class='lbl'>input</div>"
            + _pre(json.dumps(inp, indent=2, default=str), 4000)
            + "<div class='lbl'>output</div>"
            + _pre(out, 4000)
        )
    return (
        "<details class='tool' open>"
        f"<summary>{summary} <span class='badge {badge}'>{_esc(status)}</span></summary>"
        f"<div class='tbody'>{body}</div>"
        "</details>"
    )
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def render_transcript(
    parts: list[dict[str, Any]], errors: list[str] | None = None
) -> str:
    """Render an ordered parts list as HTML cards.

    Emits markup styled by :data:`TRANSCRIPT_CSS`; consumers must inject
    that CSS themselves. Up to eight *errors* are shown in a banner first.

    NOTE(review): emoji/ellipsis/dash glyphs were mojibake'd in transit;
    the literals below are best-effort reconstructions — confirm visually.
    """
    chunks: list[str] = []
    if errors:
        items = "".join(f"<li>{_esc(e)}</li>" for e in errors[:8])
        chunks.append(f"<div class='errbox'><b>⚠️ errors</b><ul>{items}</ul></div>")
    if not parts:
        chunks.append("<div class='empty'>waiting for first part…</div>")
        return "".join(chunks)
    chunks.append("<div class='chat'>")
    for part in parts:
        kind = part.get("type")
        if kind == "step-start":
            chunks.append("<div class='step'>── new step ──</div>")
        elif kind == "reasoning":
            text = (part.get("text") or "").strip()
            if text:
                chunks.append(
                    "<details class='reasoning'><summary>🧠 reasoning</summary>"
                    f"<pre>{_esc(_cap(text, 4000))}</pre></details>"
                )
        elif kind == "text":
            text = (part.get("text") or "").strip()
            if text:
                chunks.append(f"<div class='assistant'><pre>{_esc(text)}</pre></div>")
        elif kind == "tool":
            chunks.append(
                fmt_tool(part.get("tool") or "?", part.get("state") or {}, part)
            )
        elif kind == "step-finish":
            # Token usage may live on the part itself or under its state.
            tokens = part.get("tokens") or (part.get("state") or {}).get("tokens") or {}
            if tokens:
                chunks.append(
                    "<div class='stepfin'>tokens: "
                    f"{_esc(json.dumps(tokens, default=str))}</div>"
                )
    chunks.append("</div>")
    return "".join(chunks)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
# Stylesheet for the HTML emitted by render_transcript(); consumers inject it.
TRANSCRIPT_CSS = """
.chat { font-size:14px; }
.assistant pre { background:#0e1013; padding:10px; border-radius:8px;
  white-space:pre-wrap; color:#eee; margin:6px 0; }
.reasoning { opacity:0.8; margin:4px 0; }
.reasoning pre { background:#0a0b0d; color:#aab; padding:8px; white-space:pre-wrap; }
.tool { border:1px solid #2a2f3a; border-radius:8px; padding:6px 10px;
  margin:6px 0; background:#12161c; }
.tool summary { cursor:pointer; color:#ddd; }
.tool code { background:#222; color:#9cf; padding:1px 4px; border-radius:3px; }
.tbody { margin-top:6px; }
.tbody pre { background:#0a0b0d; padding:8px; border-radius:4px;
  white-space:pre-wrap; max-height:400px; overflow:auto;
  font-size:12px; color:#ddd; margin:2px 0; }
.tbody pre.add { border-left:3px solid #2e6; }
.tbody pre.del { border-left:3px solid #e53; }
.tbody .lbl { color:#888; font-size:11px; margin-top:6px; }
.badge { padding:1px 6px; border-radius:8px; font-size:11px;
  background:#333; color:#ddd; }
.badge.ok { background:#1f6f43; color:white; }
.badge.err { background:#7a1e1e; color:white; }
.badge.run { background:#7a5c1e; color:white; }
.step { color:#555; text-align:center; margin:10px 0; font-size:11px; }
.stepfin { color:#666; font-size:11px; margin:4px 0 12px; }
.empty { color:#666; font-style:italic; padding:12px; }
.errbox { background:#2a1414; border:1px solid #7a1e1e; border-radius:6px;
  padding:6px 10px; margin:6px 0; color:#f88; font-size:13px; }
.errbox ul { margin:2px 0 0 18px; }
"""
|