AdithyaSK (HF Staff) committed
Commit d4d3fde · verified · 1 Parent(s): 19f2fe7

Upload folder using huggingface_hub
.dockerignore ADDED
@@ -0,0 +1,10 @@
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.egg-info
+.env
+.git
+.gitignore
+README.md
+*.md
.env.example ADDED
@@ -0,0 +1,11 @@
+# Required
+E2B_API_KEY=e2b_...
+
+# Optional - for Mode A rollouts against real OpenAI
+OPENAI_API_KEY=sk-...
+
+# Optional - max concurrent sandbox sessions per environment (default: 4)
+MAX_CONCURRENT_ENVS=4
+
+# Optional - enable the Gradio UI mounted at /
+ENABLE_WEB_INTERFACE=true
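
For reference, a minimal sketch of how an entrypoint could consume these variables. The variable names come from this file; the parsing, defaults, and module shape are illustrative, not the server's actual code:

```python
import os

# Illustrative only — mirrors the contract documented in .env.example.
E2B_API_KEY = os.environ["E2B_API_KEY"]                # required
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")  # optional, Mode A only
MAX_CONCURRENT_ENVS = int(os.environ.get("MAX_CONCURRENT_ENVS", "4"))
ENABLE_WEB_INTERFACE = os.environ.get("ENABLE_WEB_INTERFACE", "").lower() == "true"
```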
.gitignore ADDED
@@ -0,0 +1,9 @@
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+.env
+*.egg-info
+.pytest_cache/
+.gradio/
README.md CHANGED
@@ -6,7 +6,6 @@ colorTo: pink
 sdk: docker
 app_port: 8000
 pinned: false
-base_path: /web
 ---
 
 # opencode-openenv
local_ui.py ADDED
@@ -0,0 +1,517 @@
+"""Chat-style Gradio UI for a locally-running ``opencode serve``.
+
+Prereq: ``opencode serve`` on http://127.0.0.1:4096.
+
+Run:
+    uv run --with gradio --with httpx python local_ui.py
+"""
+
+from __future__ import annotations
+
+import html as _html
+import json
+import threading
+import time
+from typing import Any, Generator
+
+import gradio as gr
+import httpx
+
+
+BASE = "http://127.0.0.1:4096"
+
+
+# ── HTTP helpers ────────────────────────────────────────────────────────────
+
+
+def _get(path: str, **kw) -> Any:
+    r = httpx.get(f"{BASE}{path}", timeout=15, **kw)
+    r.raise_for_status()
+    return r.json()
+
+
+def _create_session() -> str:
+    return httpx.post(f"{BASE}/session", json={"title": "gradio"}, timeout=15).json()["id"]
+
+
+def _fire_async(sid: str, prompt: str) -> None:
+    httpx.post(
+        f"{BASE}/session/{sid}/prompt_async",
+        json={"parts": [{"type": "text", "text": prompt}]},
+        timeout=30,
+    ).raise_for_status()
+
+
+def _abort(sid: str) -> None:
+    try:
+        httpx.post(f"{BASE}/session/{sid}/abort", timeout=10)
+    except Exception:
+        pass
+
+
+def _session_diff(sid: str) -> list[dict]:
+    try:
+        return _get(f"/session/{sid}/diff") or []
+    except Exception:
+        return []
+
+
+def _session_todo(sid: str) -> list[dict]:
+    try:
+        return _get(f"/session/{sid}/todo") or []
+    except Exception:
+        return []
+
+
+# ── Server identity ────────────────────────────────────────────────────────
+
+
+def _banner() -> str:
+    try:
+        h = _get("/global/health")
+        c = _get("/config")
+        prov = (c.get("provider") or {}).get("vllm") or {}
+        opts = prov.get("options") or {}
+        model = c.get("model") or "?"
+        base_url = opts.get("baseURL") or "?"
+        limit = next(iter(prov.get("models", {}).values()), {}).get("limit") or {}
+        try:
+            tools = _get("/experimental/tool/ids") or []
+        except Exception:
+            tools = []
+        tool_line = (
+            f"<div class='tools'>tools: {', '.join(_esc(t) for t in tools)}</div>"
+            if tools else ""
+        )
+        return (
+            "<div class='banner'>"
+            f"<span class='chip ok'>opencode v{_esc(h.get('version','?'))}</span> "
+            f"<span class='chip'>model: <code>{_esc(model)}</code></span> "
+            f"<span class='chip'>baseURL: <code>{_esc(base_url)}</code></span> "
+            f"<span class='chip'>ctx: <code>{limit.get('context','?')}</code></span> "
+            f"<span class='chip'>out: <code>{limit.get('output','?')}</code></span>"
+            f"</div>{tool_line}"
+        )
+    except Exception as exc:
+        return f"<div class='banner'><span class='chip err'>server unreachable: {_esc(exc)}</span></div>"
+
+
+# ── SSE ────────────────────────────────────────────────────────────────────
+
+
+def _stream(sid_filter: str, events: list, stop: threading.Event) -> None:
+    """Tail GET /event, append every frame (caller filters)."""
+    try:
+        with httpx.stream("GET", f"{BASE}/event", timeout=None) as r:
+            for line in r.iter_lines():
+                if stop.is_set():
+                    return
+                if not line or not line.startswith("data:"):
+                    continue
+                try:
+                    events.append(json.loads(line[5:].strip()))
+                except Exception:
+                    pass
+    except Exception:
+        return
+
+
+# ── Part + delta assembly ──────────────────────────────────────────────────
+
+
+def _assemble(events: list[dict]) -> tuple[list[dict], list[str]]:
+    """Reduce events to ordered parts and collect any error reasons.
+
+    - ``message.part.updated`` is authoritative per ``part.id``.
+    - ``message.part.delta`` frames for a text part whose last snapshot is
+      shorter than the accumulated delta are appended live so streaming
+      looks smooth.
+    """
+    order: list[str] = []
+    latest: dict[str, dict] = {}
+    deltas: dict[str, str] = {}
+    errors: list[str] = []
+    for ev in events:
+        t = ev.get("type")
+        props = ev.get("properties") or {}
+        if t == "message.part.updated":
+            p = props.get("part") or {}
+            pid = p.get("id")
+            if not pid:
+                continue
+            if pid not in latest:
+                order.append(pid)
+            latest[pid] = p
+            if (p.get("state") or {}).get("status") == "error":
+                err = (p.get("state") or {}).get("error") or "tool error"
+                errors.append(f"{p.get('tool','?')}: {err}")
+        elif t == "message.part.delta":
+            p = props.get("part") or {}
+            pid = p.get("partID") or p.get("id")
+            if not pid:
+                continue
+            delta = p.get("delta") or p.get("text") or ""
+            if isinstance(delta, str) and delta:
+                deltas[pid] = deltas.get(pid, "") + delta
+        elif t in ("error", "client.error"):
+            errors.append(_esc(props.get("reason") or ev.get("reason") or "unknown"))
+
+    # Splice in any deltas that exceed the latest snapshot (live streaming).
+    parts: list[dict] = []
+    for pid in order:
+        p = dict(latest[pid])
+        if p.get("type") == "text" and pid in deltas:
+            if len(deltas[pid]) > len(p.get("text") or ""):
+                p["text"] = deltas[pid]
+        parts.append(p)
+    return parts, errors
+
+
+# ── Rendering ──────────────────────────────────────────────────────────────
+
+
+def _esc(s: Any) -> str:
+    return _html.escape("" if s is None else str(s))
+
+
+def _cap(s: str, n: int = 6000) -> str:
+    if len(s) <= n:
+        return s
+    return s[:n] + f"\n… ({len(s) - n} chars hidden)"
+
+
+def _fmt_tool(name: str, state: dict, raw: dict) -> str:
+    status = (state or {}).get("status") or "?"
+    inp = (state or {}).get("input") or raw.get("input") or {}
+    out = (state or {}).get("output") or raw.get("output") or ""
+    badge = {"completed": "ok", "error": "err", "running": "run"}.get(status, "")
+
+    if name == "read":
+        summary = f"📖 read <code>{_esc(inp.get('filePath') or inp.get('path'))}</code>"
+        body = f"<pre>{_esc(_cap(str(out)))}</pre>"
+    elif name == "write":
+        path = inp.get("filePath") or inp.get("path")
+        content = inp.get("content") or ""
+        summary = f"✍️ write <code>{_esc(path)}</code> ({len(content)} chars)"
+        body = f"<pre>{_esc(_cap(content))}</pre>"
+    elif name == "edit":
+        path = inp.get("filePath") or inp.get("path")
+        old = inp.get("oldString") or ""
+        new = inp.get("newString") or ""
+        summary = f"✏️ edit <code>{_esc(path)}</code>"
+        body = (
+            f"<div class='lbl'>- old</div><pre class='del'>{_esc(_cap(old, 3000))}</pre>"
+            f"<div class='lbl'>+ new</div><pre class='add'>{_esc(_cap(new, 3000))}</pre>"
+        )
+        if out:
+            body += f"<div class='lbl'>output</div><pre>{_esc(_cap(str(out), 2000))}</pre>"
+    elif name == "bash":
+        cmd = inp.get("command") or inp.get("cmd") or ""
+        summary = f"⚡ bash <code>{_esc(cmd[:160])}</code>"
+        body = f"<pre>{_esc(_cap(str(out)))}</pre>"
+    elif name in ("glob", "find"):
+        pattern = inp.get("pattern") or inp.get("query") or ""
+        summary = f"🔎 {name} <code>{_esc(pattern)}</code>"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "grep":
+        pattern = inp.get("pattern") or ""
+        path = inp.get("path") or ""
+        summary = f"🔎 grep <code>{_esc(pattern)}</code>" + (
+            f" in <code>{_esc(path)}</code>" if path else ""
+        )
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "todowrite":
+        todos = inp.get("todos") or []
+        summary = f"📝 todowrite ({len(todos)} items)"
+        body = "<ul>" + "".join(
+            f"<li>{_todo_icon(t.get('status'))} {_esc(t.get('content'))}</li>"
+            for t in todos
+        ) + "</ul>"
+    elif name == "task":
+        desc = inp.get("description") or inp.get("prompt") or ""
+        summary = f"🧩 task — {_esc(desc[:160])}"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "webfetch":
+        summary = f"🌐 webfetch <code>{_esc(inp.get('url'))}</code>"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    else:
+        summary = f"🔧 {_esc(name)}"
+        body = (
+            f"<div class='lbl'>input</div><pre>{_esc(_cap(json.dumps(inp, indent=2, default=str), 4000))}</pre>"
+            f"<div class='lbl'>output</div><pre>{_esc(_cap(str(out), 4000))}</pre>"
+        )
+    return (
+        "<details class='tool' open>"
+        f"<summary>{summary} <span class='badge {badge}'>{_esc(status)}</span></summary>"
+        f"<div class='tbody'>{body}</div>"
+        "</details>"
+    )
+
+
+def _todo_icon(status: str | None) -> str:
+    return {"completed": "✅", "in_progress": "🔄"}.get(status or "", "⏳")
+
+
+def _render_transcript(parts: list[dict], errors: list[str]) -> str:
+    out: list[str] = []
+    if errors:
+        out.append(
+            "<div class='errbox'><b>⚠️ errors</b><ul>"
+            + "".join(f"<li>{_esc(e)}</li>" for e in errors[:8])
+            + "</ul></div>"
+        )
+    if not parts:
+        out.append("<div class='empty'>waiting for first part…</div>")
+        return "".join(out)
+    out.append("<div class='chat'>")
+    for p in parts:
+        t = p.get("type")
+        if t == "step-start":
+            out.append("<div class='step'>── new step ──</div>")
+        elif t == "reasoning":
+            txt = (p.get("text") or "").strip()
+            if txt:
+                out.append(
+                    "<details class='reasoning'><summary>🧠 reasoning</summary>"
+                    f"<pre>{_esc(_cap(txt, 4000))}</pre></details>"
+                )
+        elif t == "text":
+            txt = (p.get("text") or "").strip()
+            if txt:
+                out.append(f"<div class='assistant'><pre>{_esc(txt)}</pre></div>")
+        elif t == "tool":
+            out.append(_fmt_tool(p.get("tool") or "?", p.get("state") or {}, p))
+        elif t == "step-finish":
+            tokens = p.get("tokens") or (p.get("state") or {}).get("tokens") or {}
+            if tokens:
+                out.append(f"<div class='stepfin'>tokens: {_esc(json.dumps(tokens, default=str))}</div>")
+    out.append("</div>")
+    return "".join(out)
+
+
+def _render_todo(todos: list[dict]) -> str:
+    if not todos:
+        return ""
+    items = "".join(
+        f"<li>{_todo_icon(t.get('status'))} {_esc(t.get('content') or t.get('text',''))}</li>"
+        for t in todos
+    )
+    return f"<div class='todostrip'><b>plan</b><ul>{items}</ul></div>"
+
+
+def _render_diff(diffs: list[dict]) -> str:
+    if not diffs:
+        return ""
+    blocks = []
+    for d in diffs:
+        path = d.get("path") or d.get("file") or "?"
+        patch = d.get("patch") or d.get("diff") or ""
+        blocks.append(
+            f"<details class='diff'><summary>{_esc(path)}</summary>"
+            f"<pre>{_esc(_cap(patch, 6000))}</pre></details>"
+        )
+    return (
+        "<details class='diff-wrap' open>"
+        f"<summary>📋 session diff ({len(diffs)} files)</summary>"
+        f"{''.join(blocks)}</details>"
+    )
+
+
+# ── State ──────────────────────────────────────────────────────────────────
+
+
+class _State:
+    sid: str = ""  # empty → next Run creates a new session
+    stop: threading.Event | None = None
+    events: list[dict] = []  # reset per session
+    sse_thread: threading.Thread | None = None
+
+
+_STATE = _State()
+
+
+def _ensure_session() -> str:
+    """Create a session if none exists; reuse across runs for multi-turn."""
+    if _STATE.sid:
+        return _STATE.sid
+    _STATE.sid = _create_session()
+    _STATE.stop = threading.Event()
+    _STATE.events = []
+    _STATE.sse_thread = threading.Thread(
+        target=_stream, args=(_STATE.sid, _STATE.events, _STATE.stop), daemon=True
+    )
+    _STATE.sse_thread.start()
+    time.sleep(0.15)
+    return _STATE.sid
+
+
+def _new_session_cb() -> tuple[str, str, str, str]:
+    """Tear down any existing SSE and clear state. Next Run opens a fresh session."""
+    if _STATE.stop:
+        _STATE.stop.set()
+    if _STATE.sid:
+        _abort(_STATE.sid)
+    _STATE.sid = ""
+    _STATE.stop = None
+    _STATE.events = []
+    return (
+        "✨ new session — Run to start",
+        "",  # transcript
+        "",  # todo
+        "",  # diff
+    )
+
+
+# ── Main ───────────────────────────────────────────────────────────────────
+
+
+def run(prompt: str) -> Generator[tuple[str, str, str, str], None, None]:
+    try:
+        sid = _ensure_session()
+    except Exception as exc:
+        yield f"❌ session create failed: {exc}", "", "", ""
+        return
+
+    # Snapshot the event index BEFORE firing — "idle for THIS turn" must be
+    # scoped to events that arrive after the prompt is sent, otherwise the
+    # idle frame from the previous turn fires the break immediately.
+    turn_start = len(_STATE.events)
+
+    try:
+        _fire_async(sid, prompt)
+    except Exception as exc:
+        yield f"❌ prompt failed: {exc}", "", "", ""
+        return
+
+    t0 = time.time()
+    last_todo_refresh = 0.0
+    todos: list[dict] = []
+
+    while time.time() - t0 < 600:
+        new_events = _STATE.events[turn_start:]
+        idle = any(e.get("type") in ("session.idle", "idle") for e in new_events)
+        parts, errors = _assemble(_STATE.events)
+
+        if time.time() - last_todo_refresh > 3.0:
+            todos = _session_todo(sid)
+            last_todo_refresh = time.time()
+
+        status = (
+            f"{'✅ idle' if idle else '⚡ running'} · "
+            f"session <code>{sid[:18]}…</code> · "
+            f"{time.time()-t0:.1f}s · {len(parts)} parts · {len(_STATE.events)} events"
+        )
+
+        diff_html = ""
+        if idle:
+            diff_html = _render_diff(_session_diff(sid))
+
+        yield status, _render_transcript(parts, errors), _render_todo(todos), diff_html
+
+        if idle:
+            break
+        time.sleep(0.4)
+
+
+def abort_cb() -> str:
+    if _STATE.sid:
+        _abort(_STATE.sid)
+    # leave SSE open so user sees the abort-related events; actual teardown on new session
+    return "⏹ aborted (session kept — click New session to clear)"
+
+
+def refresh_banner() -> str:
+    return _banner()
+
+
+# ── CSS ────────────────────────────────────────────────────────────────────
+
+_CSS = """
+.banner { margin:4px 0 2px; }
+.tools { font-size:11px; color:#888; margin:2px 0 8px; }
+.chip { display:inline-block; padding:2px 8px; margin:2px; border-radius:10px;
+        background:#2b2d31; color:#ddd; font-size:12px; }
+.chip.ok { background:#1f6f43; }
+.chip.err { background:#7a1e1e; }
+.chip code { background:transparent; color:#9ad; }
+.errbox { background:#2a1414; border:1px solid #7a1e1e; border-radius:6px;
+          padding:6px 10px; margin:6px 0; color:#f88; font-size:13px; }
+.errbox ul { margin:2px 0 0 18px; }
+.chat { font-size:14px; }
+.assistant pre { background:#0e1013; padding:10px; border-radius:8px;
+                 white-space:pre-wrap; color:#eee; margin:6px 0; }
+.reasoning { opacity:0.8; margin:4px 0; }
+.reasoning pre { background:#0a0b0d; color:#aab; padding:8px; white-space:pre-wrap; }
+.tool { border:1px solid #2a2f3a; border-radius:8px; padding:6px 10px;
+        margin:6px 0; background:#12161c; }
+.tool summary { cursor:pointer; color:#ddd; }
+.tool code { background:#222; color:#9cf; padding:1px 4px; border-radius:3px; }
+.tbody { margin-top:6px; }
+.tbody pre { background:#0a0b0d; padding:8px; border-radius:4px;
+             white-space:pre-wrap; max-height:400px; overflow:auto;
+             font-size:12px; color:#ddd; margin:2px 0; }
+.tbody pre.add { border-left:3px solid #2e6; }
+.tbody pre.del { border-left:3px solid #e53; }
+.tbody .lbl { color:#888; font-size:11px; margin-top:6px; }
+.badge { padding:1px 6px; border-radius:8px; font-size:11px;
+         background:#333; color:#ddd; }
+.badge.ok { background:#1f6f43; color:white; }
+.badge.err { background:#7a1e1e; color:white; }
+.badge.run { background:#7a5c1e; color:white; }
+.step { color:#555; text-align:center; margin:10px 0; font-size:11px; }
+.stepfin { color:#666; font-size:11px; margin:4px 0 12px; }
+.empty { color:#666; font-style:italic; padding:12px; }
+.todostrip { background:#14181e; border:1px solid #2a2f3a; border-radius:6px;
+             padding:6px 10px; margin:6px 0; font-size:13px; }
+.todostrip ul { margin:4px 0 0 18px; }
+.diff-wrap { margin:8px 0; }
+.diff summary { cursor:pointer; color:#9ad; font-family:monospace; }
+.diff pre { background:#0a0b0d; padding:8px; border-radius:4px;
+            white-space:pre; font-size:12px; color:#ddd; overflow:auto; }
+"""
+
+
+# ── Layout ─────────────────────────────────────────────────────────────────
+
+
+with gr.Blocks(title="opencode serve", css=_CSS) as demo:
+    banner_html = gr.HTML(value="_(loading…)_")
+    status_md = gr.Markdown()
+    todo_html = gr.HTML()
+    transcript_html = gr.HTML(value="<div class='empty'>run a prompt to start</div>")
+    diff_html = gr.HTML()
+
+    with gr.Row():
+        prompt = gr.Textbox(
+            label="Prompt",
+            value="Write fizzbuzz.py that prints FizzBuzz for 1..15 and run it.",
+            lines=3,
+            scale=5,
+        )
+        run_btn = gr.Button("▶ Run", variant="primary", scale=1)
+        with gr.Column(scale=1, min_width=120):
+            abort_btn = gr.Button("⏹ Abort", variant="stop")
+            new_btn = gr.Button("✨ New session")
+
+    run_btn.click(
+        run,
+        inputs=[prompt],
+        outputs=[status_md, transcript_html, todo_html, diff_html],
+    )
+    abort_btn.click(abort_cb, outputs=[status_md])
+    new_btn.click(
+        _new_session_cb,
+        outputs=[status_md, transcript_html, todo_html, diff_html],
+    )
+    demo.load(refresh_banner, outputs=[banner_html])


+if __name__ == "__main__":
+    import os
+
+    demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=int(os.environ.get("GRADIO_PORT", "7861")),
+        share=True,
+        show_error=True,
+    )
server/catalog.py ADDED
@@ -0,0 +1,200 @@
+"""Curated Qwen model catalog for the OpenCode OpenEnv server.
+
+Lives in the server (not the primitive) because routing decisions —
+which HF router backend to pick for a given Qwen repo, what counts as
+the "default" model, whether a model supports thinking — are
+deployment concerns, not harness concerns. The primitive remains
+provider-agnostic; this catalog is what the Gradio UI and the MCP
+tools consult to turn a UI selection into a concrete
+``(base_url, api_key, model_string, disable_thinking)`` quadruple.
+
+Backends supported:
+
+- ``vllm`` — user-supplied OpenAI-compatible endpoint (e.g. cloudflared
+  tunnel to ``vllm serve``, or a colocated vLLM server).
+- ``hf_router`` — Hugging Face Inference Providers router at
+  ``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``.
+  Model id carries a ``:provider`` suffix to pick the HF
+  backend (``:together``, ``:scaleway``, ``:nscale``, ...).
+
+Only HF providers verified to return ``logprobs`` are listed (see
+``DOCS/HF/hf_inference_providers_logprobs.md``).
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+
+BackendKind = Literal["vllm", "hf_router"]
+
+HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"
+
+
+class CatalogModel(BaseModel):
+    """One model entry in the curated Qwen catalog."""
+
+    #: Canonical HF-Hub repo id (no ``:provider`` suffix).
+    repo: str
+    #: Backend kind — drives routing + auth shape.
+    backend: BackendKind
+    #: For ``hf_router`` entries, the ``:<provider>`` suffix HF uses to
+    #: force a specific backend inference provider. Empty for ``vllm``.
+    hf_route: str = ""
+    #: Whether this model supports Qwen-style thinking mode.
+    supports_thinking: bool = False
+    #: Short human-readable label for UI dropdowns.
+    label: str = ""
+
+    @property
+    def dropdown_key(self) -> str:
+        """Stable unique key for UI selectors."""
+        if self.backend == "hf_router":
+            return f"hf-router://{self.repo}{self.hf_route}"
+        return f"vllm://{self.repo}"
+
+    @property
+    def opencode_model_string(self) -> str:
+        """Model id opencode should send to the endpoint.
+
+        For HF router we bake the ``:provider`` suffix into the model
+        string so the HF router picks the right backend.
+        """
+        if self.backend == "hf_router":
+            return f"{self.repo}{self.hf_route}"
+        return self.repo
+
+
+# Ordered: self-hosted vLLM first (default), then HF router options.
+CATALOG: list[CatalogModel] = [
+    # --- Local vLLM (tunneled or colocated) ---
+    CatalogModel(
+        repo="Qwen/Qwen3.5-4B",
+        backend="vllm",
+        supports_thinking=True,
+        label="Qwen3.5-4B (self-hosted vLLM)",
+    ),
+    # --- HF Inference Router (Together / Scaleway / Nscale) ---
+    CatalogModel(
+        repo="Qwen/Qwen3.5-397B-A17B",
+        backend="hf_router",
+        hf_route=":together",
+        supports_thinking=True,
+        label="Qwen3.5-397B-A17B — HF/Together",
+    ),
+    CatalogModel(
+        repo="Qwen/Qwen3.5-397B-A17B",
+        backend="hf_router",
+        hf_route=":scaleway",
+        supports_thinking=True,
+        label="Qwen3.5-397B-A17B — HF/Scaleway",
+    ),
+    CatalogModel(
+        repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
+        backend="hf_router",
+        hf_route=":together",
+        supports_thinking=False,
+        label="Qwen3-Coder-480B — HF/Together",
+    ),
+    CatalogModel(
+        repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
+        backend="hf_router",
+        hf_route=":nscale",
+        supports_thinking=False,
+        label="Qwen3-235B-A22B-2507 — HF/Nscale",
+    ),
+    CatalogModel(
+        repo="Qwen/Qwen3-4B-Instruct-2507",
+        backend="hf_router",
+        hf_route=":nscale",
+        supports_thinking=False,
+        label="Qwen3-4B-Instruct-2507 — HF/Nscale",
+    ),
+    CatalogModel(
+        repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
+        backend="hf_router",
+        hf_route=":scaleway",
+        supports_thinking=False,
+        label="Qwen3-Coder-30B-A3B — HF/Scaleway",
+    ),
+]
+
+
+def by_key(key: str) -> CatalogModel:
+    """Look up a catalog entry by ``dropdown_key``.
+
+    Falls back to synthesising an ad-hoc entry from the key's prefix so
+    users can enter a custom vLLM model id or a custom HF-router model
+    id without editing the catalog:
+
+    - ``"vllm://<repo>"`` → ad-hoc vllm entry with ``repo`` as the model id.
+    - ``"hf-router://<repo>[:<provider>]"`` → ad-hoc hf_router entry; the
+      provider suffix (if present) is preserved verbatim in ``hf_route``.
+    """
+    for m in CATALOG:
+        if m.dropdown_key == key:
+            return m
+    if key.startswith("vllm://"):
+        repo = key[len("vllm://"):].strip()
+        if not repo:
+            raise KeyError(f"missing model id in key: {key!r}")
+        return CatalogModel(
+            repo=repo, backend="vllm", supports_thinking=False,
+            label=f"{repo} (custom vLLM)",
+        )
+    if key.startswith("hf-router://"):
+        rest = key[len("hf-router://"):].strip()
+        if not rest:
+            raise KeyError(f"missing model id in key: {key!r}")
+        if ":" in rest:
+            repo, _, suffix = rest.partition(":")
+            hf_route = ":" + suffix
+        else:
+            repo, hf_route = rest, ""
+        return CatalogModel(
+            repo=repo, backend="hf_router", hf_route=hf_route,
+            supports_thinking=False,
+            label=f"{repo}{hf_route} (custom HF Router)",
+        )
+    raise KeyError(f"unknown model key: {key!r}")
+
+
+def default_model() -> CatalogModel:
+    """First entry (self-hosted vLLM 4B)."""
+    return CATALOG[0]
+
+
+def resolve_endpoint(
+    model_key: str,
+    *,
+    vllm_url: str = "",
+    hf_token: str = "",
+) -> tuple[str, str, str, "CatalogModel"]:
+    """Translate a UI selection into ``(base_url, api_key, model_string, entry)``.
+
+    Raises ``ValueError`` with a clear message when a required secret is
+    missing so the UI can render a precise "please fill in X" message.
+    """
+    m = by_key(model_key)
+    if m.backend == "vllm":
+        vllm_url = (vllm_url or "").strip()
+        if not vllm_url:
+            raise ValueError(
+                f"model {m.dropdown_key!r} requires a vLLM base URL "
+                "(the tunneled or in-cluster /v1 endpoint)."
+            )
+        base = vllm_url.rstrip("/")
+        if not base.endswith("/v1"):
+            base = base + "/v1"
+        return base, "anything", m.opencode_model_string, m
+    if m.backend == "hf_router":
+        hf_token = (hf_token or "").strip()
+        if not hf_token:
+            raise ValueError(
+                f"model {m.dropdown_key!r} requires an HF token "
+                "(hf_... from https://huggingface.co/settings/tokens)."
+            )
+        return HF_ROUTER_BASE_URL, hf_token, m.opencode_model_string, m
+    raise ValueError(f"unknown backend: {m.backend}")
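
A short usage sketch of the catalog API above (assuming `server/` is importable as a package; the tunnel URL is a placeholder):

```python
from server.catalog import by_key, default_model, resolve_endpoint

# Curated default: the self-hosted vLLM entry. "anything" is the dummy
# API key resolve_endpoint returns for vLLM backends.
base, key, model, entry = resolve_endpoint(
    default_model().dropdown_key,
    vllm_url="https://my-tunnel.example/v1",  # placeholder
)
assert key == "anything" and model == "Qwen/Qwen3.5-4B"

# Ad-hoc key: the ":provider" suffix survives into hf_route and is baked
# into the model string the HF router sees.
m = by_key("hf-router://Qwen/Qwen3-8B:together")
assert m.opencode_model_string == "Qwen/Qwen3-8B:together"
```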
server/gradio_ui.py CHANGED
@@ -15,11 +15,27 @@ ticker instead of a frozen page.
 from __future__ import annotations
 
 import json
+import os
 import time
 from typing import Any
 
 import gradio as gr
 
+try:
+    from .catalog import CATALOG, by_key, default_model, resolve_endpoint
+    from .transcript import (
+        TRANSCRIPT_CSS,
+        collect_parts_from_messages,
+        render_transcript,
+    )
+except ImportError:  # pragma: no cover — support running as a script
+    from catalog import CATALOG, by_key, default_model, resolve_endpoint  # type: ignore
+    from transcript import (  # type: ignore
+        TRANSCRIPT_CSS,
+        collect_parts_from_messages,
+        render_transcript,
+    )
+
 
 # ── Preset tasks ──────────────────────────────────────────────────────────
 # Shown in the dropdown. Each has instruction + matching bash verifier.
@@ -141,12 +157,20 @@ PRESET_TASKS: dict[str, tuple[str, str]] = {
 }
 
 
-_EXAMPLE_MODELS = [
-    "Qwen/Qwen3.5-4B",
-    "Qwen/Qwen3-Coder-Next",
-    "openai/gpt-4o-mini",
-    "openai/gpt-5.3-chat-latest",
+_HF_MODEL_CHOICES = [
+    (m.label, m.dropdown_key) for m in CATALOG if m.backend == "hf_router"
 ]
+# Sentinel value used for the "type your own HF-router id" dropdown option.
+_CUSTOM_HF_KEY = "__custom_hf__"
+_HF_MODEL_CHOICES.append(("Custom — enter HF Router model id below", _CUSTOM_HF_KEY))
+
+_DEFAULT_HF_KEY = _HF_MODEL_CHOICES[0][1]
+_HF_TOKEN_ENV = os.environ.get("HF_TOKEN", "")
+
+# Suggested / recent vllm model ids (user can type anything).
+_VLLM_MODEL_SUGGESTIONS = [
+    m.repo for m in CATALOG if m.backend == "vllm"
+] + ["Qwen/Qwen3.5-4B", "Qwen/Qwen2.5-7B-Instruct"]
 
 
 def opencode_ui_builder(
@@ -174,7 +198,7 @@ def opencode_ui_builder(
         _env_cache["instance"] = inst
         return inst
 
-    with gr.Blocks(title=title, analytics_enabled=False) as demo:
+    with gr.Blocks(title=title, analytics_enabled=False, css=TRANSCRIPT_CSS) as demo:
         gr.Markdown(
             f"# {title}\n"
             "Run one OpenCode rollout against any OpenAI-compatible endpoint. "
@@ -184,27 +208,56 @@ def opencode_ui_builder(
         )
 
         # ── Config ─────────────────────────────────────────────────────────
+        # Two backends:
+        #   1. Self-hosted vLLM — user supplies model id + base URL.
+        #   2. Hosted (HF Router) — user picks from the curated Qwen
+        #      catalog, or selects "Custom" and types their own HF-router
+        #      model id (e.g. ``Qwen/Qwen3-8B:together``).
         with gr.Row():
-            with gr.Column(scale=1):
-                vllm_url = gr.Textbox(
-                    label="vLLM / LLM base URL",
-                    value="https://<your-public-llm-host>/v1",
-                    placeholder="https://.../v1",
-                )
-                model = gr.Textbox(
-                    label="Model id",
-                    value=_EXAMPLE_MODELS[0],
-                    placeholder="Qwen/Qwen3.5-4B",
-                )
-                provider = gr.Dropdown(
-                    label="Provider",
-                    choices=["openai_compatible", "openai", "anthropic"],
-                    value="openai_compatible",
-                )
-                api_key = gr.Textbox(
-                    label="API key (ignored by vLLM)",
-                    value="intercepted",
-                    type="password",
-                )
+            with gr.Column(scale=3):
+                backend_mode = gr.Radio(
+                    label="Backend",
+                    choices=["Self-hosted vLLM", "Hosted (HF Router)"],
+                    value="Hosted (HF Router)",
+                )
+                # --- Self-hosted vLLM fields (shown only when selected) ---
+                with gr.Row(visible=False) as vllm_row:
+                    vllm_model = gr.Textbox(
+                        label="Model id (as served by your vLLM)",
+                        value=_VLLM_MODEL_SUGGESTIONS[0],
+                        placeholder="Qwen/Qwen3.5-4B",
+                        scale=1,
+                    )
+                    vllm_url = gr.Textbox(
+                        label="vLLM base URL",
+                        value="",
+                        placeholder="https://.../v1",
+                        scale=2,
+                    )
+                # --- Hosted HF Router fields (default visible) ---
+                with gr.Row(visible=True) as hf_row:
+                    hosted_model = gr.Dropdown(
+                        label="Hosted model",
+                        choices=_HF_MODEL_CHOICES,
+                        value=_DEFAULT_HF_KEY,
+                        scale=2,
+                    )
+                    hf_token = gr.Textbox(
+                        label="HF token",
+                        value=_HF_TOKEN_ENV,
+                        type="password",
+                        placeholder="hf_...",
+                        scale=2,
+                    )
+                    hosted_custom_id = gr.Textbox(
+                        label="Custom HF-router model id",
+                        value="",
+                        placeholder="Qwen/Qwen3-8B:together (org/repo[:provider])",
+                        visible=False,
+                    )
+                thinking = gr.Checkbox(
+                    label="Thinking mode (Qwen3.5 only)",
+                    value=False,
+                )
             with gr.Column(scale=1):
                 mode = gr.Dropdown(
@@ -221,82 +274,132 @@ def opencode_ui_builder(
                     minimum=60, maximum=1200, value=300, step=30,
                 )
 
-        # ── Task preset + fields ───────────────────────────────────────────
-        with gr.Row():
-            task_preset = gr.Dropdown(
-                label="Task preset",
-                choices=list(PRESET_TASKS.keys()),
-                value="hello",
-            )
-            task_id = gr.Textbox(
-                label="Task id (optional label)",
-                value="hello_demo",
-            )
-
-        instruction = gr.Textbox(
-            label="Instruction",
-            value=PRESET_TASKS["hello"][0],
-            lines=4,
-        )
-        test_script = gr.Code(
-            label="test.sh — bash verifier. Must write a float reward to /home/user/logs/verifier/reward.txt",
-            value=PRESET_TASKS["hello"][1],
-            language="shell",
-        )
-        setup_shell = gr.Textbox(
-            label="Setup shell (optional, runs before opencode)",
-            value="",
-            placeholder="e.g. pip install polars",
-        )
-
-        # Wire dropdown → populate instruction + test.sh
-        def _on_preset_change(name: str):
-            ins, tst = PRESET_TASKS.get(name, ("", ""))
-            return gr.update(value=ins), gr.update(value=tst)
-
-        task_preset.change(
-            _on_preset_change,
-            inputs=[task_preset],
-            outputs=[instruction, test_script],
+        def _on_backend_change(mode_v: str):
+            is_vllm = mode_v == "Self-hosted vLLM"
+            return (
+                gr.update(visible=is_vllm),      # vllm_row
+                gr.update(visible=not is_vllm),  # hf_row
+                gr.update(visible=False),        # hosted_custom_id reset
+            )
+
+        def _on_hosted_change(choice: str):
+            return gr.update(visible=(choice == _CUSTOM_HF_KEY))
+
+        backend_mode.change(
+            _on_backend_change,
+            inputs=[backend_mode],
+            outputs=[vllm_row, hf_row, hosted_custom_id],
         )
-
+        hosted_model.change(
+            _on_hosted_change,
+            inputs=[hosted_model],
+            outputs=[hosted_custom_id],
+        )
+
+        # ── Task fields ────────────────────────────────────────────────────
+        # Verifier (test.sh) is intentionally not surfaced here — it's only
+        # needed for scored training. For interactive use, leave it empty
+        # and just have the agent finish with something observable (e.g.
+        # "print DONE at the end"). MCP tools already accept
+        # ``test_script=""`` and skip scoring when empty.
+        instruction = gr.Textbox(
+            label="Instruction",
+            value=(
+                "Write `hello.py` in the current directory that prints "
+                "`hello` (no quotes). Then run it and print `DONE` when "
+                "you are finished."
+            ),
+            lines=4,
+        )
         with gr.Row():
-            check_btn = gr.Button("🔎 Check LLM URL")
-            run_btn = gr.Button("▶ Run rollout (streaming)", variant="primary")
-            reset_btn = gr.Button("🔄 Reset", variant="secondary")
+            task_id = gr.Textbox(
+                label="Task id (optional label)",
+                value="interactive",
+                scale=1,
+            )
+            setup_shell = gr.Textbox(
+                label="Setup shell (optional, runs before opencode)",
+                value="",
+                placeholder="e.g. pip install polars",
+                scale=3,
+            )
 
-        # ── Output panels ─────────────────────────────────────────────────
-        status = gr.Markdown()
         with gr.Row():
-            reward_out = gr.Number(label="reward", value=None, interactive=False)
-            wall_out = gr.Number(label="wall_s", value=None, interactive=False)
-            exit_out = gr.Number(label="exit_code", value=None, interactive=False)
-            turns_out = gr.Number(label="proxy_turns", value=None, interactive=False)
+            run_btn = gr.Button("▶ Run", variant="primary", scale=2)
+            abort_btn = gr.Button("⏹ Abort", variant="stop", scale=1)
+            reset_btn = gr.Button("🔄 Reset", variant="secondary", scale=1)
+            check_btn = gr.Button("🔎 Check endpoint", scale=1)
+
+        # ── Output: chat-style single-column ──────────────────────────────
+        # Transcript is the hero. The status line above it carries a
+        # sandbox-boot phase indicator so users know whether we're
+        # spawning E2B, installing opencode, or waiting for the agent.
+        # Everything else (reward, files, logprob trace, verifier, raw
+        # JSON) lives in collapsed accordions below. Matches the chat
+        # shape of local_ui.py.
+        status = gr.Markdown()
+        # Shared state: the active rollout_id so Abort and Reset can find it.
+        rollout_state = gr.State("")
+        transcript_html = gr.HTML(
+            value="<div class='empty'>run a rollout to see the transcript</div>",
+        )
 
-        with gr.Accordion("Workdir files", open=True):
+        # Hidden outputs retained only so the streaming handler's tuple
+        # shape doesn't have to change. They never render in the UI.
+        reward_out = gr.Number(visible=False)
+        wall_out = gr.Number(visible=False)
+        exit_out = gr.Number(visible=False)
+        turns_out = gr.Number(visible=False)
+        with gr.Accordion("Workdir files", open=False):
             workdir_md = gr.Markdown()
-        with gr.Accordion("Proxy trace (per turn)", open=False):
+        with gr.Accordion("Proxy trace (per turn — logprobs)", open=False):
             proxy_trace_json = gr.JSON(label=None)
-        with gr.Accordion("Verifier stdout / stderr", open=False):
-            verifier_out = gr.Textbox(label="stdout", lines=8)
-            verifier_err = gr.Textbox(label="stderr", lines=4)
+        with gr.Accordion("Diagnostics (proxy · install · agent logs)", open=False):
+            verifier_out = gr.Textbox(label="proxy/install/agent log tails", lines=12)
+            verifier_err = gr.Textbox(label="primitive error (if any)", lines=3)
        with gr.Accordion("Raw result JSON", open=False):
            raw_json = gr.JSON(label=None)
 
         # ── Streaming Run handler ─────────────────────────────────────────
         def _run_streaming(
+            backend_mode_v: str,
+            vllm_model_v: str,
             vllm_url_v: str,
-            model_v: str,
-            provider_v: str,
-            api_key_v: str,
+            hosted_model_v: str,
+            hosted_custom_id_v: str,
+            hf_token_v: str,
+            thinking_v: bool,
             mode_v: str,
             max_tokens_cap_v: int,
             agent_timeout_s_v: float,
             task_id_v: str,
             instruction_v: str,
-            test_script_v: str,
             setup_shell_v: str,
         ):
+            # Verifier is optional. For interactive use we pass an empty
+            # test_script so the finalizer skips scoring.
+            test_script_v = ""
+            # Assemble the uniform model_key from the UI's two-backend picker.
+            if backend_mode_v == "Self-hosted vLLM":
+                if not vllm_model_v.strip():
+                    yield _error_tuple("Self-hosted vLLM requires a model id.")
+                    return
+                model_key_v = f"vllm://{vllm_model_v.strip()}"
+            else:
+                if hosted_model_v == _CUSTOM_HF_KEY:
+                    cid = hosted_custom_id_v.strip()
+                    if not cid:
+                        yield _error_tuple(
+                            "Hosted 'Custom' picked but no model id entered."
+                        )
+                        return
+                    if not cid.startswith("hf-router://"):
+                        # Accept either plain "Org/Repo[:provider]" or a
+                        # fully-prefixed key.
+                        cid = f"hf-router://{cid}"
+                    model_key_v = cid
+                else:
+                    model_key_v = hosted_model_v
             """Gradio generator: yields UI updates as the rollout progresses.
 
             Uses the non-blocking fine-grained tools:
@@ -305,29 +408,52 @@ def opencode_ui_builder(
             import httpx
             from openenv.core.env_server.mcp_types import CallToolAction
 
-            # 0) Pre-flight: verify the LLM URL is reachable before burning
-            # an E2B sandbox on a URL typo.
+            # 0) Resolve the catalog pick into (base_url, api_key, model).
+            #    This validates the secret matches the selected backend.
+            try:
+                base_url, _api_key, _model, entry = resolve_endpoint(
+                    model_key_v,
+                    vllm_url=vllm_url_v,
+                    hf_token=hf_token_v,
+                )
+            except Exception as exc:
+                yield _error_tuple(f"config: {exc}")
+                return
+
+            # 1) Pre-flight: verify the endpoint is reachable before burning
+            #    an E2B sandbox on a URL typo / bad token.
             yield (
-                "🔎 **Validating LLM endpoint...**",
+                "🔎 **validating endpoint…**",
                 None, None, None, 0,
-                "", [], "", "", {"stage": "validate"},
+                "", [], "", "", {"stage": "validate", "backend": entry.backend},
+                "<div class='empty'>validating endpoint…</div>",
+                "",
             )
-            probe_url = vllm_url_v.rstrip("/")
-            if not probe_url.endswith("/v1"):
-                probe_url = probe_url + "/v1"
+            probe_headers: dict[str, str] = {}
+            if entry.backend == "hf_router":
+                probe_headers["Authorization"] = f"Bearer {hf_token_v}"
             try:
-                r = httpx.get(f"{probe_url}/models", timeout=15)
+                r = httpx.get(
+                    f"{base_url}/models", headers=probe_headers, timeout=15,
+                )
                 if r.status_code != 200:
                     yield _error_tuple(
-                        f"LLM URL {probe_url}/models returned HTTP {r.status_code}: {r.text[:200]}"
+                        f"{entry.backend} probe {base_url}/models → HTTP {r.status_code}: "
+                        f"{r.text[:200]}"
                    )
                    return
            except Exception as exc:
                yield _error_tuple(
-                    f"LLM URL unreachable: {type(exc).__name__}: {exc}"
+                    f"endpoint unreachable: {type(exc).__name__}: {exc}"
                )
                return
 
+            yield (
+                "🟡 **initialising env (creating MCP registry)…**",
+                None, None, None, 0, "", [], "", "", {"stage": "env_init"},
+                "<div class='empty'>initialising env…</div>",
+                "",
+            )
             try:
                 env = _get_env()
                 env.reset()
@@ -335,27 +461,23 @@ def opencode_ui_builder(
                 yield _error_tuple(f"env init failed: {type(exc).__name__}: {exc}")
                 return
 
-            # 1) start_rollout (returns in <1s — registry bookkeeping only)
+            # 2) start_rollout — uniform args: model_key + vllm_url + hf_token
+            #    + thinking. The env resolves via the catalog server-side.
             try:
                 start_obs = env.step(
                     CallToolAction(
                         tool_name="start_rollout",
                         arguments={
+                            "model_key": model_key_v,
                             "vllm_url": vllm_url_v,
-                            "model": model_v,
+                            "hf_token": hf_token_v,
+                            "thinking": bool(thinking_v),
                             "instruction": instruction_v,
                             "test_script": test_script_v,
                             "task_id": task_id_v,
                             "setup_shell": setup_shell_v,
                             "upload_files": {},
-                            "provider": provider_v,
-                            "api_key": api_key_v,
                             "mode": mode_v,
-                            # chat_template_kwargs.enable_thinking=false is a
-                            # harmless no-op for non-Qwen models (vLLM silently
-                            # ignores unknown template kwargs). Keep it on by
-                            # default so Qwen3/Qwen3.5 don't dump think blocks.
-                            "disable_thinking": True,
                             "max_tokens_cap": int(max_tokens_cap_v),
                             "agent_timeout_s": float(agent_timeout_s_v),
                         },
@@ -372,17 +494,21 @@ def opencode_ui_builder(
                 yield _error_tuple(f"start_rollout returned no rollout_id: {start_payload}")
                 return
 
-            # Initial UI update
+            # Initial UI update — yield the rollout_id into shared state so
+            # Abort / Reset can target the right rollout.
             yield (
-                f"**Started rollout** `{rollout_id}` — waiting for first turn…",
+                f"🟡 **rollout `{rollout_id}` started — booting sandbox…**",
                 None, None, None, 0,
                 "_(no files yet)_", [], "", "", start_payload,
+                "<div class='empty'>booting sandbox — this takes ~20–40s cold…</div>",
+                rollout_id,
             )
 
-            # 2) poll get_state every 2s, stream progress
+            # 2) Poll get_state + get_messages at 1s cadence. Show a sandbox
+            #    boot-phase label so users can tell "booting" from "stuck".
             deadline = time.time() + float(agent_timeout_s_v) + 120
+            t_started = float(start_payload.get("started_at") or time.time())
             status_str = "running"
-            state_payload: dict[str, Any] = {}
             while time.time() < deadline:
                 try:
                     state_obs = env.step(
@@ -390,27 +516,59 @@ def opencode_ui_builder(
                             tool_name="get_state",
                             arguments={"rollout_id": rollout_id},
                         ),
-                        timeout_s=30,
+                        timeout_s=20,
                     )
                     state_payload = _parse_result(state_obs)
                 except Exception as exc:
                     state_payload = {"error": f"{type(exc).__name__}: {exc}"}
 
+                # Live transcript — only meaningful once opencode serve has
+                # created its session (state_payload carries serve_session_id
+                # in that case). Before that, get_messages returns an empty
+                # list with a ``note`` field.
+                parts_list: list = []
+                transcript = "<div class='empty'>waiting for first part…</div>"
+                try:
+                    msg_obs = env.step(
+                        CallToolAction(
+                            tool_name="get_messages",
+                            arguments={"rollout_id": rollout_id},
+                        ),
+                        timeout_s=20,
+                    )
+                    msg_payload = _parse_result(msg_obs)
+                    parts_list = collect_parts_from_messages(
+                        msg_payload.get("messages") or []
+                    )
+                    if parts_list:
+                        transcript = render_transcript(parts_list)
+                except Exception:
+                    pass
+
                 status_str = state_payload.get("status", "?")
-                turns_so_far = state_payload.get("proxy_turns_so_far", 0)
-                elapsed = time.time() - float(start_payload.get("started_at") or time.time())
+                elapsed = time.time() - t_started
+                msg_count = len(
+                    (state_payload.get("messages") if isinstance(state_payload, dict) else None) or []
+                )
+                # Prefer message count from the transcript payload.
+                try:
+                    msg_count = len(msg_payload.get("messages") or [])
+                except Exception:
+                    msg_count = 0
+                phase = _boot_phase(state_payload, msg_count, len(parts_list))
 
                 yield (
-                    f"**Rollout** `{rollout_id}` · status=`{status_str}` · "
-                    f"turns so far: `{turns_so_far}` · elapsed: `{elapsed:.1f}s`",
-                    None, None, None, turns_so_far,
+                    f"{phase} · elapsed `{elapsed:.1f}s` · rollout `{rollout_id}`",
+                    None, None, None, state_payload.get("proxy_turns_so_far", 0),
                     "_(workdir populated on finalize)_",
                     [], "", "", state_payload,
+                    transcript,
+                    rollout_id,
                 )
 
                 if status_str == "done":
                     break
-                time.sleep(2.0)
+                time.sleep(1.0)
 
             # 3) finalize_rollout — run verifier + collect full result
             try:
@@ -429,6 +587,38 @@ def opencode_ui_builder(
             status_md = _summarize_status(result)
             wd_md = _render_workdir(result.get("workdir_files") or {})
             turns = result.get("proxy_turns") or []
+
+            # One last transcript fetch — captures any final parts that
+            # arrived between the last poll and session.idle.
+            final_transcript = "<div class='empty'>(transcript unavailable)</div>"
+            try:
+                msg_obs = env.step(
+                    CallToolAction(
+                        tool_name="get_messages",
+                        arguments={"rollout_id": rollout_id},
+                    ),
+                    timeout_s=30,
+                )
+                msg_payload = _parse_result(msg_obs)
+                parts = collect_parts_from_messages(msg_payload.get("messages") or [])
+                final_transcript = render_transcript(parts)
+            except Exception:
+                pass
+
+            # Diagnostics pane: concat the three log tails so failures
+            # are visible without expanding the raw JSON.
+            diag_tail = "\n".join([
+                "--- PROXY LOG TAIL ---",
+                (result.get("proxy_log_tail") or "(empty)")[-2000:],
+                "",
+                "--- INSTALL LOG TAIL ---",
+                (result.get("install_log_tail") or "(empty)")[-1000:],
+                "",
+                "--- AGENT LOG TAIL ---",
+                (result.get("agent_log_tail") or "(empty)")[-2000:],
+            ])
+            err_line = result.get("error") or ""
+
             yield (
                 status_md,
                 result.get("reward"),
@@ -437,69 +627,156 @@ def opencode_ui_builder(
                 len(turns),
                 wd_md,
                 turns,
-                (result.get("verifier_stdout") or "")[:4000],
-                (result.get("verifier_stderr") or "")[:2000],
+                diag_tail,
+                err_line,
                 result,
+                final_transcript,
+                rollout_id,
             )
 
+        _output_widgets = [
+            status, reward_out, wall_out, exit_out, turns_out,
+            workdir_md, proxy_trace_json,
+            verifier_out, verifier_err, raw_json,
+            transcript_html, rollout_state,
+        ]
         run_btn.click(
             _run_streaming,
             inputs=[
-                vllm_url, model, provider, api_key, mode,
+                backend_mode,
+                vllm_model, vllm_url,
+                hosted_model, hosted_custom_id, hf_token,
+                thinking, mode,
                 max_tokens_cap, agent_timeout_s,
-                task_id, instruction, test_script, setup_shell,
-            ],
-            outputs=[
-                status, reward_out, wall_out, exit_out, turns_out,
-                workdir_md, proxy_trace_json,
-                verifier_out, verifier_err, raw_json,
+                task_id, instruction, setup_shell,
             ],
+            outputs=_output_widgets,
        )
 
-        # Check-URL handler — cheap GET /v1/models probe. Wires here so the
-        # outputs (status, etc.) are already defined.
-        def _check_url(vllm_url_v: str) -> str:
+        # Check-endpoint handler — cheap GET /v1/models probe against the
+        # currently-configured backend.
+        def _check_endpoint(
+            backend_mode_v: str,
+            vllm_model_v: str, vllm_url_v: str,
+            hosted_model_v: str, hosted_custom_id_v: str, hf_token_v: str,
+        ) -> str:
            import httpx
-            url = vllm_url_v.rstrip("/")
-            if not url.endswith("/v1"):
-                url = url + "/v1"
-            models_url = f"{url}/models"
+            if backend_mode_v == "Self-hosted vLLM":
+                model_key_v = f"vllm://{(vllm_model_v or '').strip()}"
+            else:
+                if hosted_model_v == _CUSTOM_HF_KEY:
+                    cid = (hosted_custom_id_v or "").strip()
+                    if not cid:
+                        return "❌ custom HF model id is empty"
+                    model_key_v = cid if cid.startswith("hf-router://") else f"hf-router://{cid}"
+                else:
+                    model_key_v = hosted_model_v
             try:
-                r = httpx.get(models_url, timeout=15)
+                base_url, _key, _model, entry = resolve_endpoint(
+                    model_key_v, vllm_url=vllm_url_v, hf_token=hf_token_v,
+                )
+            except Exception as exc:
+                return f"❌ {exc}"
+            headers = {"Authorization": f"Bearer {hf_token_v}"} if entry.backend == "hf_router" else {}
+            models_url = f"{base_url}/models"
+            try:
+                r = httpx.get(models_url, headers=headers, timeout=15)
            except Exception as exc:
                return f"❌ `{models_url}` unreachable: `{type(exc).__name__}: {exc}`"
            if r.status_code != 200:
                return f"❌ `{models_url}` → HTTP {r.status_code}\n```\n{r.text[:400]}\n```"
            try:
-                body = r.json()
-                ids = [m.get("id") for m in body.get("data", []) if m.get("id")]
+                ids = [m.get("id") for m in r.json().get("data", []) if m.get("id")]
            except Exception:
                ids = []
+            hint = f" · backend=`{entry.backend}` · resolved=`{_model}`"
            if ids:
-                return f"✅ reachable · served models: `{', '.join(ids)}`"
-            return "⚠️ reachable (HTTP 200) but no `data[*].id` in response"
+                shown = ", ".join(ids[:5]) + (f", … (+{len(ids)-5} more)" if len(ids) > 5 else "")
+                return f"✅ reachable{hint} · models: `{shown}`"
+            return f"⚠️ reachable (HTTP 200) but no `data[*].id` in response{hint}"
 
-        check_btn.click(_check_url, inputs=[vllm_url], outputs=[status])
+        check_btn.click(
+            _check_endpoint,
+            inputs=[backend_mode, vllm_model, vllm_url, hosted_model, hosted_custom_id, hf_token],
+            outputs=[status],
+        )
+
+        # ── Abort handler ────────────────────────────────────────────────
+        # Fire-and-forget abort on the active rollout. Keeps the env + UI
+        # state so the user can see what the transcript looked like at the
+        # moment of abort.
+        def _abort(current_rollout_id: str) -> tuple:
+            from openenv.core.env_server.mcp_types import CallToolAction
+            if not current_rollout_id:
+                return (
+                    "⚠️ nothing to abort (no active rollout).",
+                    None, None, None, None,
+                    "", [], "", "", {"abort": "no-op"},
+                    gr.update(), current_rollout_id,
+                )
+            try:
+                env = _get_env()
+                env.step(
+                    CallToolAction(
+                        tool_name="abort_rollout",
+                        arguments={"rollout_id": current_rollout_id},
+                    ),
+                    timeout_s=30,
+                )
+            except Exception as exc:  # noqa: BLE001
+                return (
+                    f"⚠️ abort failed: `{type(exc).__name__}: {exc}`",
+                    None, None, None, None,
+                    "", [], "", "", {"abort": str(exc)},
+                    gr.update(), current_rollout_id,
+                )
+            return (
+                f"⏹ **aborted** rollout `{current_rollout_id}`",
+                None, None, None, None,
+                "", [], "", "", {"abort": current_rollout_id},
+                gr.update(), current_rollout_id,
+            )
 
-        # Reset handler — drop cached env so the next Run creates a fresh
-        # OpenCodeEnvironment (new rollout registry, new state).
-        def _reset() -> tuple:
+        abort_btn.click(
+            _abort,
+            inputs=[rollout_state],
+            outputs=_output_widgets,
+        )
+
+        # ── Reset handler ────────────────────────────────────────────────
+        # Aborts any in-flight rollout, drops the cached env so the next Run
+        # creates a fresh :class:`OpenCodeEnvironment` (new MCP registry),
+        # and clears all UI panels including the transcript.
+        def _reset(current_rollout_id: str) -> tuple:
+            from openenv.core.env_server.mcp_types import CallToolAction
+            if current_rollout_id:
+                try:
+                    env = _get_env()
+                    env.step(
+                        CallToolAction(
+                            tool_name="abort_rollout",
+                            arguments={"rollout_id": current_rollout_id},
+                        ),
+                        timeout_s=30,
+                    )
+                except Exception:
+                    # Best-effort — if abort fails, still drop the env below
+                    # so the next Run starts clean.
+                    pass
            _env_cache["instance"] = None
            return (
-                "🔄 **Reset.** Next Run will create a fresh environment.",
+                "🔄 **reset.** next Run will create a fresh environment.",
                None, None, None, None,
                "_(workdir cleared)_",
                [], "", "", {"reset": True},
+                "<div class='empty'>run a rollout to see the transcript</div>",
+                "",
            )
 
        reset_btn.click(
            _reset,
-            inputs=[],
-            outputs=[
-                status, reward_out, wall_out, exit_out, turns_out,
-                workdir_md, proxy_trace_json,
-                verifier_out, verifier_err, raw_json,
-            ],
+            inputs=[rollout_state],
+            outputs=_output_widgets,
        )
 
        return demo
@@ -508,11 +785,37 @@ def opencode_ui_builder(
 # ── Helpers ─────────────────────────────────────────────────────────────────
 
 
-def _error_tuple(msg: str) -> tuple:
+def _error_tuple(msg: str, rollout_id: str = "") -> tuple:
     return (
         f"❌ **Error:** `{msg}`",
         None, None, None, None,
         "", [], "", "", {"error": msg},
+        f"<div class='errbox'>❌ {msg}</div>",
+        rollout_id,
     )
+
+
+def _boot_phase(state: dict, msg_count: int, parts_count: int) -> str:
+    """Human-readable sandbox + session boot phase label."""
+    if state.get("error"):
+        return f"⚠️ state error: `{state.get('error')}`"
+    status = state.get("status", "?")
+    if status == "unknown":
+        return "⏳ **starting rollout…**"
+    serve_sid = state.get("serve_session_id")
+    if not serve_sid:
+        return (
+            "🟡 **booting sandbox** — spawning E2B, installing opencode,
809
+ "starting proxy + opencode serve (this takes ~20–40s cold)"
810
+ )
811
+ if msg_count == 0:
812
+ return "🟑 **creating session** β€” serve is up, prompt about to fire"
813
+ if parts_count == 0:
814
+ return "πŸ’­ **agent thinking** β€” first LLM call in flight"
815
+ turns = state.get("proxy_turns_so_far", 0)
816
+ return (
817
+ f"⚑ **running** Β· serve session `{serve_sid[:14]}…` Β· "
818
+ f"parts `{parts_count}` Β· turns `{turns}`"
819
  )
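
For reference, the label progression `_boot_phase` produces as a rollout comes up (illustrative inputs, outputs derived directly from the branches above):

    _boot_phase({"status": "unknown"}, 0, 0)
    # -> "⏳ **starting rollout…**"
    _boot_phase({"status": "running"}, 0, 0)
    # -> "🟡 **booting sandbox** — …"  (no serve_session_id yet)
    _boot_phase({"status": "running", "serve_session_id": "ses_abc123"}, 0, 0)
    # -> "🟡 **creating session** — serve is up, prompt about to fire"
    _boot_phase({"status": "running", "serve_session_id": "ses_abc123",
                 "proxy_turns_so_far": 3}, 2, 7)
    # -> "⚡ **running** · serve session `ses_abc123…` · parts `7` · turns `3`"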
server/opencode_environment.py CHANGED
@@ -31,6 +31,11 @@ from fastmcp import FastMCP
 from openenv.core.env_server.mcp_environment import MCPEnvironment
 from openenv.core.env_server.types import Action, Observation
 
+try:
+    from .catalog import resolve_endpoint
+except ImportError:  # pragma: no cover
+    from catalog import resolve_endpoint  # type: ignore
+
 load_dotenv()
 
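Note that `resolve_endpoint` lives in `server/catalog.py`, which is not part of this diff. A minimal sketch of the contract its call sites assume (names, defaults, and the entry fields are inferred from usage here and in the UI; the real catalog may differ):

    # Sketch only: inferred from call sites, not the actual server/catalog.py.
    from dataclasses import dataclass

    @dataclass
    class CatalogEntry:
        backend: str                     # "vllm" or "hf_router" (checked by the UI)
        supports_thinking: bool = False  # gates the `thinking` flag

    def resolve_endpoint(
        model_key: str, *, vllm_url: str = "", hf_token: str = ""
    ) -> tuple[str, str, str, CatalogEntry]:
        """Map a catalog key to (base_url, api_key, model_id, entry)."""
        if model_key.startswith("vllm://"):
            if not vllm_url:
                raise ValueError("vllm:// model_key requires vllm_url")
            return (vllm_url.rstrip("/"), "anything",
                    model_key[len("vllm://"):], CatalogEntry("vllm"))
        if model_key.startswith("hf-router://"):
            if not hf_token:
                raise ValueError("hf-router:// model_key requires hf_token")
            return ("https://router.huggingface.co/v1", hf_token,
                    model_key[len("hf-router://"):], CatalogEntry("hf_router"))
        raise ValueError(f"unknown model_key: {model_key!r}")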
 
@@ -168,56 +173,62 @@ class OpenCodeEnvironment(MCPEnvironment):
 
         @mcp.tool
         def run_rollout(
-            vllm_url: str,
-            model: str,
+            model_key: str,
             instruction: str,
             test_script: str,
+            vllm_url: str = "",
+            hf_token: str = "",
+            thinking: bool = False,
             task_id: str = "",
             setup_shell: str = "",
             upload_files: Optional[dict[str, str]] = None,
-            provider: str = "openai_compatible",
-            api_key: str = "intercepted",
             mode: str = "transparent_proxy",
-            disable_thinking: bool = False,
             max_tokens_cap: int = 4096,
             agent_timeout_s: float = 600.0,
         ) -> str:
             """Run one OpenCode rollout end-to-end.
 
             Args:
-                vllm_url: LLM endpoint (``https://host/v1``).
-                model: Model id the provider recognizes.
+                model_key: Catalog key — one of the entries in
+                    :data:`server.catalog.CATALOG`. Shape is
+                    ``"vllm://<repo>"`` or ``"hf-router://<repo>:<provider>"``.
                 instruction: Prompt passed to ``opencode run``.
                 test_script: Bash verifier. Must write a float reward to
                     ``/home/user/logs/verifier/reward.txt``.
+                vllm_url: Required when ``model_key`` is a ``vllm://...``
+                    entry. The tunneled or in-cluster ``/v1`` endpoint.
+                hf_token: Required when ``model_key`` is a
+                    ``hf-router://...`` entry. User's HF token.
+                thinking: Enable Qwen-style thinking mode. Ignored for
+                    models where ``supports_thinking`` is False. Passed to
+                    the proxy as ``chat_template_kwargs.enable_thinking``.
                 task_id: Optional identifier echoed back for traceability.
                 setup_shell: Optional shell run before opencode starts.
-                upload_files: Optional {remote_path: content} staged into the
-                    sandbox.
-                provider: OpenCodeConfig provider id. For vLLM use
-                    ``"openai_compatible"``; for real OpenAI ``"openai"``.
-                api_key: Provider API key. vLLM ignores this.
-                mode: ``"transparent_proxy"`` (captures per-turn logprobs) or
-                    ``"black_box"`` (direct connection, no logprobs).
-                disable_thinking: Qwen3/Qwen3.5 proxy-side thinking disable.
+                upload_files: Optional ``{remote_path: content}`` staged
+                    into the sandbox.
+                mode: ``"transparent_proxy"`` (captures per-turn logprobs)
+                    or ``"black_box"`` (direct connection, no logprobs).
                 max_tokens_cap: Clamp forwarded ``max_tokens``.
                 agent_timeout_s: Max opencode runtime in seconds.
 
             Returns:
                 JSON-serialized :class:`RolloutResult`.
             """
+            base_url, api_key, model, _entry = resolve_endpoint(
+                model_key, vllm_url=vllm_url, hf_token=hf_token
+            )
             return self._run_rollout_impl(
-                vllm_url=vllm_url,
+                vllm_url=base_url,
                 model=model,
                 instruction=instruction,
                 test_script=test_script,
                 task_id=task_id,
                 setup_shell=setup_shell,
                 upload_files=upload_files or {},
-                provider=provider,
+                provider="openai_compatible",
                 api_key=api_key,
                 mode=mode,
-                disable_thinking=disable_thinking,
+                disable_thinking=not bool(thinking),
                 max_tokens_cap=max_tokens_cap,
                 agent_timeout_s=agent_timeout_s,
             )
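With the uniform signature above, a caller needs only a catalog key plus the matching credential. A sketch of invoking the tool through an `OpenCodeEnvironment` handle, mirroring how the Gradio UI drives `get_messages` (the model key is the smoke script's default; the verifier is a throwaway example):

    from openenv.core.env_server.mcp_types import CallToolAction

    obs = env.step(
        CallToolAction(
            tool_name="run_rollout",
            arguments={
                "model_key": "hf-router://Qwen/Qwen3.5-397B-A17B:together",
                "hf_token": "hf_...",
                "instruction": "write hello.py and run it",
                # Toy verifier: reward 1.0 iff the file exists.
                "test_script": (
                    "mkdir -p /home/user/logs/verifier\n"
                    "if [ -f /home/user/workdir/hello.py ]; then v=1.0; else v=0.0; fi\n"
                    "echo $v > /home/user/logs/verifier/reward.txt"
                ),
            },
        ),
        timeout_s=900,
    )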
@@ -230,41 +241,46 @@ class OpenCodeEnvironment(MCPEnvironment):
 
         @mcp.tool
         def start_rollout(
-            vllm_url: str,
-            model: str,
+            model_key: str,
             instruction: str,
             test_script: str = "",
+            vllm_url: str = "",
+            hf_token: str = "",
+            thinking: bool = False,
             task_id: str = "",
             setup_shell: str = "",
             upload_files: Optional[dict[str, str]] = None,
-            provider: str = "openai_compatible",
-            api_key: str = "intercepted",
             mode: str = "transparent_proxy",
-            disable_thinking: bool = False,
             max_tokens_cap: int = 4096,
             agent_timeout_s: float = 600.0,
         ) -> str:
             """Start a rollout asynchronously; return a ``rollout_id`` immediately.
 
-            Spawns a background worker that creates the sandbox, installs
-            opencode, boots ``opencode serve``, and fires the instruction.
-            The caller then uses ``subscribe_events`` / ``get_state`` /
-            ``abort_rollout`` / ``finalize`` with the returned id.
+            Same uniform args as :func:`run_rollout`: ``model_key``, plus
+            ``vllm_url`` OR ``hf_token`` (depending on backend), plus
+            ``thinking``. Spawns a background worker that creates the
+            sandbox, installs opencode, boots ``opencode serve``, and
+            fires the instruction. The caller then uses
+            ``subscribe_events`` / ``get_state`` / ``abort_rollout`` /
+            ``finalize`` with the returned id.
             """
+            base_url, api_key, model, _entry = resolve_endpoint(
+                model_key, vllm_url=vllm_url, hf_token=hf_token
+            )
             rid = uuid4().hex[:12]
             handle = self._spawn_async_rollout(
                 rollout_id=rid,
-                vllm_url=vllm_url,
+                vllm_url=base_url,
                 model=model,
                 instruction=instruction,
                 test_script=test_script,
                 task_id=task_id,
                 setup_shell=setup_shell,
                 upload_files=upload_files or {},
-                provider=provider,
+                provider="openai_compatible",
                 api_key=api_key,
                 mode=mode,
-                disable_thinking=disable_thinking,
+                disable_thinking=not bool(thinking),
                 max_tokens_cap=max_tokens_cap,
                 agent_timeout_s=agent_timeout_s,
             )
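For reference, the full async lifecycle from the client side, using the env handle and `_parse_result` helper from `server/gradio_ui.py` plus the renderers from `server/transcript.py`. This is a sketch: the tool names are the ones registered in this file, `get_messages` is added a few hunks below, and the exact `start_rollout` response field (`rollout_id`) is an assumption not shown in this diff:

    import time

    from openenv.core.env_server.mcp_types import CallToolAction

    payload = _parse_result(env.step(CallToolAction(
        tool_name="start_rollout",
        arguments={
            "model_key": "vllm://Qwen/Qwen3.5-4B",
            "vllm_url": "https://my-tunnel.example/v1",
            "instruction": "write hello.py and run it",
        },
    ), timeout_s=60))
    rollout_id = payload["rollout_id"]  # assumed field name

    while True:
        msgs = _parse_result(env.step(CallToolAction(
            tool_name="get_messages",
            arguments={"rollout_id": rollout_id},
        ), timeout_s=30))
        parts = collect_parts_from_messages(msgs.get("messages") or [])
        html = render_transcript(parts)   # push into the transcript panel
        if msgs.get("status") != "running":
            break                          # "done" or "unknown"
        time.sleep(2.0)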
@@ -305,6 +321,53 @@ class OpenCodeEnvironment(MCPEnvironment):
                 "finished_at": handle.finished_at,
             })
 
+        @mcp.tool
+        def get_messages(rollout_id: str) -> str:
+            """Return the sandbox-side opencode serve transcript for a rollout.
+
+            Shape matches opencode's ``GET /session/:id/message`` —
+            ``{"messages": [{info, parts}, ...]}``. Empty ``messages`` list
+            if the rollout hasn't created its serve session yet, isn't
+            running under the ``serve`` driver, or fetching the transcript
+            failed. Designed for UI polling to render a live chat view.
+            """
+            handle = self._registry.get(rollout_id)
+            if handle is None:
+                return json.dumps({"rollout_id": rollout_id, "messages": [], "status": "unknown"})
+            session = handle.session
+            status = "done" if handle.is_done() else "running"
+            if session is None:
+                return json.dumps({
+                    "rollout_id": rollout_id,
+                    "messages": [],
+                    "status": status,
+                    "error": handle.error,
+                })
+            serve_client = getattr(session, "serve_client", None)
+            serve_sid = getattr(session, "serve_session_id", None)
+            if serve_client is None or not serve_sid:
+                return json.dumps({
+                    "rollout_id": rollout_id,
+                    "messages": [],
+                    "status": status,
+                    "note": "no serve driver (transcript unavailable)",
+                })
+            try:
+                msgs = serve_client.list_messages(serve_sid) or []
+            except Exception as exc:  # noqa: BLE001
+                return json.dumps({
+                    "rollout_id": rollout_id,
+                    "messages": [],
+                    "status": status,
+                    "error": f"list_messages failed: {type(exc).__name__}: {exc}",
+                })
+            return json.dumps({
+                "rollout_id": rollout_id,
+                "messages": msgs,
+                "status": status,
+                "serve_session_id": serve_sid,
+            })
+
         @mcp.tool
         def abort_rollout(rollout_id: str) -> str:
             """Cancel an in-flight rollout.
@@ -449,13 +512,15 @@ class OpenCodeEnvironment(MCPEnvironment):
         result = self._result_cls(task_id=task_id, mode=mode)
         t0 = time.time()
 
-        provider_model = _qualify_model(provider, model)
-
+        # Pass the resolved model id straight through — the primitive now
+        # preserves ``config.model`` verbatim as the upstream model override,
+        # so any ``_qualify_model`` wrapping here would double-prefix and
+        # cause a 404 (``openai_compatible/Qwen/Qwen3.5-4B does not exist``).
         config = self._OpenCodeConfig(
             provider=provider,
             base_url=vllm_url.rstrip("/"),
             api_key=api_key,
-            model=provider_model,
+            model=model,
             agent_timeout_s=agent_timeout_s,
             proxy_disable_thinking=disable_thinking,
             proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
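Concretely, the failure mode the comment in this hunk guards against: the old helper prepended the provider id, and the primitive now also forwards `config.model` verbatim, so the two stacked up. An illustrative sketch of the removed behavior (the real `_qualify_model` body is not shown in this diff):

    def _qualify_model(provider: str, model: str) -> str:  # old behavior, removed
        return f"{provider}/{model}"

    _qualify_model("openai_compatible", "Qwen/Qwen3.5-4B")
    # -> "openai_compatible/Qwen/Qwen3.5-4B", which vLLM rejects with a 404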
@@ -574,12 +639,13 @@ class OpenCodeEnvironment(MCPEnvironment):
             metadata={"task_id": task_id},
         )
 
-        provider_model = _qualify_model(provider, model)
+        # Pass model verbatim (no _qualify_model) — primitive now uses
+        # ``config.model`` as the upstream override directly.
         config = self._OpenCodeConfig(
             provider=provider,
             base_url=vllm_url.rstrip("/"),
             api_key=api_key,
-            model=provider_model,
+            model=model,
             agent_timeout_s=agent_timeout_s,
             proxy_disable_thinking=disable_thinking,
             proxy_max_tokens_cap=max_tokens_cap if max_tokens_cap > 0 else None,
@@ -597,12 +663,16 @@ class OpenCodeEnvironment(MCPEnvironment):
 
         def worker() -> None:
             try:
+                # serve driver: opencode serve runs inside the sandbox, the
+                # primitive fires the prompt via POST /session/:id/prompt_async,
+                # and ``list_messages(serve_session_id)`` is what powers the
+                # live chat transcript exposed via the ``get_messages`` tool.
                 factory = self._OpenCodeSessionFactory(
                     config=config,
                     sandbox_backend=self._E2BSandboxBackend(),
                     mode=mode,
                     verifier=None,
-                    driver="cli",  # serve is a state store, not an agent runner
+                    driver="serve",
                 )
                 handle.session = factory.create(task=task)
                 try:
server/sandbox_smoke.py ADDED
@@ -0,0 +1,320 @@
+"""Stand-alone E2B sandbox smoke — boot opencode serve, expose it publicly.
+
+This script isolates "can a sandbox even stand up opencode serve?" from
+the rest of the env (no MCP server, no proxy, no primitive, no UI). Good
+for when a full rollout fails and you want to rule out the sandbox path.
+
+What it does:
+  1. Create a fresh E2B sandbox.
+  2. Write ``~/.config/opencode/opencode.json`` pointing at either:
+     - the HF Router (default, just needs HF_TOKEN), or
+     - a user-provided vLLM URL.
+  3. Install opencode via the upstream one-liner.
+  4. Start ``opencode serve --port 4096 --hostname 0.0.0.0`` in bg.
+  5. ``sandbox.get_host(4096)`` → a public ``https://4096-<sbx>.e2b.app``.
+  6. Poll ``{public}/doc`` until it answers 200.
+  7. Print the public URL + ``sandbox_id`` and keep the sandbox alive so
+     you can hit it manually. Ctrl-C closes the sandbox.
+
+Usage:
+    # HF Router (default)
+    HF_TOKEN=hf_... uv run python server/sandbox_smoke.py
+
+    # or self-hosted vLLM
+    uv run python server/sandbox_smoke.py \\
+        --backend vllm \\
+        --vllm-url https://my-tunnel.example/v1 \\
+        --model Qwen/Qwen3.5-4B
+
+Once it prints the URL you can:
+
+    curl https://4096-<sbx>.e2b.app/global/health
+    curl https://4096-<sbx>.e2b.app/config
+    # create + send prompt
+    SID=$(curl -s -X POST https://4096-<sbx>.e2b.app/session \\
+        -H 'content-type: application/json' \\
+        -d '{"title":"smoke"}' | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])')
+    curl -X POST https://4096-<sbx>.e2b.app/session/$SID/prompt_async \\
+        -H 'content-type: application/json' \\
+        -d '{"parts":[{"type":"text","text":"write hello.py"}]}'
+    curl -N https://4096-<sbx>.e2b.app/event
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import signal
+import sys
+import time
+from pathlib import Path
+from typing import Any
+
+# Load the env-server's .env (E2B_API_KEY, HF_TOKEN, etc.) before importing
+# anything that needs them. Walks up from this file to find ``openenv/.env``.
+try:
+    from dotenv import load_dotenv
+
+    _env_path = Path(__file__).resolve().parent.parent / ".env"
+    if _env_path.is_file():
+        load_dotenv(_env_path, override=False)
+        print(f"loaded env from {_env_path}")
+except ImportError:
+    pass
+
+try:
+    from e2b_code_interpreter import Sandbox
+except ImportError:
+    from e2b import Sandbox  # type: ignore
+
+
+SERVE_PORT = 4096
+CONFIG_DIR = "/home/user/.config/opencode"
+CONFIG_PATH = f"{CONFIG_DIR}/opencode.json"
+LOG_DIR = "/home/user/logs/agent"
+SERVE_LOG = f"{LOG_DIR}/serve.log"
+
+
+def build_opencode_json(
+    *,
+    backend: str,
+    model_id: str,
+    base_url: str,
+    api_key: str,
+    context_limit: int = 32768,
+    output_limit: int = 16384,
+) -> str:
+    """Emit a minimal, valid opencode.json for the chosen backend."""
+    provider_id = "vllm" if backend == "vllm" else "hf-router"
+    return json.dumps({
+        "$schema": "https://opencode.ai/config.json",
+        "model": f"{provider_id}/{model_id}",
+        "provider": {
+            provider_id: {
+                "npm": "@ai-sdk/openai-compatible",
+                "name": f"{provider_id} (smoke)",
+                "options": {
+                    "baseURL": base_url,
+                    "apiKey": api_key,
+                    "timeout": 600_000,
+                },
+                "models": {
+                    model_id: {
+                        "name": model_id,
+                        "limit": {"context": context_limit, "output": output_limit},
+                    },
+                },
+            },
+        },
+        "tools": {"webfetch": False, "question": False},
+    }, indent=2)
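For reference, with ``--backend vllm --model Qwen/Qwen3.5-4B``, default limits, and a placeholder base URL, the function above emits:

    {
      "$schema": "https://opencode.ai/config.json",
      "model": "vllm/Qwen/Qwen3.5-4B",
      "provider": {
        "vllm": {
          "npm": "@ai-sdk/openai-compatible",
          "name": "vllm (smoke)",
          "options": {
            "baseURL": "https://my-tunnel.example/v1",
            "apiKey": "anything",
            "timeout": 600000
          },
          "models": {
            "Qwen/Qwen3.5-4B": {
              "name": "Qwen/Qwen3.5-4B",
              "limit": {"context": 32768, "output": 16384}
            }
          }
        }
      },
      "tools": {"webfetch": false, "question": false}
    }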
+
+_START = time.time()
+
+
+def log(msg: str) -> None:
+    """Timestamped progress line, flushed so it appears in real time."""
+    t = time.time() - _START
+    print(f"[{t:6.1f}s] {msg}", flush=True)
+
+
+def run_shell(sbx: Any, cmd: str, *, timeout_s: int = 120) -> tuple[int, str, str]:
+    """Run a shell command, return (exit_code, stdout, stderr)."""
+    out = sbx.commands.run(cmd, timeout=timeout_s)
+    return (out.exit_code, out.stdout or "", out.stderr or "")
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--backend", choices=["hf", "vllm"], default="hf")
+    ap.add_argument("--model", default="Qwen/Qwen3.5-397B-A17B:together")
+    ap.add_argument("--vllm-url", default="")
+    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
+    ap.add_argument("--sandbox-timeout-s", type=int, default=900)
+    ap.add_argument("--idle-hold-s", type=int, default=1200,
+                    help="keep the sandbox alive for this many seconds after boot")
+    args = ap.parse_args()
+
+    if args.backend == "hf":
+        if not args.hf_token:
+            print("ERROR: --backend hf needs --hf-token or $HF_TOKEN", file=sys.stderr)
+            return 2
+        base_url = "https://router.huggingface.co/v1"
+        api_key = args.hf_token
+    else:
+        if not args.vllm_url:
+            print("ERROR: --backend vllm needs --vllm-url", file=sys.stderr)
+            return 2
+        base_url = args.vllm_url.rstrip("/")
+        if not base_url.endswith("/v1"):
+            base_url += "/v1"
+        api_key = "anything"
+
+    if not os.environ.get("E2B_API_KEY"):
+        print("ERROR: E2B_API_KEY not set", file=sys.stderr)
+        return 2
+
+    log(f"[1/7] creating sandbox (timeout={args.sandbox_timeout_s}s) …")
+    sbx = Sandbox.create(timeout=args.sandbox_timeout_s)
+    log(f"      sandbox_id = {sbx.sandbox_id}")
+
+    try:
+        log("[2/7] mkdir config + logs …")
+        rc, out, err = run_shell(sbx, f"mkdir -p {CONFIG_DIR} {LOG_DIR}")
+        if rc != 0:
+            log(f"      FAIL rc={rc} stderr={err[:500]}")
+            return 1
+
+        log(f"[3/7] writing {CONFIG_PATH} …")
+        cfg = build_opencode_json(
+            backend=args.backend,
+            model_id=args.model,
+            base_url=base_url,
+            api_key=api_key,
+        )
+        sbx.files.write(CONFIG_PATH, cfg)
+        log(f"      backend={args.backend} model={args.model}")
+        log(f"      baseURL={base_url}")
+
+        log("[4/7] installing opencode via curl opencode.ai/install … (~10-30s cold)")
+        rc, out, err = run_shell(
+            sbx,
+            "curl -fsSL https://opencode.ai/install | bash 2>&1",
+            timeout_s=300,
+        )
+        log(f"      install rc={rc}")
+        if out:
+            for line in out.strip().splitlines()[-8:]:
+                log(f"      │ {line}")
+        if rc != 0:
+            log("      stderr tail:")
+            for line in (err or "").strip().splitlines()[-10:]:
+                log(f"      │ {line}")
+            return 1
+
+        log("[5/7] verifying opencode binary …")
+        rc, out, err = run_shell(sbx, '$HOME/.opencode/bin/opencode --version')
+        log(f"      opencode --version rc={rc} out={(out or '').strip()[:120]}")
+        if rc != 0:
+            log(f"      stderr: {(err or '')[:400]}")
+            return 1
+
+        log(f"[6/7] starting opencode serve in bg on :{SERVE_PORT} …")
+        serve_cmd = (
+            'export PATH="$HOME/.opencode/bin:$PATH" && '
+            f"opencode serve --port {SERVE_PORT} --hostname 0.0.0.0 "
+            f"> {SERVE_LOG} 2>&1"
+        )
+        serve_bg = sbx.commands.run(serve_cmd, background=True)
+        log(f"      serve pid = {getattr(serve_bg, 'pid', '?')}")
+
+        host = sbx.get_host(SERVE_PORT)
+        public_url = f"https://{host}"
+        log(f"      public URL = {public_url}")
+
+        log("[7/7] waiting for /doc to answer (polls every 0.5s for 60s) …")
+        import httpx
+        ok = False
+        for i in range(120):
+            try:
+                r = httpx.get(f"{public_url}/doc", timeout=5)
+                if r.status_code == 200:
+                    log(f"      /doc ok (poll #{i+1}, {i*0.5:.1f}s)")
+                    ok = True
+                    break
+                elif i % 6 == 5:  # ~every 3s print progress
+                    log(f"      /doc → HTTP {r.status_code} (still trying, {i*0.5:.1f}s)")
+            except Exception as exc:
+                if i % 6 == 5:
+                    log(f"      /doc unreachable ({type(exc).__name__}, {i*0.5:.1f}s)")
+            time.sleep(0.5)
+        if not ok:
+            log("      /doc never answered — tailing serve log (last 2KB):")
+            try:
+                tail = sbx.files.read(SERVE_LOG)[-2000:]
+            except Exception as exc:
+                tail = f"(could not read log: {exc})"
+            for line in tail.splitlines()[-40:]:
+                log(f"      │ {line}")
+            return 1
+
+        print("\n" + "=" * 70)
+        print("sandbox is up — manual probe recipes:")
+        print("=" * 70)
+        print(f"curl -s {public_url}/global/health | jq .")
+        print(f"curl -s {public_url}/config | jq '.model, .provider'")
+        print()
+        print(f"SID=$(curl -s -X POST {public_url}/session \\")
+        print("    -H 'content-type: application/json' \\")
+        print("    -d '{\"title\":\"smoke\"}' | jq -r .id)")
+        print(f"curl -X POST {public_url}/session/$SID/prompt_async \\")
+        print("    -H 'content-type: application/json' \\")
+        print("    -d '{\"parts\":[{\"type\":\"text\",\"text\":\"write hello.py and run it\"}]}'")
+        print(f"curl -N {public_url}/event   # SSE stream")
+        print()
+        print(f"serve log:  sbx.files.read('{SERVE_LOG}')")
+        print(f"sandbox_id: {sbx.sandbox_id}")
+        print(f"holding for up to {args.idle_hold_s}s — Ctrl-C to close")
+        print("=" * 70 + "\n")
+
+        stopper = {"stop": False}
+        def _sigh(*_a):
+            print("\nsignal — closing sandbox")
+            stopper["stop"] = True
+        signal.signal(signal.SIGINT, _sigh)
+        signal.signal(signal.SIGTERM, _sigh)
+
+        # Periodic /doc ping so we catch opencode-serve crashes in real time.
+        # Any non-200 (incl. E2B's 502 "port not open") is a crash signal —
+        # dump serve.log and stop the hold.
+        import httpx
+        last_ok_ts = time.time()
+        deadline = time.time() + args.idle_hold_s
+        def _dump_serve_log() -> None:
+            try:
+                tail = sbx.files.read(SERVE_LOG)
+                log("    --- serve.log tail (last 4KB) ---")
+                for line in tail[-4000:].splitlines()[-60:]:
+                    log(f"    │ {line}")
+                log("    --- end serve.log ---")
+            except Exception as exc2:
+                log(f"    could not read serve.log: {exc2}")
+            # Also list workdir so we can see if the agent did anything.
+            try:
+                rc, out, err = run_shell(sbx, "ls -la /home/user/workdir 2>&1 | head -40")
+                log("    --- workdir ls ---")
+                for line in (out or err).splitlines():
+                    log(f"    │ {line}")
+            except Exception:
+                pass
+        while time.time() < deadline and not stopper["stop"]:
+            try:
+                r = httpx.get(f"{public_url}/doc", timeout=5)
+                if r.status_code == 200:
+                    last_ok_ts = time.time()
+                else:
+                    log(f"!!! /doc → HTTP {r.status_code} "
+                        f"(last ok {time.time()-last_ok_ts:.1f}s ago) — "
+                        f"opencode serve appears dead, dumping log")
+                    _dump_serve_log()
+                    break
+            except Exception as exc:
+                log(f"!!! /doc probe failed: {type(exc).__name__}: {exc} "
+                    f"(last ok {time.time()-last_ok_ts:.1f}s ago)")
+                _dump_serve_log()
+                break
+            time.sleep(10.0)
+        return 0
+
+    finally:
+        try:
+            print("killing sandbox …")
+            sbx.kill()
+        except Exception as exc:
+            print(f"  kill failed (probably already dead): {exc}")
+
+
+if __name__ == "__main__":
+    sys.exit(main())
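The curl recipes the script prints translate one-to-one to Python, e.g. (sketch; substitute the URL the script actually prints):

    import httpx

    public_url = "https://4096-<sbx>.e2b.app"  # printed by step [6/7]
    sid = httpx.post(f"{public_url}/session", json={"title": "smoke"},
                     timeout=15).json()["id"]
    httpx.post(
        f"{public_url}/session/{sid}/prompt_async",
        json={"parts": [{"type": "text", "text": "write hello.py and run it"}]},
        timeout=30,
    ).raise_for_status()
    # Transcript, in the same message+parts shape server/transcript.py consumes:
    print(httpx.get(f"{public_url}/session/{sid}/message", timeout=15).json())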
server/transcript.py ADDED
@@ -0,0 +1,237 @@
+"""Shared transcript rendering used by both UIs.
+
+Both ``local_ui.py`` (driving a raw ``opencode serve``) and the deployed
+``server/gradio_ui.py`` (driving an in-sandbox ``opencode serve`` through
+the env's MCP tools) consume the same opencode message+parts shape:
+
+    messages: [
+        {
+            "info": {id, role, sessionID, time, ...},
+            "parts": [
+                {"type": "step-start", ...},
+                {"type": "reasoning", "text": ..., "id": ...},
+                {"type": "text", "text": ..., "id": ...},
+                {"type": "tool", "tool": "...", "state": {status, input, output}, ...},
+                {"type": "step-finish", "tokens": {...}, ...},
+            ],
+        },
+        ...
+    ]
+
+or the flat SSE form:
+
+    events: [{"type": "message.part.updated", "properties": {"part": {...}}}, ...]
+
+Both reduce to an ordered list of parts keyed on ``part.id``.
+"""
+
+from __future__ import annotations
+
+import html as _html
+import json
+from typing import Any
+
+
+# ── Part collection ────────────────────────────────────────────────────────
+
+
+def collect_parts_from_events(events: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Reduce SSE ``message.part.updated`` frames to the latest snapshot per ``part.id``.
+
+    Used by ``local_ui.py`` (direct SSE consumer).
+    """
+    order: list[str] = []
+    latest: dict[str, dict[str, Any]] = {}
+    for ev in events:
+        if ev.get("type") != "message.part.updated":
+            continue
+        p = (ev.get("properties") or {}).get("part") or {}
+        pid = p.get("id")
+        if not pid:
+            continue
+        if pid not in latest:
+            order.append(pid)
+        latest[pid] = p
+    return [latest[i] for i in order]
+
+
+def collect_parts_from_messages(
+    messages: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Flatten the ``GET /session/:id/message`` shape into an ordered parts list.
+
+    Used by the deployed Gradio UI, which polls via the ``get_messages`` MCP tool.
+    Message order is preserved; within a message the server returns parts in
+    emission order, so no further sorting is needed.
+    """
+    parts: list[dict[str, Any]] = []
+    for m in messages or []:
+        if not isinstance(m, dict):
+            continue
+        for p in m.get("parts") or []:
+            if isinstance(p, dict):
+                parts.append(p)
+    return parts
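A tiny worked example of both collectors, on the shapes from the module docstring (values illustrative):

    events = [
        {"type": "message.part.updated",
         "properties": {"part": {"id": "p1", "type": "text", "text": "hi"}}},
        {"type": "message.part.updated",
         "properties": {"part": {"id": "p1", "type": "text", "text": "hi there"}}},
    ]
    assert collect_parts_from_events(events) == [
        {"id": "p1", "type": "text", "text": "hi there"}  # latest snapshot wins
    ]

    messages = [{"info": {"role": "assistant"},
                 "parts": [{"id": "p1", "type": "text", "text": "done"}]}]
    assert collect_parts_from_messages(messages) == [
        {"id": "p1", "type": "text", "text": "done"}
    ]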
+
+
+# ── Rendering ──────────────────────────────────────────────────────────────
+
+
+def _esc(s: Any) -> str:
+    return _html.escape("" if s is None else str(s))
+
+
+def _cap(s: str, n: int = 6000) -> str:
+    if len(s) <= n:
+        return s
+    return s[:n] + f"\n… ({len(s) - n} chars hidden)"
+
+
+def _todo_icon(status: str | None) -> str:
+    return {"completed": "✅", "in_progress": "🔄"}.get(status or "", "⏳")
+
+
+def fmt_tool(name: str, state: dict[str, Any], raw: dict[str, Any]) -> str:
+    """Per-tool card — mirrors opencode's own UI shapes."""
+    status = (state or {}).get("status") or "?"
+    inp = (state or {}).get("input") or raw.get("input") or {}
+    out = (state or {}).get("output") or raw.get("output") or ""
+    badge = {"completed": "ok", "error": "err", "running": "run"}.get(status, "")
+
+    if name == "read":
+        summary = f"📖 read <code>{_esc(inp.get('filePath') or inp.get('path'))}</code>"
+        body = f"<pre>{_esc(_cap(str(out)))}</pre>"
+    elif name == "write":
+        path = inp.get("filePath") or inp.get("path")
+        content = inp.get("content") or ""
+        summary = f"✍️ write <code>{_esc(path)}</code> ({len(content)} chars)"
+        body = f"<pre>{_esc(_cap(content))}</pre>"
+    elif name == "edit":
+        path = inp.get("filePath") or inp.get("path")
+        old = inp.get("oldString") or ""
+        new = inp.get("newString") or ""
+        summary = f"✏️ edit <code>{_esc(path)}</code>"
+        body = (
+            f"<div class='lbl'>- old</div><pre class='del'>{_esc(_cap(old, 3000))}</pre>"
+            f"<div class='lbl'>+ new</div><pre class='add'>{_esc(_cap(new, 3000))}</pre>"
+        )
+        if out:
+            body += f"<div class='lbl'>output</div><pre>{_esc(_cap(str(out), 2000))}</pre>"
+    elif name == "bash":
+        cmd = inp.get("command") or inp.get("cmd") or ""
+        summary = f"⚡ bash <code>{_esc(cmd[:160])}</code>"
+        body = f"<pre>{_esc(_cap(str(out)))}</pre>"
+    elif name in ("glob", "find"):
+        pattern = inp.get("pattern") or inp.get("query") or ""
+        summary = f"🔎 {name} <code>{_esc(pattern)}</code>"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "grep":
+        pattern = inp.get("pattern") or ""
+        path = inp.get("path") or ""
+        summary = f"🔎 grep <code>{_esc(pattern)}</code>" + (
+            f" in <code>{_esc(path)}</code>" if path else ""
+        )
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "todowrite":
+        todos = inp.get("todos") or []
+        summary = f"📝 todowrite ({len(todos)} items)"
+        body = "<ul>" + "".join(
+            f"<li>{_todo_icon(t.get('status'))} {_esc(t.get('content'))}</li>"
+            for t in todos
+        ) + "</ul>"
+    elif name == "task":
+        desc = inp.get("description") or inp.get("prompt") or ""
+        summary = f"🧩 task — {_esc(desc[:160])}"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    elif name == "webfetch":
+        summary = f"🌐 webfetch <code>{_esc(inp.get('url'))}</code>"
+        body = f"<pre>{_esc(_cap(str(out), 4000))}</pre>"
+    else:
+        summary = f"🔧 {_esc(name)}"
+        body = (
+            f"<div class='lbl'>input</div><pre>{_esc(_cap(json.dumps(inp, indent=2, default=str), 4000))}</pre>"
+            f"<div class='lbl'>output</div><pre>{_esc(_cap(str(out), 4000))}</pre>"
+        )
+    return (
+        "<details class='tool' open>"
+        f"<summary>{summary} <span class='badge {badge}'>{_esc(status)}</span></summary>"
+        f"<div class='tbody'>{body}</div>"
+        "</details>"
+    )
+
+
+def render_transcript(
+    parts: list[dict[str, Any]], errors: list[str] | None = None
+) -> str:
+    """Render a parts list as HTML cards. Emits wrapped CSS-friendly markup.
+
+    Consumers should inject the CSS from :data:`TRANSCRIPT_CSS`.
+    """
+    out: list[str] = []
+    if errors:
+        out.append(
+            "<div class='errbox'><b>⚠️ errors</b><ul>"
+            + "".join(f"<li>{_esc(e)}</li>" for e in errors[:8])
+            + "</ul></div>"
+        )
+    if not parts:
+        out.append("<div class='empty'>waiting for first part…</div>")
+        return "".join(out)
+    out.append("<div class='chat'>")
+    for p in parts:
+        t = p.get("type")
+        if t == "step-start":
+            out.append("<div class='step'>── new step ──</div>")
+        elif t == "reasoning":
+            txt = (p.get("text") or "").strip()
+            if txt:
+                out.append(
+                    "<details class='reasoning'><summary>🧠 reasoning</summary>"
+                    f"<pre>{_esc(_cap(txt, 4000))}</pre></details>"
+                )
+        elif t == "text":
+            txt = (p.get("text") or "").strip()
+            if txt:
+                out.append(f"<div class='assistant'><pre>{_esc(txt)}</pre></div>")
+        elif t == "tool":
+            out.append(fmt_tool(p.get("tool") or "?", p.get("state") or {}, p))
+        elif t == "step-finish":
+            tokens = p.get("tokens") or (p.get("state") or {}).get("tokens") or {}
+            if tokens:
+                out.append(
+                    f"<div class='stepfin'>tokens: "
+                    f"{_esc(json.dumps(tokens, default=str))}</div>"
+                )
+    out.append("</div>")
+    return "".join(out)
+
+
+TRANSCRIPT_CSS = """
+.chat { font-size:14px; }
+.assistant pre { background:#0e1013; padding:10px; border-radius:8px;
+                 white-space:pre-wrap; color:#eee; margin:6px 0; }
+.reasoning { opacity:0.8; margin:4px 0; }
+.reasoning pre { background:#0a0b0d; color:#aab; padding:8px; white-space:pre-wrap; }
+.tool { border:1px solid #2a2f3a; border-radius:8px; padding:6px 10px;
+        margin:6px 0; background:#12161c; }
+.tool summary { cursor:pointer; color:#ddd; }
+.tool code { background:#222; color:#9cf; padding:1px 4px; border-radius:3px; }
+.tbody { margin-top:6px; }
+.tbody pre { background:#0a0b0d; padding:8px; border-radius:4px;
+             white-space:pre-wrap; max-height:400px; overflow:auto;
+             font-size:12px; color:#ddd; margin:2px 0; }
+.tbody pre.add { border-left:3px solid #2e6; }
+.tbody pre.del { border-left:3px solid #e53; }
+.tbody .lbl { color:#888; font-size:11px; margin-top:6px; }
+.badge { padding:1px 6px; border-radius:8px; font-size:11px;
+         background:#333; color:#ddd; }
+.badge.ok { background:#1f6f43; color:white; }
+.badge.err { background:#7a1e1e; color:white; }
+.badge.run { background:#7a5c1e; color:white; }
+.step { color:#555; text-align:center; margin:10px 0; font-size:11px; }
+.stepfin { color:#666; font-size:11px; margin:4px 0 12px; }
+.empty { color:#666; font-style:italic; padding:12px; }
+.errbox { background:#2a1414; border:1px solid #7a1e1e; border-radius:6px;
+          padding:6px 10px; margin:6px 0; color:#f88; font-size:13px; }
+.errbox ul { margin:2px 0 0 18px; }
+"""