Spaces:
Sleeping
Sleeping
arnavzz Claude Sonnet 4.6 committed on
Commit ·
b450c0e
1
Parent(s): 51133cf
feat: add interactive landing page with live debug arena
Browse files
Dark-themed landing page with:
- Hero section with live status, task count
- Interactive debug arena (pick task, edit code, submit, see tests animate)
- Terminal panel showing structured [START]/[STEP]/[END] logs
- How It Works 3-step flow
- Task explorer with difficulty badges
- Architecture diagram (OpenEnv RL loop)
- Performance metrics section
- Prism.js syntax highlighting, CSS animations, fade-in on scroll
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- code_debug_env/server/app.py +11 -0
- code_debug_env/static/index.html +748 -0
code_debug_env/server/app.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
|
|
|
|
|
| 1 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 2 |
|
| 3 |
from ..models import (
|
| 4 |
DebugState,
|
|
@@ -17,6 +20,14 @@ app = FastAPI(
|
|
| 17 |
|
| 18 |
env = CodeDebugEnvironment()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
@app.get("/health")
|
| 22 |
async def health():
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
from fastapi import FastAPI, HTTPException
|
| 4 |
+
from fastapi.responses import HTMLResponse
|
| 5 |
|
| 6 |
from ..models import (
|
| 7 |
DebugState,
|
|
|
|
| 20 |
|
| 21 |
env = CodeDebugEnvironment()
|
| 22 |
|
| 23 |
+
_STATIC_DIR = Path(__file__).parent.parent / "static"
|
| 24 |
+
_INDEX_HTML = (_STATIC_DIR / "index.html").read_text(encoding="utf-8")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@app.get("/", response_class=HTMLResponse)
|
| 28 |
+
async def landing():
|
| 29 |
+
return _INDEX_HTML
|
| 30 |
+
|
| 31 |
|
| 32 |
@app.get("/health")
|
| 33 |
async def health():
|
code_debug_env/static/index.html
ADDED
|
@@ -0,0 +1,748 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Code Debug Arena | OpenEnv</title>
|
| 7 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism-themes/1.9.0/prism-vsc-dark-plus.min.css">
|
| 8 |
+
<style>
|
| 9 |
+
/* ── Reset & Base ───────────────────────────────────────── */
|
| 10 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 11 |
+
|
| 12 |
+
:root {
|
| 13 |
+
--bg: #0d1117;
|
| 14 |
+
--bg-card: #161b22;
|
| 15 |
+
--bg-editor: #1e1e1e;
|
| 16 |
+
--bg-input: #21252b;
|
| 17 |
+
--bg-hover: #30363d;
|
| 18 |
+
--text: #e6edf3;
|
| 19 |
+
--text-muted: #8b949e;
|
| 20 |
+
--text-dim: #484f58;
|
| 21 |
+
--accent: #58a6ff;
|
| 22 |
+
--green: #3fb950;
|
| 23 |
+
--orange: #d29922;
|
| 24 |
+
--red: #f85149;
|
| 25 |
+
--purple: #bc8cff;
|
| 26 |
+
--border: #30363d;
|
| 27 |
+
--radius: 12px;
|
| 28 |
+
--max-w: 1100px;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
html { scroll-behavior: smooth; }
|
| 32 |
+
|
| 33 |
+
body {
|
| 34 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
|
| 35 |
+
background: var(--bg);
|
| 36 |
+
color: var(--text);
|
| 37 |
+
line-height: 1.6;
|
| 38 |
+
overflow-x: hidden;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
code, pre, textarea, .mono { font-family: 'Cascadia Code', 'Fira Code', 'JetBrains Mono', Consolas, monospace; }
|
| 42 |
+
|
| 43 |
+
a { color: var(--accent); text-decoration: none; }
|
| 44 |
+
a:hover { text-decoration: underline; }
|
| 45 |
+
|
| 46 |
+
.container { max-width: var(--max-w); margin: 0 auto; padding: 0 24px; }
|
| 47 |
+
|
| 48 |
+
/* ── Fade-in animation ──────────────────────────────────── */
|
| 49 |
+
.fade-in {
|
| 50 |
+
opacity: 0;
|
| 51 |
+
transform: translateY(28px);
|
| 52 |
+
transition: opacity 0.65s ease-out, transform 0.65s ease-out;
|
| 53 |
+
}
|
| 54 |
+
.fade-in.visible { opacity: 1; transform: translateY(0); }
|
| 55 |
+
|
| 56 |
+
/* ── Status dot ─────────────────────────────────────────── */
|
| 57 |
+
@keyframes pulse-ring {
|
| 58 |
+
0% { transform: scale(.85); opacity: 1; }
|
| 59 |
+
100% { transform: scale(2.4); opacity: 0; }
|
| 60 |
+
}
|
| 61 |
+
.status-dot {
|
| 62 |
+
position: relative; display: inline-block;
|
| 63 |
+
width: 10px; height: 10px; border-radius: 50%;
|
| 64 |
+
background: var(--green); vertical-align: middle; margin-right: 6px;
|
| 65 |
+
}
|
| 66 |
+
.status-dot::before {
|
| 67 |
+
content: ''; position: absolute; inset: 0;
|
| 68 |
+
border-radius: 50%; background: var(--green);
|
| 69 |
+
animation: pulse-ring 1.6s ease-out infinite;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* ── Blinking cursor ────────────────────────────────────── */
|
| 73 |
+
@keyframes blink { 0%,50%{opacity:1} 51%,100%{opacity:0} }
|
| 74 |
+
.cursor {
|
| 75 |
+
display: inline-block; width: 8px; height: 16px;
|
| 76 |
+
background: var(--green); vertical-align: text-bottom;
|
| 77 |
+
animation: blink 1s step-end infinite;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
/* ── Progress bar ───────────────────────────────────────── */
|
| 81 |
+
@keyframes fillBar { from { width: 0; } }
|
| 82 |
+
.progress-track {
|
| 83 |
+
height: 10px; border-radius: 5px;
|
| 84 |
+
background: var(--bg-hover); overflow: hidden; margin-top: 6px;
|
| 85 |
+
}
|
| 86 |
+
.progress-fill {
|
| 87 |
+
height: 100%; border-radius: 5px;
|
| 88 |
+
background: linear-gradient(90deg, var(--green), var(--accent));
|
| 89 |
+
animation: fillBar 1.2s ease-out forwards;
|
| 90 |
+
transition: width 0.6s ease-out;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
/* ── Badge ──────────────────────────────────────────────── */
|
| 94 |
+
.badge {
|
| 95 |
+
display: inline-block; padding: 2px 10px; border-radius: 20px;
|
| 96 |
+
font-size: 12px; font-weight: 600; text-transform: uppercase; letter-spacing: .5px;
|
| 97 |
+
}
|
| 98 |
+
.badge-easy { background: rgba(63,185,80,.15); color: var(--green); }
|
| 99 |
+
.badge-medium { background: rgba(210,153,34,.15); color: var(--orange); }
|
| 100 |
+
.badge-hard { background: rgba(248,81,73,.15); color: var(--red); }
|
| 101 |
+
.badge-meta { background: rgba(88,166,255,.12); color: var(--accent); border: 1px solid rgba(88,166,255,.25); }
|
| 102 |
+
|
| 103 |
+
/* ── Buttons ────────────────────────────────────────────── */
|
| 104 |
+
.btn {
|
| 105 |
+
display: inline-block; padding: 10px 24px; border-radius: 8px;
|
| 106 |
+
font-size: 14px; font-weight: 600; cursor: pointer; border: none;
|
| 107 |
+
transition: all .2s;
|
| 108 |
+
}
|
| 109 |
+
.btn-primary { background: var(--accent); color: #fff; }
|
| 110 |
+
.btn-primary:hover { background: #79c0ff; text-decoration: none; }
|
| 111 |
+
.btn-outline { background: transparent; color: var(--text); border: 1px solid var(--border); }
|
| 112 |
+
.btn-outline:hover { background: var(--bg-hover); text-decoration: none; }
|
| 113 |
+
.btn-green { background: var(--green); color: #fff; }
|
| 114 |
+
.btn-green:hover { background: #56d364; text-decoration: none; }
|
| 115 |
+
.btn:disabled { opacity: .4; cursor: not-allowed; }
|
| 116 |
+
|
| 117 |
+
/* ── Cards ──────────────────────────────────────────────── */
|
| 118 |
+
.card {
|
| 119 |
+
background: var(--bg-card); border: 1px solid var(--border);
|
| 120 |
+
border-radius: var(--radius); padding: 24px;
|
| 121 |
+
transition: border-color .2s, transform .2s;
|
| 122 |
+
}
|
| 123 |
+
.card:hover { border-color: var(--accent); transform: translateY(-2px); }
|
| 124 |
+
|
| 125 |
+
/* ═══════════════════════════════════════════════════════════
|
| 126 |
+
HERO
|
| 127 |
+
═══════════════════════════════════════════════════════════ */
|
| 128 |
+
.hero {
|
| 129 |
+
text-align: center; padding: 80px 0 60px;
|
| 130 |
+
background: radial-gradient(ellipse at 50% 0%, rgba(88,166,255,.08) 0%, transparent 70%);
|
| 131 |
+
}
|
| 132 |
+
.hero h1 {
|
| 133 |
+
font-size: clamp(32px, 5vw, 52px); font-weight: 800;
|
| 134 |
+
letter-spacing: -1px; line-height: 1.15; margin-bottom: 16px;
|
| 135 |
+
}
|
| 136 |
+
.hero h1 span { color: var(--accent); }
|
| 137 |
+
.hero p.subtitle {
|
| 138 |
+
font-size: 18px; color: var(--text-muted); max-width: 620px; margin: 0 auto 28px;
|
| 139 |
+
}
|
| 140 |
+
.hero-badges { margin-bottom: 28px; display: flex; gap: 12px; justify-content: center; flex-wrap: wrap; }
|
| 141 |
+
.hero-cta { display: flex; gap: 12px; justify-content: center; flex-wrap: wrap; }
|
| 142 |
+
.hero-stats {
|
| 143 |
+
display: flex; gap: 36px; justify-content: center; margin-top: 40px; flex-wrap: wrap;
|
| 144 |
+
}
|
| 145 |
+
.hero-stat { text-align: center; }
|
| 146 |
+
.hero-stat .num { font-size: 28px; font-weight: 700; color: var(--accent); }
|
| 147 |
+
.hero-stat .label { font-size: 13px; color: var(--text-muted); }
|
| 148 |
+
|
| 149 |
+
/* ═══════════════════════════════════════════════════════════
|
| 150 |
+
LIVE ARENA
|
| 151 |
+
═══════════════════════════════════════════════════════════ */
|
| 152 |
+
.arena { padding: 60px 0; }
|
| 153 |
+
.arena h2 { font-size: 28px; font-weight: 700; margin-bottom: 8px; }
|
| 154 |
+
.arena p.desc { color: var(--text-muted); margin-bottom: 24px; }
|
| 155 |
+
|
| 156 |
+
.arena-grid {
|
| 157 |
+
display: grid; grid-template-columns: 1fr 1fr; gap: 16px;
|
| 158 |
+
}
|
| 159 |
+
@media (max-width: 768px) { .arena-grid { grid-template-columns: 1fr; } }
|
| 160 |
+
|
| 161 |
+
.arena-panel {
|
| 162 |
+
background: var(--bg-editor); border: 1px solid var(--border);
|
| 163 |
+
border-radius: var(--radius); overflow: hidden;
|
| 164 |
+
}
|
| 165 |
+
.panel-header {
|
| 166 |
+
padding: 10px 16px; background: var(--bg-card);
|
| 167 |
+
border-bottom: 1px solid var(--border);
|
| 168 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 169 |
+
font-size: 13px; font-weight: 600; color: var(--text-muted);
|
| 170 |
+
}
|
| 171 |
+
.panel-body { padding: 0; }
|
| 172 |
+
.panel-body pre { margin: 0; padding: 16px; font-size: 13px; max-height: 320px; overflow: auto; }
|
| 173 |
+
.panel-body textarea {
|
| 174 |
+
width: 100%; min-height: 280px; padding: 16px;
|
| 175 |
+
background: var(--bg-editor); color: var(--text); border: none;
|
| 176 |
+
font-size: 13px; resize: vertical; outline: none;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.arena-controls {
|
| 180 |
+
display: flex; gap: 12px; align-items: center; margin-top: 16px; flex-wrap: wrap;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
.task-select {
|
| 184 |
+
padding: 8px 14px; border-radius: 8px;
|
| 185 |
+
background: var(--bg-card); color: var(--text); border: 1px solid var(--border);
|
| 186 |
+
font-size: 14px; cursor: pointer;
|
| 187 |
+
}
|
| 188 |
+
.task-select option { background: var(--bg-card); }
|
| 189 |
+
|
| 190 |
+
/* ── Test results ───────────────────────────────────────── */
|
| 191 |
+
.test-results { margin-top: 16px; }
|
| 192 |
+
.test-row {
|
| 193 |
+
display: flex; align-items: center; gap: 10px;
|
| 194 |
+
padding: 8px 14px; border-radius: 8px; margin-bottom: 6px;
|
| 195 |
+
font-size: 13px; opacity: 0; transform: translateX(-16px);
|
| 196 |
+
transition: all .4s ease-out;
|
| 197 |
+
}
|
| 198 |
+
.test-row.show { opacity: 1; transform: translateX(0); }
|
| 199 |
+
.test-row.pass { background: rgba(63,185,80,.1); border-left: 3px solid var(--green); }
|
| 200 |
+
.test-row.fail { background: rgba(248,81,73,.1); border-left: 3px solid var(--red); }
|
| 201 |
+
.test-icon { font-size: 16px; }
|
| 202 |
+
.test-name { flex: 1; }
|
| 203 |
+
.test-detail { color: var(--text-muted); font-size: 12px; }
|
| 204 |
+
|
| 205 |
+
/* ── Reward display ─────────────────────────────────────── */
|
| 206 |
+
.reward-display {
|
| 207 |
+
margin-top: 20px; padding: 16px 20px;
|
| 208 |
+
background: var(--bg-card); border: 1px solid var(--border); border-radius: var(--radius);
|
| 209 |
+
}
|
| 210 |
+
.reward-label { font-size: 13px; color: var(--text-muted); margin-bottom: 4px; }
|
| 211 |
+
.reward-value { font-size: 24px; font-weight: 700; }
|
| 212 |
+
|
| 213 |
+
/* ── Terminal ───────────────────────────────────────────── */
|
| 214 |
+
.terminal {
|
| 215 |
+
margin-top: 16px; background: #0a0a0a; border: 1px solid var(--border);
|
| 216 |
+
border-radius: var(--radius); overflow: hidden;
|
| 217 |
+
}
|
| 218 |
+
.terminal-header {
|
| 219 |
+
padding: 8px 16px; background: #1a1a1a; border-bottom: 1px solid #333;
|
| 220 |
+
font-size: 12px; color: var(--text-dim); display: flex; align-items: center; gap: 8px;
|
| 221 |
+
}
|
| 222 |
+
.terminal-dots { display: flex; gap: 6px; }
|
| 223 |
+
.terminal-dots span { width: 10px; height: 10px; border-radius: 50%; }
|
| 224 |
+
.terminal-dots .dot-r { background: #f85149; }
|
| 225 |
+
.terminal-dots .dot-y { background: #d29922; }
|
| 226 |
+
.terminal-dots .dot-g { background: #3fb950; }
|
| 227 |
+
.terminal-body {
|
| 228 |
+
padding: 14px 16px; font-size: 12px; color: var(--green);
|
| 229 |
+
max-height: 200px; overflow: auto; white-space: pre-wrap; line-height: 1.7;
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
/* ═══════════════════════════════════════════════════════════
|
| 233 |
+
HOW IT WORKS
|
| 234 |
+
═══════════════════════════════════════════════════════════ */
|
| 235 |
+
.how-it-works { padding: 60px 0; }
|
| 236 |
+
.how-it-works h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 40px; }
|
| 237 |
+
.steps {
|
| 238 |
+
display: grid; grid-template-columns: repeat(3, 1fr); gap: 24px;
|
| 239 |
+
}
|
| 240 |
+
@media (max-width: 768px) { .steps { grid-template-columns: 1fr; } }
|
| 241 |
+
.step { text-align: center; position: relative; }
|
| 242 |
+
.step-num {
|
| 243 |
+
width: 48px; height: 48px; border-radius: 50%;
|
| 244 |
+
background: rgba(88,166,255,.12); color: var(--accent);
|
| 245 |
+
display: flex; align-items: center; justify-content: center;
|
| 246 |
+
font-size: 20px; font-weight: 700; margin: 0 auto 16px;
|
| 247 |
+
}
|
| 248 |
+
.step h3 { font-size: 16px; margin-bottom: 8px; }
|
| 249 |
+
.step p { font-size: 14px; color: var(--text-muted); }
|
| 250 |
+
.step-arrow {
|
| 251 |
+
position: absolute; top: 24px; right: -18px;
|
| 252 |
+
color: var(--text-dim); font-size: 20px;
|
| 253 |
+
}
|
| 254 |
+
@media (max-width: 768px) { .step-arrow { display: none; } }
|
| 255 |
+
|
| 256 |
+
/* ═══════════════════════════════════════════════════════════
|
| 257 |
+
TASK EXPLORER
|
| 258 |
+
═══════════════════════════════════════════════════════════ */
|
| 259 |
+
.task-explorer { padding: 60px 0; }
|
| 260 |
+
.task-explorer h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 12px; }
|
| 261 |
+
.task-explorer > .container > p { text-align: center; color: var(--text-muted); margin-bottom: 32px; }
|
| 262 |
+
.task-grid {
|
| 263 |
+
display: grid; grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); gap: 16px;
|
| 264 |
+
}
|
| 265 |
+
.task-card { cursor: pointer; }
|
| 266 |
+
.task-card h3 { font-size: 15px; margin: 12px 0 8px; }
|
| 267 |
+
.task-card p { font-size: 13px; color: var(--text-muted); }
|
| 268 |
+
.task-meta { display: flex; gap: 12px; margin-top: 12px; font-size: 12px; color: var(--text-dim); }
|
| 269 |
+
|
| 270 |
+
/* ═══════════════════════════════════════════════════════════
|
| 271 |
+
ARCHITECTURE
|
| 272 |
+
═══════════════════════════════════════════════════════════ */
|
| 273 |
+
.architecture { padding: 60px 0; }
|
| 274 |
+
.architecture h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 32px; }
|
| 275 |
+
.arch-flow {
|
| 276 |
+
display: flex; align-items: center; justify-content: center;
|
| 277 |
+
gap: 0; flex-wrap: wrap; padding: 32px 0;
|
| 278 |
+
}
|
| 279 |
+
.arch-node {
|
| 280 |
+
padding: 16px 24px; border-radius: var(--radius);
|
| 281 |
+
background: var(--bg-card); border: 1px solid var(--border);
|
| 282 |
+
text-align: center; min-width: 130px;
|
| 283 |
+
}
|
| 284 |
+
.arch-node .icon { font-size: 24px; margin-bottom: 6px; }
|
| 285 |
+
.arch-node .name { font-size: 14px; font-weight: 600; }
|
| 286 |
+
.arch-node .desc { font-size: 11px; color: var(--text-muted); }
|
| 287 |
+
.arch-arrow { font-size: 20px; color: var(--text-dim); padding: 0 10px; }
|
| 288 |
+
@media (max-width: 600px) {
|
| 289 |
+
.arch-flow { flex-direction: column; }
|
| 290 |
+
.arch-arrow { transform: rotate(90deg); padding: 8px 0; }
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
/* ═══════════════════════════════════════════════════════════
|
| 294 |
+
METRICS
|
| 295 |
+
═══════════════════════════════════════════════════════════ */
|
| 296 |
+
.metrics { padding: 60px 0; }
|
| 297 |
+
.metrics h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 32px; }
|
| 298 |
+
.metrics-grid {
|
| 299 |
+
display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px;
|
| 300 |
+
}
|
| 301 |
+
@media (max-width: 768px) { .metrics-grid { grid-template-columns: repeat(2, 1fr); } }
|
| 302 |
+
.metric-card {
|
| 303 |
+
background: var(--bg-card); border: 1px solid var(--border);
|
| 304 |
+
border-radius: var(--radius); padding: 24px; text-align: center;
|
| 305 |
+
}
|
| 306 |
+
.metric-card .num { font-size: 32px; font-weight: 800; margin-bottom: 4px; }
|
| 307 |
+
.metric-card .label { font-size: 13px; color: var(--text-muted); }
|
| 308 |
+
|
| 309 |
+
/* ═══════════════════════════════════════════════════════════
|
| 310 |
+
FOOTER
|
| 311 |
+
═══════════════════════════════════════════════════════════ */
|
| 312 |
+
.footer {
|
| 313 |
+
padding: 40px 0; border-top: 1px solid var(--border); margin-top: 40px;
|
| 314 |
+
text-align: center; font-size: 14px; color: var(--text-muted);
|
| 315 |
+
}
|
| 316 |
+
.footer-links { display: flex; gap: 20px; justify-content: center; margin-bottom: 12px; }
|
| 317 |
+
</style>
|
| 318 |
+
</head>
|
| 319 |
+
<body>
|
| 320 |
+
|
| 321 |
+
<!-- ═══ HERO ═══════════════════════════════════════════════ -->
|
| 322 |
+
<section class="hero">
|
| 323 |
+
<div class="container">
|
| 324 |
+
<div class="hero-badges">
|
| 325 |
+
<span class="badge badge-meta">Meta x PyTorch Hackathon</span>
|
| 326 |
+
<span class="badge badge-meta">OpenEnv Compatible</span>
|
| 327 |
+
</div>
|
| 328 |
+
<h1>AI-Powered <span>Code Debugging</span> Arena</h1>
|
| 329 |
+
<p class="subtitle">
|
| 330 |
+
A real-world OpenEnv environment where AI agents learn to fix broken Python code.
|
| 331 |
+
Submit buggy code, get instant fixes, and watch tests pass in real time.
|
| 332 |
+
</p>
|
| 333 |
+
<div class="hero-cta">
|
| 334 |
+
<a href="#arena" class="btn btn-primary">Try It Live</a>
|
| 335 |
+
<a href="/docs" class="btn btn-outline">API Docs</a>
|
| 336 |
+
</div>
|
| 337 |
+
<div class="hero-stats" id="hero-stats">
|
| 338 |
+
<div class="hero-stat">
|
| 339 |
+
<div class="num" id="stat-status"><span class="status-dot"></span> Live</div>
|
| 340 |
+
<div class="label">Environment Status</div>
|
| 341 |
+
</div>
|
| 342 |
+
<div class="hero-stat">
|
| 343 |
+
<div class="num" id="stat-tasks">-</div>
|
| 344 |
+
<div class="label">Debug Tasks</div>
|
| 345 |
+
</div>
|
| 346 |
+
<div class="hero-stat">
|
| 347 |
+
<div class="num">0.0 – 1.0</div>
|
| 348 |
+
<div class="label">Reward Range</div>
|
| 349 |
+
</div>
|
| 350 |
+
<div class="hero-stat">
|
| 351 |
+
<div class="num">3</div>
|
| 352 |
+
<div class="label">Difficulty Levels</div>
|
| 353 |
+
</div>
|
| 354 |
+
</div>
|
| 355 |
+
</div>
|
| 356 |
+
</section>
|
| 357 |
+
|
| 358 |
+
<!-- ═══ LIVE ARENA ═════════════════════════════════════════ -->
|
| 359 |
+
<section class="arena fade-in" id="arena">
|
| 360 |
+
<div class="container">
|
| 361 |
+
<h2>Live Debug Arena</h2>
|
| 362 |
+
<p class="desc">Pick a task, review the buggy code, write your fix, and submit. Watch tests execute in real time.</p>
|
| 363 |
+
|
| 364 |
+
<div class="arena-controls">
|
| 365 |
+
<select class="task-select" id="task-select"><option value="">Loading tasks...</option></select>
|
| 366 |
+
<button class="btn btn-primary" id="btn-reset" disabled>Load Task</button>
|
| 367 |
+
<button class="btn btn-green" id="btn-submit" disabled>Submit Fix</button>
|
| 368 |
+
<span id="step-info" class="mono" style="font-size:13px;color:var(--text-muted)"></span>
|
| 369 |
+
</div>
|
| 370 |
+
|
| 371 |
+
<div class="arena-grid" style="margin-top:16px">
|
| 372 |
+
<!-- Left: buggy code -->
|
| 373 |
+
<div class="arena-panel">
|
| 374 |
+
<div class="panel-header"><span>Buggy Code</span><span id="task-difficulty"></span></div>
|
| 375 |
+
<div class="panel-body"><pre><code class="language-python" id="buggy-code">Select a task and click "Load Task" to begin.</code></pre></div>
|
| 376 |
+
</div>
|
| 377 |
+
<!-- Right: editor -->
|
| 378 |
+
<div class="arena-panel">
|
| 379 |
+
<div class="panel-header"><span>Your Fix</span></div>
|
| 380 |
+
<div class="panel-body">
|
| 381 |
+
<textarea id="code-editor" placeholder="Paste or write your fixed code here..." spellcheck="false"></textarea>
|
| 382 |
+
</div>
|
| 383 |
+
</div>
|
| 384 |
+
</div>
|
| 385 |
+
|
| 386 |
+
<!-- Test results -->
|
| 387 |
+
<div class="test-results" id="test-results"></div>
|
| 388 |
+
|
| 389 |
+
<!-- Reward -->
|
| 390 |
+
<div class="reward-display" id="reward-display" style="display:none">
|
| 391 |
+
<div style="display:flex;align-items:center;justify-content:space-between">
|
| 392 |
+
<div>
|
| 393 |
+
<div class="reward-label">Reward Score</div>
|
| 394 |
+
<div class="reward-value" id="reward-value">0.00</div>
|
| 395 |
+
</div>
|
| 396 |
+
<div style="flex:1;margin-left:24px">
|
| 397 |
+
<div class="reward-label">Tests Passed: <span id="tests-summary">0/0</span></div>
|
| 398 |
+
<div class="progress-track"><div class="progress-fill" id="progress-fill" style="width:0"></div></div>
|
| 399 |
+
</div>
|
| 400 |
+
</div>
|
| 401 |
+
</div>
|
| 402 |
+
|
| 403 |
+
<!-- Terminal -->
|
| 404 |
+
<div class="terminal" id="terminal" style="display:none">
|
| 405 |
+
<div class="terminal-header">
|
| 406 |
+
<div class="terminal-dots"><span class="dot-r"></span><span class="dot-y"></span><span class="dot-g"></span></div>
|
| 407 |
+
<span>Structured Logs</span>
|
| 408 |
+
</div>
|
| 409 |
+
<div class="terminal-body mono" id="terminal-body"></div>
|
| 410 |
+
</div>
|
| 411 |
+
</div>
|
| 412 |
+
</section>
|
| 413 |
+
|
| 414 |
+
<!-- ═══ HOW IT WORKS ═══════════════════════════════════════ -->
|
| 415 |
+
<section class="how-it-works fade-in">
|
| 416 |
+
<div class="container">
|
| 417 |
+
<h2>How It Works</h2>
|
| 418 |
+
<div class="steps">
|
| 419 |
+
<div class="step">
|
| 420 |
+
<div class="step-num">1</div>
|
| 421 |
+
<h3>Reset Environment</h3>
|
| 422 |
+
<p>The agent receives buggy Python code and descriptions of the tests it must pass.</p>
|
| 423 |
+
<span class="step-arrow">→</span>
|
| 424 |
+
</div>
|
| 425 |
+
<div class="step">
|
| 426 |
+
<div class="step-num">2</div>
|
| 427 |
+
<h3>Submit a Fix</h3>
|
| 428 |
+
<p>The agent analyzes the code, identifies the bug, and submits corrected code via <code>step()</code>.</p>
|
| 429 |
+
<span class="step-arrow">→</span>
|
| 430 |
+
</div>
|
| 431 |
+
<div class="step">
|
| 432 |
+
<div class="step-num">3</div>
|
| 433 |
+
<h3>Earn Reward</h3>
|
| 434 |
+
<p>Tests run in a sandboxed subprocess. Reward = fraction passing (0.0–1.0). Iterate until all pass.</p>
|
| 435 |
+
</div>
|
| 436 |
+
</div>
|
| 437 |
+
</div>
|
| 438 |
+
</section>
|
| 439 |
+
|
| 440 |
+
<!-- ═══ TASK EXPLORER ══════════════════════════════════════ -->
|
| 441 |
+
<section class="task-explorer fade-in">
|
| 442 |
+
<div class="container">
|
| 443 |
+
<h2>Task Explorer</h2>
|
| 444 |
+
<p>6 real-world debugging challenges across 3 difficulty levels.</p>
|
| 445 |
+
<div class="task-grid" id="task-grid"></div>
|
| 446 |
+
</div>
|
| 447 |
+
</section>
|
| 448 |
+
|
| 449 |
+
<!-- ═══ ARCHITECTURE ═══════════════════════════════════════ -->
|
| 450 |
+
<section class="architecture fade-in">
|
| 451 |
+
<div class="container">
|
| 452 |
+
<h2>Architecture</h2>
|
| 453 |
+
<div class="arch-flow">
|
| 454 |
+
<div class="arch-node">
|
| 455 |
+
<div class="icon">🤖</div>
|
| 456 |
+
<div class="name">AI Agent</div>
|
| 457 |
+
<div class="desc">LLM via OpenAI Client</div>
|
| 458 |
+
</div>
|
| 459 |
+
<div class="arch-arrow">→</div>
|
| 460 |
+
<div class="arch-node" style="border-color:var(--accent)">
|
| 461 |
+
<div class="icon">⚙</div>
|
| 462 |
+
<div class="name">step(code)</div>
|
| 463 |
+
<div class="desc">OpenEnv API</div>
|
| 464 |
+
</div>
|
| 465 |
+
<div class="arch-arrow">→</div>
|
| 466 |
+
<div class="arch-node">
|
| 467 |
+
<div class="icon">🛠</div>
|
| 468 |
+
<div class="name">Executor</div>
|
| 469 |
+
<div class="desc">Subprocess + Timeout</div>
|
| 470 |
+
</div>
|
| 471 |
+
<div class="arch-arrow">→</div>
|
| 472 |
+
<div class="arch-node">
|
| 473 |
+
<div class="icon">✅</div>
|
| 474 |
+
<div class="name">Tests</div>
|
| 475 |
+
<div class="desc">Pass / Fail</div>
|
| 476 |
+
</div>
|
| 477 |
+
<div class="arch-arrow">→</div>
|
| 478 |
+
<div class="arch-node" style="border-color:var(--green)">
|
| 479 |
+
<div class="icon">🏆</div>
|
| 480 |
+
<div class="name">Reward</div>
|
| 481 |
+
<div class="desc">0.0 – 1.0</div>
|
| 482 |
+
</div>
|
| 483 |
+
</div>
|
| 484 |
+
<div style="text-align:center;margin-top:20px">
|
| 485 |
+
<code class="mono" style="font-size:13px;color:var(--text-muted)">
|
| 486 |
+
reset() → Observation | step(action) → StepResult | state() → State
|
| 487 |
+
</code>
|
| 488 |
+
</div>
|
| 489 |
+
</div>
|
| 490 |
+
</section>
|
| 491 |
+
|
| 492 |
+
<!-- ═══ METRICS ════════════════════════════════════════════ -->
|
| 493 |
+
<section class="metrics fade-in">
|
| 494 |
+
<div class="container">
|
| 495 |
+
<h2>Performance</h2>
|
| 496 |
+
<div class="metrics-grid">
|
| 497 |
+
<div class="metric-card">
|
| 498 |
+
<div class="num" style="color:var(--green)">6/6</div>
|
| 499 |
+
<div class="label">Tasks Solved by Baseline</div>
|
| 500 |
+
</div>
|
| 501 |
+
<div class="metric-card">
|
| 502 |
+
<div class="num" style="color:var(--accent)">1.000</div>
|
| 503 |
+
<div class="label">Average Score</div>
|
| 504 |
+
</div>
|
| 505 |
+
<div class="metric-card">
|
| 506 |
+
<div class="num" style="color:var(--purple)">&lt; 2s</div>
|
| 507 |
+
<div class="label">Avg Response Time</div>
|
| 508 |
+
</div>
|
| 509 |
+
<div class="metric-card">
|
| 510 |
+
<div class="num" style="color:var(--orange)">100%</div>
|
| 511 |
+
<div class="label">OpenEnv Spec Compliant</div>
|
| 512 |
+
</div>
|
| 513 |
+
</div>
|
| 514 |
+
</div>
|
| 515 |
+
</section>
|
| 516 |
+
|
| 517 |
+
<!-- ═══ FOOTER ═════════════════════════════════════════════ -->
|
| 518 |
+
<footer class="footer">
|
| 519 |
+
<div class="container">
|
| 520 |
+
<div class="footer-links">
|
| 521 |
+
<a href="https://github.com/arnavzz/openenv-code-debugger" target="_blank">GitHub</a>
|
| 522 |
+
<a href="/docs" target="_blank">API Docs</a>
|
| 523 |
+
<a href="/health" target="_blank">Health Check</a>
|
| 524 |
+
</div>
|
| 525 |
+
<p>Built for the Meta x PyTorch OpenEnv Hackathon 2026</p>
|
| 526 |
+
</div>
|
| 527 |
+
</footer>
|
| 528 |
+
|
| 529 |
+
<!-- ═══ SCRIPTS ════════════════════════════════════════════ -->
|
| 530 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/prism.min.js"></script>
|
| 531 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/prism-python.min.js"></script>
|
| 532 |
+
<script>
|
| 533 |
+
(function() {
|
| 534 |
+
// Prefix for every fetch() URL below; empty means requests go to this page's origin.
const API = ''; // same origin

// Mutable arena state, shared by the handlers in this closure.
let episodeId = null;   // episode_id from the last /reset response
let currentTask = null; // observation object from the last /reset response
let stepCount = 0;      // step_count from the most recent /step observation
let logs = [];          // lines rendered into the terminal panel

// Short aliases for querySelector / querySelectorAll.
const $ = s => document.querySelector(s);
const $$ = s => document.querySelectorAll(s);
|
| 542 |
+
|
| 543 |
+
// ── Fade-in observer ──
|
| 544 |
+
// Add 'visible' to each .fade-in element the first time it intersects the
// viewport (threshold 0.1), then stop observing that element.
const obs = new IntersectionObserver(entries => {
  for (const entry of entries) {
    if (!entry.isIntersecting) continue;
    entry.target.classList.add('visible');
    obs.unobserve(entry.target);
  }
}, { threshold: 0.1 });
for (const el of $$('.fade-in')) obs.observe(el);
|
| 548 |
+
|
| 549 |
+
// ── Load tasks ──
|
| 550 |
+
// Fetch the task list from GET /tasks, then fill the #stat-tasks counter,
// the arena dropdown and the task grid. Failures are logged, not rethrown.
async function loadTasks() {
  try {
    const res = await fetch(API + '/tasks');
    // fetch() only rejects on network failure; an HTTP error still resolves,
    // so check the status before treating the body as a task list.
    if (!res.ok) throw new Error(`GET /tasks failed with HTTP ${res.status}`);
    const tasks = await res.json();
    if (!Array.isArray(tasks)) throw new Error('GET /tasks did not return an array');
    $('#stat-tasks').textContent = tasks.length;
    renderTaskSelect(tasks);
    renderTaskGrid(tasks);
  } catch (e) {
    console.error('Failed to load tasks:', e);
  }
}
|
| 561 |
+
|
| 562 |
+
// Rebuild the #task-select dropdown: a placeholder entry followed by one
// option per task, labelled "[DIFFICULTY] <first 60 chars of description>".
// Enables the reset button once the options are in place.
function renderTaskSelect(tasks) {
  const sel = $('#task-select');

  const makeOption = (value, label) => {
    const opt = document.createElement('option');
    opt.value = value;
    // textContent keeps task text literal; it is never parsed as markup.
    opt.textContent = label;
    return opt;
  };

  const options = tasks.map(t =>
    makeOption(t.task_id, `[${t.difficulty.toUpperCase()}] ${t.description.slice(0, 60)}`)
  );

  // Build all options first and insert them in one go instead of
  // re-serialising the whole <select> on every iteration.
  sel.textContent = '';
  sel.append(makeOption('', '-- Select a task --'), ...options);

  $('#btn-reset').disabled = false;
}
|
| 570 |
+
|
| 571 |
+
// Render one card per task into #task-grid, ordered easy -> medium -> hard.
// Clicking a card selects that task in the dropdown, resets the environment
// and scrolls the arena into view.
function renderTaskGrid(tasks) {
  const grid = $('#task-grid');
  const order = { easy: 0, medium: 1, hard: 2 };
  // Difficulties outside the table sort last instead of yielding NaN.
  const rank = d => (Object.prototype.hasOwnProperty.call(order, d) ? order[d] : 3);
  // Sort a copy so the caller's array is left untouched.
  const sorted = [...tasks].sort((a, b) => rank(a.difficulty) - rank(b.difficulty));

  // Create an element whose text is set via textContent (never parsed as HTML).
  const el = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = String(text);
    return node;
  };

  const cards = sorted.map(t => {
    const card = el('div', 'card task-card');
    card.dataset.id = t.task_id;

    const meta = el('div', 'task-meta');
    // The ' ' text node keeps the inter-span whitespace the markup template had.
    meta.append(
      el('span', '', `Max steps: ${t.max_steps}`),
      ' ',
      el('span', '', `Tests: ${t.total_tests}`)
    );

    card.append(
      el('span', `badge badge-${t.difficulty}`, t.difficulty),
      // "task_001_off_by_one" -> "off by one"
      el('h3', '', t.task_id.replace(/^task_\d+_/, '').replace(/_/g, ' ')),
      el('p', '', t.description),
      meta
    );

    card.addEventListener('click', () => {
      $('#task-select').value = card.dataset.id;
      resetEnv();
      document.getElementById('arena').scrollIntoView({ behavior: 'smooth' });
    });
    return card;
  });

  grid.textContent = '';
  grid.append(...cards);
}
|
| 595 |
+
|
| 596 |
+
// ── Reset environment ──
|
| 597 |
+
// Start a new episode for the task chosen in #task-select.
// POSTs { task_id } to /reset, stores the returned episode id and observation,
// shows the buggy code (read-only view and editor), and logs a [START] line.
// Does nothing when no task is selected. Failures are logged, not rethrown.
async function resetEnv() {
  const taskId = $('#task-select').value;
  if (!taskId) return;
  $('#btn-reset').disabled = true;
  $('#btn-submit').disabled = true;
  clearResults();

  try {
    const res = await fetch(API + '/reset', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({ task_id: taskId })
    });
    // fetch() resolves on HTTP errors too; bail out before an error body
    // overwrites the episode state with undefined values.
    if (!res.ok) throw new Error(`POST /reset failed with HTTP ${res.status}`);
    const data = await res.json();
    episodeId = data.episode_id;
    currentTask = data.observation;
    stepCount = 0;
    logs = [];

    // Render buggy code
    const codeEl = $('#buggy-code');
    codeEl.textContent = currentTask.buggy_code;
    // Prism is loaded from a CDN; if it is missing, skip highlighting
    // rather than aborting the whole reset.
    if (typeof Prism !== 'undefined') Prism.highlightElement(codeEl);

    // Set difficulty badge (built as a node so the value is never parsed as HTML)
    const d = currentTask.difficulty;
    const badge = document.createElement('span');
    badge.className = `badge badge-${d}`;
    badge.textContent = d;
    const slot = $('#task-difficulty');
    slot.textContent = '';
    slot.append(badge);

    // Pre-fill editor with buggy code
    $('#code-editor').value = currentTask.buggy_code;
    $('#btn-submit').disabled = false;
    $('#step-info').textContent = `Step 0/${currentTask.max_steps}`;

    // Log [START]
    addLog(`[START] task=${taskId} env=arena model=interactive`);
  } catch(e) {
    console.error('Reset failed:', e);
  } finally {
    // The reset button is re-enabled on success and on failure alike.
    $('#btn-reset').disabled = false;
  }
}
|
| 638 |
+
|
| 639 |
+
// ── Submit fix ──
|
| 640 |
+
// Submit the editor contents as one step of the current episode.
// POSTs { action: { code } } to /step/<episodeId>, then updates the step
// counter, test results, reward display and terminal log. When the response
// says the episode is done, the submit button stays disabled; otherwise
// (including on failure) it is re-enabled. No-op without an episode or code.
async function submitFix() {
  if (!episodeId) return;
  const code = $('#code-editor').value;
  if (!code.trim()) return;
  $('#btn-submit').disabled = true;

  try {
    const res = await fetch(API + `/step/${episodeId}`, {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({ action: { code } })
    });
    // fetch() resolves on HTTP errors too; report the status instead of
    // failing later on a missing observation.
    if (!res.ok) throw new Error(`POST /step failed with HTTP ${res.status}`);
    const data = await res.json();
    // Named "observation" so it does not shadow the outer IntersectionObserver `obs`.
    const observation = data.observation;
    stepCount = observation.step_count;

    $('#step-info').textContent = `Step ${stepCount}/${observation.max_steps}`;

    // Animate test results
    renderTestResults(observation.test_results);

    // Show reward
    showReward(data.reward, observation.tests_passed, observation.total_tests);

    // Log: newlines are shown as a literal "\n" and the code is cut to 60 chars
    const actionShort = code.replace(/\n/g, '\\n').slice(0, 60);
    addLog(`[STEP] step=${stepCount} action="${actionShort}..." reward=${data.reward.toFixed(2)} done=${data.done} error=null`);

    if (data.done) {
      addLog(`[END] success=${data.reward === 1.0} steps=${stepCount} score=${data.reward.toFixed(3)}`);
      if (data.reward === 1.0) {
        $('#step-info').innerHTML = `<span style="color:var(--green)">✓ All tests passed!</span>`;
      } else {
        $('#step-info').innerHTML = `<span style="color:var(--red)">Episode ended (max steps reached)</span>`;
      }
    } else {
      $('#btn-submit').disabled = false;
    }
  } catch(e) {
    console.error('Step failed:', e);
    $('#btn-submit').disabled = false;
  }
}
|
| 684 |
+
|
| 685 |
+
// ── Render test results with stagger animation ──
|
| 686 |
+
// Replace the contents of #test-results with one row per test result.
// Each row shows a pass/fail icon and the test name; failing rows also show
// expected vs actual. Rows get the 'show' class on a staggered timer
// (100ms + 120ms per row index).
function renderTestResults(results) {
  const container = $('#test-results');
  container.innerHTML = '';

  // Build a <span> whose text is set via textContent, so values such as
  // "<class 'int'>" are displayed literally instead of being parsed as HTML.
  const span = (className, text) => {
    const node = document.createElement('span');
    node.className = className;
    node.textContent = text;
    return node;
  };

  // Tolerate a missing result list instead of throwing.
  (results || []).forEach((t, i) => {
    const row = document.createElement('div');
    row.className = `test-row ${t.passed ? 'pass' : 'fail'}`;
    // The ' ' text nodes keep the inter-span whitespace the markup template had.
    row.append(
      span('test-icon', t.passed ? '✓' : '✗'),
      ' ',
      span('test-name mono', `${t.test_name}`)
    );
    if (!t.passed) {
      row.append(' ', span('test-detail', `expected: ${t.expected} | actual: ${t.actual}`));
    }
    container.appendChild(row);
    setTimeout(() => row.classList.add('show'), 100 + i * 120);
  });
}
|
| 701 |
+
|
| 702 |
+
// ── Show reward ──
|
| 703 |
+
// Reveal the reward panel and fill it in: the reward to two decimals
// (green at exactly 1.0, orange when above 0, red otherwise), the
// "passed/total" summary, and a progress bar that animates from 0 to
// reward * 100 percent.
function showReward(reward, passed, total) {
  $('#reward-display').style.display = 'block';

  const valueEl = $('#reward-value');
  valueEl.textContent = reward.toFixed(2);
  let color;
  if (reward === 1.0) {
    color = 'var(--green)';
  } else if (reward > 0) {
    color = 'var(--orange)';
  } else {
    color = 'var(--red)';
  }
  valueEl.style.color = color;

  $('#tests-summary').textContent = `${passed}/${total}`;

  const bar = $('#progress-fill');
  const targetWidth = Math.round(reward * 100) + '%';
  // Reset to zero first and set the target width on the next frame.
  bar.style.width = '0';
  requestAnimationFrame(() => { bar.style.width = targetWidth; });
}
|
| 714 |
+
|
| 715 |
+
// ── Terminal log ──
|
| 716 |
+
// Append a line to the log buffer, show the terminal panel and re-render
// every buffered line. [START], [STEP] and [END] lines are wrapped in a
// coloured span ([END] is green when it contains "success=true", red
// otherwise); other lines are emitted without a span. A cursor span is
// appended and the terminal body is scrolled to the bottom.
function addLog(line) {
  logs.push(line);
  $('#terminal').style.display = 'block';

  // CSS colour for a log line, or null when the line gets no span.
  const colorFor = entry => {
    if (entry.startsWith('[START]')) return 'var(--accent)';
    if (entry.startsWith('[STEP]')) return 'var(--text)';
    if (entry.startsWith('[END]')) {
      return entry.includes('success=true') ? 'var(--green)' : 'var(--red)';
    }
    return null;
  };

  const rendered = [];
  for (const entry of logs) {
    const color = colorFor(entry);
    rendered.push(
      color === null ? esc(entry) : `<span style="color:${color}">${esc(entry)}</span>`
    );
  }

  const body = $('#terminal-body');
  body.innerHTML = rendered.join('\n') + ' <span class="cursor"></span>';
  body.scrollTop = body.scrollHeight;
}
|
| 729 |
+
|
| 730 |
+
// Escape &, < and > so a string can be placed into innerHTML as plain text.
// '&' is replaced first so the entities produced for < and > are not re-escaped.
function esc(s) { return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
|
| 731 |
+
|
| 732 |
+
// Empty the test-results list, hide the reward and terminal panels,
// and discard the buffered log lines.
function clearResults() {
  const results = $('#test-results');
  results.innerHTML = '';
  for (const panel of ['#reward-display', '#terminal']) {
    $(panel).style.display = 'none';
  }
  logs = [];
}
|
| 738 |
+
|
| 739 |
+
// ── Event listeners ──
|
| 740 |
+
// Wire the arena buttons to their handlers.
$('#btn-reset').addEventListener('click', resetEnv);
$('#btn-submit').addEventListener('click', submitFix);

// ── Init ──
// Fetch the task list as soon as this script runs.
loadTasks();
|
| 745 |
+
})();
|
| 746 |
+
</script>
|
| 747 |
+
</body>
|
| 748 |
+
</html>
|