Spaces:
Running
Running
Nitish committed on
Commit ·
1af53e9
1
Parent(s): 0d83deb
feat: add mac-style window and tabbed navigation
Browse files
- static/index.html +109 -69
- static/main.js +25 -1
- static/style.css +94 -2
static/index.html
CHANGED
|
@@ -21,89 +21,129 @@
|
|
| 21 |
<p>Interactive baseline evaluation for AI Agents.</p>
|
| 22 |
</header>
|
| 23 |
|
| 24 |
-
<div class="
|
| 25 |
-
<
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
<
|
| 29 |
-
<
|
| 30 |
-
<span id="badge-difficulty" class="badge">Loading...</span>
|
| 31 |
-
<span id="badge-step" class="badge">Step 0/0</span>
|
| 32 |
-
</div>
|
| 33 |
</div>
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
<
|
|
|
|
| 37 |
</div>
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
</div>
|
| 47 |
-
<pre><code id="code-snippet" class="language-python"># Awaiting initialization...</code></pre>
|
| 48 |
-
</div>
|
| 49 |
-
</section>
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
<h2>Agent Action</h2>
|
| 55 |
-
</div>
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
<
|
| 63 |
-
</
|
| 64 |
-
</div>
|
| 65 |
|
| 66 |
-
|
| 67 |
-
<
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<option value="security-vulnerability">Security Vulnerability</option>
|
| 72 |
-
<option value="null-dereference">Null Dereference</option>
|
| 73 |
-
<option value="none">None</option>
|
| 74 |
-
</select>
|
| 75 |
-
</div>
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
</select>
|
| 86 |
-
</div>
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
</div>
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
</div>
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
<
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
</div>
|
| 108 |
|
| 109 |
<!-- Sticky Status Toast -->
|
|
|
|
| 21 |
<p>Interactive baseline evaluation for AI Agents.</p>
|
| 22 |
</header>
|
| 23 |
|
| 24 |
+
<div class="mac-window">
|
| 25 |
+
<div class="mac-title-bar">
|
| 26 |
+
<div class="mac-dots">
|
| 27 |
+
<span class="dot red"></span>
|
| 28 |
+
<span class="dot yellow"></span>
|
| 29 |
+
<span class="dot green"></span>
|
|
|
|
|
|
|
|
|
|
| 30 |
</div>
|
| 31 |
+
<div class="mac-tabs">
|
| 32 |
+
<button class="mac-tab active" data-tab="playground">Playground</button>
|
| 33 |
+
<button class="mac-tab" data-tab="details">Model Details</button>
|
| 34 |
+
<button class="mac-tab" data-tab="specs">API Specs</button>
|
| 35 |
</div>
|
| 36 |
+
</div>
|
| 37 |
|
| 38 |
+
<div class="window-content">
|
| 39 |
+
<div id="tab-playground" class="tab-pane active">
|
| 40 |
+
<div class="dashboard">
|
| 41 |
+
<!-- Left Column: Environment Observation -->
|
| 42 |
+
<section class="panel observation-panel" id="observation-section">
|
| 43 |
+
<div class="panel-header">
|
| 44 |
+
<h2>Environment State</h2>
|
| 45 |
+
<div class="badge-row">
|
| 46 |
+
<span id="badge-difficulty" class="badge">Loading...</span>
|
| 47 |
+
<span id="badge-step" class="badge">Step 0/0</span>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
|
| 51 |
+
<div class="task-info">
|
| 52 |
+
<strong>Task:</strong> <span id="task-description">Initializing environment...</span>
|
| 53 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
<div id="feedback-container" class="feedback-info hidden">
|
| 56 |
+
<strong>Previous Feedback:</strong> <span id="previous-feedback"></span>
|
| 57 |
+
</div>
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
<div class="code-container">
|
| 60 |
+
<div class="code-header">
|
| 61 |
+
<span id="lang-badge">Language: Unknown</span>
|
| 62 |
+
</div>
|
| 63 |
+
<pre><code id="code-snippet" class="language-python"># Awaiting initialization...</code></pre>
|
| 64 |
+
</div>
|
| 65 |
+
</section>
|
|
|
|
| 66 |
|
| 67 |
+
<!-- Right Column: Agent Action Form -->
|
| 68 |
+
<section class="panel action-panel" id="action-section">
|
| 69 |
+
<div class="panel-header">
|
| 70 |
+
<h2>Agent Action</h2>
|
| 71 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
+
<form id="action-form">
|
| 74 |
+
<div class="form-group toggle-group">
|
| 75 |
+
<label for="input-bug-identified">Bug Identified</label>
|
| 76 |
+
<select id="input-bug-identified" required>
|
| 77 |
+
<option value="true" selected>Yes</option>
|
| 78 |
+
<option value="false">No</option>
|
| 79 |
+
</select>
|
| 80 |
+
</div>
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
<div class="form-group">
|
| 83 |
+
<label for="input-bug-type">Bug Type</label>
|
| 84 |
+
<select id="input-bug-type" required>
|
| 85 |
+
<option value="off-by-one">Off-by-one</option>
|
| 86 |
+
<option value="logic-error">Logic Error</option>
|
| 87 |
+
<option value="security-vulnerability">Security Vulnerability</option>
|
| 88 |
+
<option value="null-dereference">Null Dereference</option>
|
| 89 |
+
<option value="none">None</option>
|
| 90 |
+
</select>
|
| 91 |
+
</div>
|
| 92 |
+
|
| 93 |
+
<div class="form-group">
|
| 94 |
+
<label for="input-severity">Severity</label>
|
| 95 |
+
<select id="input-severity" required>
|
| 96 |
+
<option value="none">None</option>
|
| 97 |
+
<option value="low">Low</option>
|
| 98 |
+
<option value="medium">Medium</option>
|
| 99 |
+
<option value="high">High</option>
|
| 100 |
+
<option value="critical">Critical</option>
|
| 101 |
+
</select>
|
| 102 |
+
</div>
|
| 103 |
+
|
| 104 |
+
<div class="form-group">
|
| 105 |
+
<label for="input-bug-location">Bug Location</label>
|
| 106 |
+
<input type="text" id="input-bug-location" placeholder="e.g., fetch_records() line 4" required>
|
| 107 |
+
</div>
|
| 108 |
+
|
| 109 |
+
<div class="form-group">
|
| 110 |
+
<label for="input-bug-description">Description</label>
|
| 111 |
+
<textarea id="input-bug-description" rows="3" placeholder="Explain the vulnerability..." required></textarea>
|
| 112 |
+
</div>
|
| 113 |
+
|
| 114 |
+
<div class="form-group">
|
| 115 |
+
<label for="input-suggested-fix">Suggested Fix</label>
|
| 116 |
+
<textarea id="input-suggested-fix" rows="3" placeholder="Provide corrected code or explanation..." required></textarea>
|
| 117 |
+
</div>
|
| 118 |
|
| 119 |
+
<button type="submit" id="btn-submit-action" class="primary-btn">Submit Action</button>
|
| 120 |
+
<button type="button" id="btn-reset-env" class="secondary-btn">Reset Environment</button>
|
| 121 |
+
</form>
|
| 122 |
+
</section>
|
| 123 |
</div>
|
| 124 |
+
</div>
|
| 125 |
|
| 126 |
+
<div id="tab-details" class="tab-pane">
|
| 127 |
+
<div class="panel">
|
| 128 |
+
<h2>Model Details</h2>
|
| 129 |
+
<p style="margin-top: 1rem;">OpenEnv is an RL environment designed for security validation. This baseline uses standard reward signals to calibrate agents.</p>
|
| 130 |
+
<ul style="margin-top: 1rem; color: var(--text-muted); list-style-position: inside;">
|
| 131 |
+
<li>Deterministic Reward Signals</li>
|
| 132 |
+
<li>Multi-step Episode Support</li>
|
| 133 |
+
<li>Security-focused Task Sets</li>
|
| 134 |
+
</ul>
|
| 135 |
</div>
|
| 136 |
+
</div>
|
| 137 |
|
| 138 |
+
<div id="tab-specs" class="tab-pane">
|
| 139 |
+
<div class="panel">
|
| 140 |
+
<h2>API Specifications</h2>
|
| 141 |
+
<pre style="margin-top: 1rem; background: #000; padding: 1rem; border-radius: 4px;">POST /reset?difficulty={easy|medium|hard}
|
| 142 |
+
POST /step {bug_identified, bug_type, ...}
|
| 143 |
+
GET /state</pre>
|
| 144 |
+
</div>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
</div>
|
| 148 |
|
| 149 |
<!-- Sticky Status Toast -->
|
static/main.js
CHANGED
|
@@ -24,11 +24,35 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 24 |
inputSeverity: document.getElementById('input-severity'),
|
| 25 |
inputBugLocation: document.getElementById('input-bug-location'),
|
| 26 |
inputBugDescription: document.getElementById('input-bug-description'),
|
| 27 |
-
inputSuggestedFix: document.getElementById('input-suggested-fix')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
};
|
| 29 |
|
| 30 |
let isDone = false;
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
// Initialize Environment
|
| 33 |
async function resetEnvironment(difficulty = 'easy') {
|
| 34 |
elements.submitBtn.disabled = true;
|
|
|
|
| 24 |
inputSeverity: document.getElementById('input-severity'),
|
| 25 |
inputBugLocation: document.getElementById('input-bug-location'),
|
| 26 |
inputBugDescription: document.getElementById('input-bug-description'),
|
| 27 |
+
inputSuggestedFix: document.getElementById('input-suggested-fix'),
|
| 28 |
+
|
| 29 |
+
// Tab elements
|
| 30 |
+
tabs: document.querySelectorAll('.mac-tab'),
|
| 31 |
+
panes: document.querySelectorAll('.tab-pane')
|
| 32 |
};
|
| 33 |
|
| 34 |
let isDone = false;
|
| 35 |
|
| 36 |
+
// Tab Switching Logic
|
| 37 |
+
elements.tabs.forEach(tab => {
|
| 38 |
+
tab.addEventListener('click', () => {
|
| 39 |
+
const target = tab.getAttribute('data-tab');
|
| 40 |
+
|
| 41 |
+
// Update tabs
|
| 42 |
+
elements.tabs.forEach(t => t.classList.remove('active'));
|
| 43 |
+
tab.classList.add('active');
|
| 44 |
+
|
| 45 |
+
// Update panes
|
| 46 |
+
elements.panes.forEach(pane => {
|
| 47 |
+
if (pane.id === `tab-${target}`) {
|
| 48 |
+
pane.classList.add('active');
|
| 49 |
+
} else {
|
| 50 |
+
pane.classList.remove('active');
|
| 51 |
+
}
|
| 52 |
+
});
|
| 53 |
+
});
|
| 54 |
+
});
|
| 55 |
+
|
| 56 |
// Initialize Environment
|
| 57 |
async function resetEnvironment(difficulty = 'easy') {
|
| 58 |
elements.submitBtn.disabled = true;
|
static/style.css
CHANGED
|
@@ -55,6 +55,98 @@ header {
|
|
| 55 |
border-bottom: 1px solid var(--border-card);
|
| 56 |
}
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
h1 {
|
| 59 |
font-size: 2.25rem;
|
| 60 |
font-weight: 700;
|
|
@@ -80,11 +172,11 @@ p {
|
|
| 80 |
}
|
| 81 |
|
| 82 |
.panel {
|
| 83 |
-
background:
|
| 84 |
border: 1px solid var(--border-card);
|
| 85 |
border-radius: 8px;
|
| 86 |
padding: 1.75rem;
|
| 87 |
-
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.5)
|
| 88 |
}
|
| 89 |
|
| 90 |
.dashboard {
|
|
|
|
| 55 |
border-bottom: 1px solid var(--border-card);
|
| 56 |
}
|
| 57 |
|
| 58 |
+
h1 {
|
| 59 |
+
font-size: 2.25rem;
|
| 60 |
+
font-weight: 700;
|
| 61 |
+
letter-spacing: -0.02em;
|
| 62 |
+
color: var(--text-main);
|
| 63 |
+
margin-bottom: 0.5rem;
|
| 64 |
+
text-align: center;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
/* Mac Window Styling */
|
| 68 |
+
.mac-window {
|
| 69 |
+
background: var(--bg-card);
|
| 70 |
+
border: 1px solid var(--border-card);
|
| 71 |
+
border-radius: 12px;
|
| 72 |
+
overflow: hidden;
|
| 73 |
+
box-shadow: 0 20px 50px rgba(0, 0, 0, 0.5);
|
| 74 |
+
margin-top: 1rem;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
.mac-title-bar {
|
| 78 |
+
background: #1a1a1a;
|
| 79 |
+
height: 44px;
|
| 80 |
+
display: flex;
|
| 81 |
+
align-items: center;
|
| 82 |
+
padding: 0 16px;
|
| 83 |
+
border-bottom: 1px solid var(--border-card);
|
| 84 |
+
position: relative;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.mac-dots {
|
| 88 |
+
display: flex;
|
| 89 |
+
gap: 8px;
|
| 90 |
+
position: absolute;
|
| 91 |
+
left: 16px;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.dot {
|
| 95 |
+
width: 12px;
|
| 96 |
+
height: 12px;
|
| 97 |
+
border-radius: 50%;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
.dot.red { background: #ff5f57; }
|
| 101 |
+
.dot.yellow { background: #febc2e; }
|
| 102 |
+
.dot.green { background: #28c840; }
|
| 103 |
+
|
| 104 |
+
.mac-tabs {
|
| 105 |
+
display: flex;
|
| 106 |
+
margin: 0 auto;
|
| 107 |
+
background: #000;
|
| 108 |
+
border-radius: 6px;
|
| 109 |
+
padding: 2px;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.mac-tab {
|
| 113 |
+
background: transparent;
|
| 114 |
+
border: none;
|
| 115 |
+
color: var(--text-muted);
|
| 116 |
+
padding: 6px 16px;
|
| 117 |
+
font-size: 0.85rem;
|
| 118 |
+
font-weight: 500;
|
| 119 |
+
cursor: pointer;
|
| 120 |
+
border-radius: 4px;
|
| 121 |
+
transition: all 0.2s;
|
| 122 |
+
width: auto;
|
| 123 |
+
margin-bottom: 0;
|
| 124 |
+
text-transform: none;
|
| 125 |
+
letter-spacing: normal;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.mac-tab:hover {
|
| 129 |
+
color: var(--text-main);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.mac-tab.active {
|
| 133 |
+
background: var(--bg-input);
|
| 134 |
+
color: var(--accent-primary);
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.window-content {
|
| 138 |
+
padding: 2rem;
|
| 139 |
+
min-height: 500px;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
.tab-pane {
|
| 143 |
+
display: none;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.tab-pane.active {
|
| 147 |
+
display: block;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
h1 {
|
| 151 |
font-size: 2.25rem;
|
| 152 |
font-weight: 700;
|
|
|
|
| 172 |
}
|
| 173 |
|
| 174 |
.panel {
|
| 175 |
+
background: #1a1a1b;
|
| 176 |
border: 1px solid var(--border-card);
|
| 177 |
border-radius: 8px;
|
| 178 |
padding: 1.75rem;
|
| 179 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.5);
|
| 180 |
}
|
| 181 |
|
| 182 |
.dashboard {
|