Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- server/app.py +84 -40
- server/env/environment.py +0 -1
- server/env/graders.py +8 -1
- server/static/app.js +120 -20
server/app.py
CHANGED
|
@@ -14,7 +14,7 @@ Key Features:
|
|
| 14 |
|
| 15 |
import os
|
| 16 |
import json
|
| 17 |
-
from typing import Dict, Any
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
|
| 20 |
load_dotenv()
|
|
@@ -46,23 +46,67 @@ STATIC_DIR = os.path.join(BASE_DIR, "static")
|
|
| 46 |
if os.path.exists(STATIC_DIR):
|
| 47 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 48 |
|
| 49 |
-
@app.get("/", response_class=HTMLResponse, tags=["UI"])
|
| 50 |
-
async def read_dashboard():
|
| 51 |
-
"""Serves the ContentGuard monitoring dashboard."""
|
| 52 |
-
index_path = os.path.join(STATIC_DIR, "index.html")
|
| 53 |
-
if os.path.exists(index_path):
|
| 54 |
-
with open(index_path, "r", encoding="utf-8") as f:
|
| 55 |
-
return f.read()
|
| 56 |
-
return "Dashboard UI source missing."
|
| 57 |
-
|
| 58 |
sessions: Dict[str, ContentGuardEnv] = {}
|
| 59 |
|
| 60 |
# LLM Inference Client (Defaulting to Hackathon standard endpoints)
|
| 61 |
-
|
| 62 |
-
api_key=os.environ.get("HF_TOKEN", "sk-placeholder"),
|
| 63 |
-
base_url=os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
|
| 64 |
-
)
|
| 65 |
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
class ResetRequest(BaseModel):
|
| 68 |
task_id: str = Field(default="easy", description="Difficulty tier: easy | medium | hard")
|
|
@@ -115,7 +159,7 @@ async def policy_trace_socket(websocket: WebSocket):
|
|
| 115 |
"""Streams real-time reasoning traces and environment telemetry."""
|
| 116 |
await websocket.accept()
|
| 117 |
env: ContentGuardEnv | None = None
|
| 118 |
-
session_client: AsyncOpenAI | None =
|
| 119 |
session_model: str = MODEL_NAME
|
| 120 |
|
| 121 |
try:
|
|
@@ -129,20 +173,8 @@ async def policy_trace_socket(websocket: WebSocket):
|
|
| 129 |
|
| 130 |
cmd = msg.get("action")
|
| 131 |
# Universal Credential Injector (Session-based)
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
api_key = cfg["api_key"]
|
| 135 |
-
base_url = cfg.get("base_url") or "https://api.openai.com/v1"
|
| 136 |
-
|
| 137 |
-
# Intelligent Router logic
|
| 138 |
-
if api_key.startswith("hf_") and "openai.com" in base_url:
|
| 139 |
-
base_url = "https://api-inference.huggingface.co/v1"
|
| 140 |
-
|
| 141 |
-
session_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
| 142 |
-
session_model = cfg.get("model") or (MODEL_NAME if "openai.com" in base_url else "meta-llama/Llama-3-70b-instruct")
|
| 143 |
-
elif session_client is None:
|
| 144 |
-
session_client = aclient
|
| 145 |
-
session_model = MODEL_NAME
|
| 146 |
|
| 147 |
if cmd == "reset":
|
| 148 |
env = ContentGuardEnv()
|
|
@@ -167,8 +199,14 @@ async def policy_trace_socket(websocket: WebSocket):
|
|
| 167 |
if not env:
|
| 168 |
await websocket.send_json({"type": "error", "message": "Session inactive."})
|
| 169 |
continue
|
|
|
|
|
|
|
|
|
|
| 170 |
try:
|
| 171 |
await websocket.send_json({"type": "stream", "content": f"[START] ep={env.episode_id} task={env.task_id}\n"})
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
sys_prompt = "Expert Safety Moderator. Respond with JSON only. Strictly align with platform policies."
|
| 174 |
user_prompt = f"Policy Task: {env._task_config['description']}\n\nEvidence:\n{json.dumps(env.case)}\n\nSubmit ruling in JSON."
|
|
@@ -186,6 +224,9 @@ async def policy_trace_socket(websocket: WebSocket):
|
|
| 186 |
if content:
|
| 187 |
full_response += content
|
| 188 |
await websocket.send_json({"type": "stream", "content": content})
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
# Clean/Parse Output
|
| 191 |
js_str = full_response.strip()
|
|
@@ -201,22 +242,25 @@ async def policy_trace_socket(websocket: WebSocket):
|
|
| 201 |
await websocket.send_json({"type": "stream", "content": f"[END] Result: Success. Reward: {result['reward']:.4f}\n"})
|
| 202 |
|
| 203 |
except Exception as e:
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
|
| 207 |
-
sim_action = {
|
| 208 |
-
"violation": env.case.get("detected_violation", "safe"),
|
| 209 |
-
"severity": 4,
|
| 210 |
-
"action": env.case.get("action_taken", "remove")
|
| 211 |
-
}
|
| 212 |
-
if env.task_id == "easy": sim_action = {"violation": sim_action["violation"]}
|
| 213 |
|
| 214 |
try:
|
| 215 |
-
result = await env.step(sim_action, client=
|
| 216 |
await websocket.send_json({"type": "step", "result": result})
|
| 217 |
await websocket.send_json({"type": "stream", "content": f"\n[DEMO] Passive Ruling Emitted. Final Reward: {result['reward']:.4f}\n"})
|
| 218 |
-
except RuntimeError:
|
| 219 |
-
await websocket.send_json({"type": "error", "message":
|
| 220 |
|
| 221 |
elif cmd == "state":
|
| 222 |
if env: await websocket.send_json({"type": "state", "state": env.state()})
|
|
|
|
| 14 |
|
| 15 |
import os
|
| 16 |
import json
|
| 17 |
+
from typing import Dict, Any, Optional
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
|
| 20 |
load_dotenv()
|
|
|
|
| 46 |
if os.path.exists(STATIC_DIR):
|
| 47 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
sessions: Dict[str, ContentGuardEnv] = {}
|
| 50 |
|
| 51 |
# LLM Inference Client (Defaulting to Hackathon standard endpoints)
|
| 52 |
+
DEFAULT_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
|
|
|
|
|
|
|
|
|
|
| 53 |
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
|
| 54 |
+
DEFAULT_API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("OPENAI_API_KEY")
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _is_placeholder_api_key(api_key: Optional[str]) -> bool:
|
| 58 |
+
if not api_key:
|
| 59 |
+
return True
|
| 60 |
+
lowered = api_key.strip().lower()
|
| 61 |
+
return lowered in {"sk-placeholder", "your_api_key", "changeme"}
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _resolve_session_client(cfg: Optional[Dict[str, Any]]) -> tuple[Optional[AsyncOpenAI], str]:
    """Build a runtime client from UI config with provider-aware routing rules.

    Args:
        cfg: The ``config`` object taken from a websocket message. Recognised
            keys are ``api_key``, ``base_url`` and ``model``. Because it is
            client-supplied, any value that is not a dict (or any field that
            is not a string) is treated as absent instead of raising.

    Returns:
        A ``(client, model)`` pair. When no usable key is supplied, the
        server-level default client ``aclient`` (which may be ``None``) and
        ``MODEL_NAME`` are returned.
    """
    if not cfg or not isinstance(cfg, dict):
        return aclient, MODEL_NAME

    def _text(field: str) -> str:
        # Only string values are meaningful here; anything else counts as empty.
        value = cfg.get(field)
        return value.strip() if isinstance(value, str) else ""

    api_key = _text("api_key")
    base_url = _text("base_url") or DEFAULT_BASE_URL
    requested_model = _text("model")
    model = requested_model or MODEL_NAME

    if _is_placeholder_api_key(api_key):
        # No runtime key provided: use server default if configured, otherwise deterministic grading fallback.
        return aclient, MODEL_NAME

    # Re-route when the key prefix and the endpoint host disagree.
    if api_key.startswith("hf_") and "openai.com" in base_url:
        base_url = "https://api-inference.huggingface.co/v1"
    elif api_key.startswith("sk-") and "huggingface.co" in base_url:
        base_url = "https://api.openai.com/v1"

    # A Hugging Face key without an explicit model gets a Hugging Face default
    # rather than the server-wide MODEL_NAME.
    if api_key.startswith("hf_") and not requested_model:
        model = "meta-llama/Llama-3-70b-instruct"

    return AsyncOpenAI(api_key=api_key, base_url=base_url), model
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _build_demo_action(env: ContentGuardEnv) -> Dict[str, Any]:
|
| 89 |
+
"""Generate a task-valid deterministic action when live inference is unavailable."""
|
| 90 |
+
gt = env.ground_truth or {}
|
| 91 |
+
if env.task_id == "easy":
|
| 92 |
+
return {"violation": gt.get("violation", "safe")}
|
| 93 |
+
if env.task_id == "medium":
|
| 94 |
+
return {
|
| 95 |
+
"action": gt.get("action", "no_action"),
|
| 96 |
+
"severity": int(gt.get("severity", 3)),
|
| 97 |
+
"reasoning": "Deterministic demo fallback due unavailable inference credentials.",
|
| 98 |
+
}
|
| 99 |
+
return {
|
| 100 |
+
"ruling": gt.get("ruling", "upheld"),
|
| 101 |
+
"policy_references": gt.get("policy_references", []),
|
| 102 |
+
"explanation": "Deterministic fallback path used because model inference is unavailable.",
|
| 103 |
+
"user_guidance": "Review platform standards and avoid repeating flagged behavior.",
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
aclient: Optional[AsyncOpenAI] = None
|
| 108 |
+
if not _is_placeholder_api_key(DEFAULT_API_KEY):
|
| 109 |
+
aclient = AsyncOpenAI(api_key=DEFAULT_API_KEY.strip(), base_url=DEFAULT_BASE_URL)
|
| 110 |
|
| 111 |
class ResetRequest(BaseModel):
|
| 112 |
task_id: str = Field(default="easy", description="Difficulty tier: easy | medium | hard")
|
|
|
|
| 159 |
"""Streams real-time reasoning traces and environment telemetry."""
|
| 160 |
await websocket.accept()
|
| 161 |
env: ContentGuardEnv | None = None
|
| 162 |
+
session_client: AsyncOpenAI | None = aclient
|
| 163 |
session_model: str = MODEL_NAME
|
| 164 |
|
| 165 |
try:
|
|
|
|
| 173 |
|
| 174 |
cmd = msg.get("action")
|
| 175 |
# Universal Credential Injector (Session-based)
|
| 176 |
+
if "config" in msg:
|
| 177 |
+
session_client, session_model = _resolve_session_client(msg.get("config"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
if cmd == "reset":
|
| 180 |
env = ContentGuardEnv()
|
|
|
|
| 199 |
if not env:
|
| 200 |
await websocket.send_json({"type": "error", "message": "Session inactive."})
|
| 201 |
continue
|
| 202 |
+
if env.done:
|
| 203 |
+
await websocket.send_json({"type": "error", "message": "Episode finished. Call reset() for a new case."})
|
| 204 |
+
continue
|
| 205 |
try:
|
| 206 |
await websocket.send_json({"type": "stream", "content": f"[START] ep={env.episode_id} task={env.task_id}\n"})
|
| 207 |
+
|
| 208 |
+
if session_client is None:
|
| 209 |
+
raise RuntimeError("No API credentials configured.")
|
| 210 |
|
| 211 |
sys_prompt = "Expert Safety Moderator. Respond with JSON only. Strictly align with platform policies."
|
| 212 |
user_prompt = f"Policy Task: {env._task_config['description']}\n\nEvidence:\n{json.dumps(env.case)}\n\nSubmit ruling in JSON."
|
|
|
|
| 224 |
if content:
|
| 225 |
full_response += content
|
| 226 |
await websocket.send_json({"type": "stream", "content": content})
|
| 227 |
+
|
| 228 |
+
if not full_response.strip():
|
| 229 |
+
raise ValueError("Model returned an empty response.")
|
| 230 |
|
| 231 |
# Clean/Parse Output
|
| 232 |
js_str = full_response.strip()
|
|
|
|
| 242 |
await websocket.send_json({"type": "stream", "content": f"[END] Result: Success. Reward: {result['reward']:.4f}\n"})
|
| 243 |
|
| 244 |
except Exception as e:
|
| 245 |
+
err_text = str(e)
|
| 246 |
+
lowered_err = err_text.lower()
|
| 247 |
+
if "invalid_api_key" in lowered_err or "incorrect api key" in lowered_err or "api key" in lowered_err or "401" in lowered_err:
|
| 248 |
+
err_text = "Authentication failed for the configured provider."
|
| 249 |
+
|
| 250 |
+
await websocket.send_json({"type": "stream", "content": f"\n\n[NOTICE] Inference Unavailable: {err_text}\nInitiating Passive Grader demo...\n"})
|
| 251 |
+
|
| 252 |
+
if env.done:
|
| 253 |
+
await websocket.send_json({"type": "error", "message": "Episode finished. Call reset() for a new case."})
|
| 254 |
+
continue
|
| 255 |
|
| 256 |
+
sim_action = _build_demo_action(env)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
try:
|
| 259 |
+
result = await env.step(sim_action, client=None, model=session_model)
|
| 260 |
await websocket.send_json({"type": "step", "result": result})
|
| 261 |
await websocket.send_json({"type": "stream", "content": f"\n[DEMO] Passive Ruling Emitted. Final Reward: {result['reward']:.4f}\n"})
|
| 262 |
+
except RuntimeError as step_error:
|
| 263 |
+
await websocket.send_json({"type": "error", "message": str(step_error)})
|
| 264 |
|
| 265 |
elif cmd == "state":
|
| 266 |
if env: await websocket.send_json({"type": "state", "state": env.state()})
|
server/env/environment.py
CHANGED
|
@@ -6,7 +6,6 @@ social media content moderation — the same challenge Meta faces at
|
|
| 6 |
100 billion+ content items per week.
|
| 7 |
"""
|
| 8 |
|
| 9 |
-
import uuid
|
| 10 |
import uuid
|
| 11 |
import random
|
| 12 |
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
| 6 |
100 billion+ content items per week.
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 9 |
import uuid
|
| 10 |
import random
|
| 11 |
from typing import Any, Dict, List, Optional, Tuple
|
server/env/graders.py
CHANGED
|
@@ -286,8 +286,15 @@ async def grade_action_async(
|
|
| 286 |
|
| 287 |
except Exception as e:
|
| 288 |
# Robust Fallback to Heuristic Grader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
reward, feedback, rationale = grade_action(action, ground_truth, task_id, case)
|
| 290 |
-
return reward, f"[FALLBACK] {feedback} (AI Judge Error: {
|
| 291 |
|
| 292 |
|
| 293 |
def grade_action(
|
|
|
|
| 286 |
|
| 287 |
except Exception as e:
|
| 288 |
# Robust Fallback to Heuristic Grader
|
| 289 |
+
err_text = str(e)
|
| 290 |
+
lowered = err_text.lower()
|
| 291 |
+
if "invalid_api_key" in lowered or "incorrect api key" in lowered or "api key" in lowered or "401" in lowered:
|
| 292 |
+
err_text = "Authentication failed for the configured provider."
|
| 293 |
+
elif "model" in lowered and "not found" in lowered:
|
| 294 |
+
err_text = "Configured model is unavailable for the selected provider."
|
| 295 |
+
|
| 296 |
reward, feedback, rationale = grade_action(action, ground_truth, task_id, case)
|
| 297 |
+
return reward, f"[FALLBACK] {feedback} (AI Judge Error: {err_text})", rationale
|
| 298 |
|
| 299 |
|
| 300 |
def grade_action(
|
server/static/app.js
CHANGED
|
@@ -10,6 +10,8 @@ const app = {
|
|
| 10 |
terminalActiveLine: null,
|
| 11 |
typewriterTimeout: null,
|
| 12 |
isAutoTraining: false,
|
|
|
|
|
|
|
| 13 |
metrics: { count: 0, sumReward: 0 },
|
| 14 |
scrollPending: false,
|
| 15 |
|
|
@@ -52,7 +54,14 @@ const app = {
|
|
| 52 |
};
|
| 53 |
|
| 54 |
this.ws.onmessage = (e) => {
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
if (data.type === 'reset') {
|
| 57 |
this.handleReset(data.observation);
|
| 58 |
} else if (data.type === 'stream') {
|
|
@@ -60,9 +69,7 @@ const app = {
|
|
| 60 |
} else if (data.type === 'step') {
|
| 61 |
this.handleStep(data.result);
|
| 62 |
} else if (data.type === 'error') {
|
| 63 |
-
this.
|
| 64 |
-
// Professional Toast instead of alert
|
| 65 |
-
console.error("Vault Error:", data.message);
|
| 66 |
}
|
| 67 |
};
|
| 68 |
},
|
|
@@ -70,6 +77,9 @@ const app = {
|
|
| 70 |
// ===== OPERATIONAL FLOW =====
|
| 71 |
startEpisode: function(taskId) {
|
| 72 |
this.currentTask = taskId;
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
// Update Sidebar States
|
| 75 |
document.querySelectorAll('.nav-item[data-task]').forEach(btn => {
|
|
@@ -80,6 +90,13 @@ const app = {
|
|
| 80 |
const labels = { easy: 'Tier I: Detection', medium: 'Tier II: Action', hard: 'Tier III: Adjudication' };
|
| 81 |
document.getElementById('breadcrumb-task').textContent = labels[taskId] || taskId;
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
const config = JSON.parse(sessionStorage.getItem('env_config') || '{}');
|
| 84 |
this.ws.send(JSON.stringify({
|
| 85 |
action: "reset",
|
|
@@ -97,6 +114,7 @@ const app = {
|
|
| 97 |
|
| 98 |
handleReset: function(obs) {
|
| 99 |
this.currentEpisodeId = obs.episode_id;
|
|
|
|
| 100 |
const c = obs.content_case;
|
| 101 |
const b = obs.policy_briefing;
|
| 102 |
|
|
@@ -122,6 +140,13 @@ const app = {
|
|
| 122 |
this.typeWriterEffect('val-content', `"${c.content}"`, 10);
|
| 123 |
this.renderActionForm(obs.action_space);
|
| 124 |
this.terminalPrint(`INFO: Ingesting context payload. Case ID: ${c.post_id}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
},
|
| 126 |
|
| 127 |
updateMetric: function(id, val) {
|
|
@@ -160,6 +185,15 @@ const app = {
|
|
| 160 |
},
|
| 161 |
|
| 162 |
submitAction: function() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
const payload = {};
|
| 164 |
const inputs = document.querySelectorAll('[id^="input-"]');
|
| 165 |
inputs.forEach(input => {
|
|
@@ -181,16 +215,40 @@ const app = {
|
|
| 181 |
btn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
|
| 182 |
btn.style.background = 'var(--zinc-50)';
|
| 183 |
btn.style.color = 'var(--zinc-950)';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
this.runAgent(true);
|
| 185 |
} else {
|
| 186 |
-
|
| 187 |
-
btn.style.background = '';
|
| 188 |
-
btn.style.color = '';
|
| 189 |
-
this.terminalPrint(`\nNOTICE: Autonomous training loop halted.`);
|
| 190 |
}
|
| 191 |
},
|
| 192 |
|
| 193 |
runAgent: function(isLooping) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
this.terminalPrint(`\nINFO: AI Judge invoked for Judicial Evaluation.`);
|
| 195 |
|
| 196 |
const term = document.getElementById('terminal-output');
|
|
@@ -207,6 +265,44 @@ const app = {
|
|
| 207 |
// Kinetic Active State
|
| 208 |
document.querySelectorAll('.card').forEach(c => c.style.borderColor = 'var(--indigo-500)');
|
| 209 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
handleStreamChunk: function(content) {
|
| 212 |
if (!this.terminalActiveLine) {
|
|
@@ -234,31 +330,32 @@ const app = {
|
|
| 234 |
this.terminalActiveLine = null;
|
| 235 |
}
|
| 236 |
|
|
|
|
|
|
|
| 237 |
// Reset Kinetic States
|
| 238 |
document.querySelectorAll('.card').forEach(c => c.style.borderColor = '');
|
| 239 |
|
| 240 |
const scoreDisplay = document.getElementById('reward-display');
|
| 241 |
const title = document.getElementById('diagnostic-title');
|
| 242 |
-
const
|
| 243 |
-
const
|
|
|
|
| 244 |
|
| 245 |
// --- Credential Interceptor (Vanguard Logic) ---
|
| 246 |
-
const
|
|
|
|
| 247 |
|
| 248 |
if (isAuthError) {
|
| 249 |
this.terminalPrint("ALERT: Security Handshake Failed. Halting operations.");
|
| 250 |
-
this.
|
| 251 |
title.textContent = "SECURITY_HANDSHAKE_FAILED";
|
| 252 |
title.style.color = "var(--rose-500)";
|
| 253 |
scoreDisplay.textContent = "FAULT";
|
| 254 |
scoreDisplay.style.color = "var(--rose-500)";
|
| 255 |
-
document.getElementById('btn-auto-loop').innerHTML = '<i class="fa-solid fa-bolt"></i> Training Loop';
|
| 256 |
-
document.getElementById('btn-auto-loop').style.background = '';
|
| 257 |
-
document.getElementById('btn-auto-loop').style.color = '';
|
| 258 |
} else {
|
| 259 |
title.textContent = "Judicial Alignment Captured";
|
| 260 |
title.style.color = "var(--indigo-500)";
|
| 261 |
-
scoreDisplay.textContent = rw.toFixed(4);
|
| 262 |
|
| 263 |
// Dynamic Grading Color
|
| 264 |
if (rw >= 0.8) scoreDisplay.style.color = 'var(--emerald-500)';
|
|
@@ -268,9 +365,7 @@ const app = {
|
|
| 268 |
|
| 269 |
document.getElementById('feedback-display').textContent = feedback;
|
| 270 |
document.getElementById('reward-overlay').style.display = 'flex';
|
| 271 |
-
|
| 272 |
-
document.getElementById('btn-run-agent').disabled = false;
|
| 273 |
-
document.getElementById('btn-auto-loop').disabled = false;
|
| 274 |
|
| 275 |
// HUD Stats Update
|
| 276 |
this.metrics.count++;
|
|
@@ -282,7 +377,12 @@ const app = {
|
|
| 282 |
setTimeout(() => {
|
| 283 |
if (this.isAutoTraining) {
|
| 284 |
this.closeReward();
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
}
|
| 287 |
}, 1800);
|
| 288 |
}
|
|
|
|
| 10 |
terminalActiveLine: null,
|
| 11 |
typewriterTimeout: null,
|
| 12 |
isAutoTraining: false,
|
| 13 |
+
autoRunAfterReset: false,
|
| 14 |
+
episodeDone: false,
|
| 15 |
metrics: { count: 0, sumReward: 0 },
|
| 16 |
scrollPending: false,
|
| 17 |
|
|
|
|
| 54 |
};
|
| 55 |
|
| 56 |
this.ws.onmessage = (e) => {
|
| 57 |
+
let data;
|
| 58 |
+
try {
|
| 59 |
+
data = JSON.parse(e.data);
|
| 60 |
+
} catch (_) {
|
| 61 |
+
this.terminalPrint('[ALERT] Invalid telemetry packet received from gateway.');
|
| 62 |
+
return;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
if (data.type === 'reset') {
|
| 66 |
this.handleReset(data.observation);
|
| 67 |
} else if (data.type === 'stream') {
|
|
|
|
| 69 |
} else if (data.type === 'step') {
|
| 70 |
this.handleStep(data.result);
|
| 71 |
} else if (data.type === 'error') {
|
| 72 |
+
this.handleServerError(data.message || 'Unspecified gateway error.');
|
|
|
|
|
|
|
| 73 |
}
|
| 74 |
};
|
| 75 |
},
|
|
|
|
| 77 |
// ===== OPERATIONAL FLOW =====
|
| 78 |
startEpisode: function(taskId) {
|
| 79 |
this.currentTask = taskId;
|
| 80 |
+
this.autoRunAfterReset = false;
|
| 81 |
+
this.episodeDone = false;
|
| 82 |
+
this.currentEpisodeId = null;
|
| 83 |
|
| 84 |
// Update Sidebar States
|
| 85 |
document.querySelectorAll('.nav-item[data-task]').forEach(btn => {
|
|
|
|
| 90 |
const labels = { easy: 'Tier I: Detection', medium: 'Tier II: Action', hard: 'Tier III: Adjudication' };
|
| 91 |
document.getElementById('breadcrumb-task').textContent = labels[taskId] || taskId;
|
| 92 |
|
| 93 |
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
| 94 |
+
this.terminalPrint('WARNING: Gateway link is not ready. Wait for LINK ACTIVE before starting.');
|
| 95 |
+
return;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
this.setAgentButtonsIdle();
|
| 99 |
+
|
| 100 |
const config = JSON.parse(sessionStorage.getItem('env_config') || '{}');
|
| 101 |
this.ws.send(JSON.stringify({
|
| 102 |
action: "reset",
|
|
|
|
| 114 |
|
| 115 |
handleReset: function(obs) {
|
| 116 |
this.currentEpisodeId = obs.episode_id;
|
| 117 |
+
this.episodeDone = false;
|
| 118 |
const c = obs.content_case;
|
| 119 |
const b = obs.policy_briefing;
|
| 120 |
|
|
|
|
| 140 |
this.typeWriterEffect('val-content', `"${c.content}"`, 10);
|
| 141 |
this.renderActionForm(obs.action_space);
|
| 142 |
this.terminalPrint(`INFO: Ingesting context payload. Case ID: ${c.post_id}`);
|
| 143 |
+
|
| 144 |
+
if (this.autoRunAfterReset && this.isAutoTraining) {
|
| 145 |
+
this.autoRunAfterReset = false;
|
| 146 |
+
setTimeout(() => {
|
| 147 |
+
if (this.isAutoTraining) this.runAgent(true);
|
| 148 |
+
}, 180);
|
| 149 |
+
}
|
| 150 |
},
|
| 151 |
|
| 152 |
updateMetric: function(id, val) {
|
|
|
|
| 185 |
},
|
| 186 |
|
| 187 |
submitAction: function() {
|
| 188 |
+
if (!this.currentEpisodeId || this.episodeDone) {
|
| 189 |
+
this.terminalPrint('NOTICE: Active episode required. Start a tier to submit a ruling.');
|
| 190 |
+
return;
|
| 191 |
+
}
|
| 192 |
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
| 193 |
+
this.terminalPrint('WARNING: Gateway link is not ready.');
|
| 194 |
+
return;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
const payload = {};
|
| 198 |
const inputs = document.querySelectorAll('[id^="input-"]');
|
| 199 |
inputs.forEach(input => {
|
|
|
|
| 215 |
btn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
|
| 216 |
btn.style.background = 'var(--zinc-50)';
|
| 217 |
btn.style.color = 'var(--zinc-950)';
|
| 218 |
+
|
| 219 |
+
if (!this.currentTask) {
|
| 220 |
+
this.stopAutoLoop('NOTICE: Pick an environment tier before enabling training loop.');
|
| 221 |
+
return;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
if (!this.currentEpisodeId || this.episodeDone) {
|
| 225 |
+
this.autoRunAfterReset = true;
|
| 226 |
+
this.startEpisode(this.currentTask);
|
| 227 |
+
return;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
this.runAgent(true);
|
| 231 |
} else {
|
| 232 |
+
this.stopAutoLoop('\nNOTICE: Autonomous training loop halted.');
|
|
|
|
|
|
|
|
|
|
| 233 |
}
|
| 234 |
},
|
| 235 |
|
| 236 |
runAgent: function(isLooping) {
|
| 237 |
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
| 238 |
+
this.stopAutoLoop('WARNING: Gateway link unavailable. Reconnect and retry.');
|
| 239 |
+
return;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
if (!this.currentEpisodeId || this.episodeDone) {
|
| 243 |
+
if (isLooping && this.currentTask) {
|
| 244 |
+
this.autoRunAfterReset = true;
|
| 245 |
+
this.startEpisode(this.currentTask);
|
| 246 |
+
} else {
|
| 247 |
+
this.terminalPrint('NOTICE: Episode finished. Start a tier to create a new case.');
|
| 248 |
+
}
|
| 249 |
+
return;
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
this.terminalPrint(`\nINFO: AI Judge invoked for Judicial Evaluation.`);
|
| 253 |
|
| 254 |
const term = document.getElementById('terminal-output');
|
|
|
|
| 265 |
// Kinetic Active State
|
| 266 |
document.querySelectorAll('.card').forEach(c => c.style.borderColor = 'var(--indigo-500)');
|
| 267 |
},
|
| 268 |
+
|
| 269 |
+
setAgentButtonsIdle: function() {
|
| 270 |
+
const runBtn = document.getElementById('btn-run-agent');
|
| 271 |
+
const loopBtn = document.getElementById('btn-auto-loop');
|
| 272 |
+
if (runBtn) runBtn.disabled = false;
|
| 273 |
+
if (loopBtn) {
|
| 274 |
+
loopBtn.disabled = false;
|
| 275 |
+
if (this.isAutoTraining) {
|
| 276 |
+
loopBtn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
|
| 277 |
+
loopBtn.style.background = 'var(--zinc-50)';
|
| 278 |
+
loopBtn.style.color = 'var(--zinc-950)';
|
| 279 |
+
} else {
|
| 280 |
+
loopBtn.innerHTML = '<i class="fa-solid fa-bolt"></i> Training Loop';
|
| 281 |
+
loopBtn.style.background = '';
|
| 282 |
+
loopBtn.style.color = '';
|
| 283 |
+
}
|
| 284 |
+
}
|
| 285 |
+
},
|
| 286 |
+
|
| 287 |
+
stopAutoLoop: function(message) {
|
| 288 |
+
this.isAutoTraining = false;
|
| 289 |
+
this.autoRunAfterReset = false;
|
| 290 |
+
this.setAgentButtonsIdle();
|
| 291 |
+
if (message) this.terminalPrint(message);
|
| 292 |
+
},
|
| 293 |
+
|
| 294 |
+
handleServerError: function(message) {
|
| 295 |
+
const text = message || 'Unknown internal error';
|
| 296 |
+
this.terminalPrint(`[ALERT] Internal Error: ${text}`);
|
| 297 |
+
console.error('ContentGuard Error:', text);
|
| 298 |
+
|
| 299 |
+
const lowered = text.toLowerCase();
|
| 300 |
+
if (lowered.includes('episode finished') || lowered.includes('reset()') || lowered.includes('api key') || lowered.includes('authentication')) {
|
| 301 |
+
this.stopAutoLoop('NOTICE: Loop paused due to gateway guard. Resetting case is required.');
|
| 302 |
+
} else {
|
| 303 |
+
this.setAgentButtonsIdle();
|
| 304 |
+
}
|
| 305 |
+
},
|
| 306 |
|
| 307 |
handleStreamChunk: function(content) {
|
| 308 |
if (!this.terminalActiveLine) {
|
|
|
|
| 330 |
this.terminalActiveLine = null;
|
| 331 |
}
|
| 332 |
|
| 333 |
+
this.episodeDone = true;
|
| 334 |
+
|
| 335 |
// Reset Kinetic States
|
| 336 |
document.querySelectorAll('.card').forEach(c => c.style.borderColor = '');
|
| 337 |
|
| 338 |
const scoreDisplay = document.getElementById('reward-display');
|
| 339 |
const title = document.getElementById('diagnostic-title');
|
| 340 |
+
const parsedReward = Number(result.reward);
|
| 341 |
+
const rw = Number.isFinite(parsedReward) ? parsedReward : 0;
|
| 342 |
+
const feedback = (result.info && result.info.feedback) ? result.info.feedback : "";
|
| 343 |
|
| 344 |
// --- Credential Interceptor (Vanguard Logic) ---
|
| 345 |
+
const feedbackLower = feedback.toLowerCase();
|
| 346 |
+
const isAuthError = feedbackLower.includes("401") || feedbackLower.includes("invalid api key") || feedbackLower.includes("incorrect api key") || feedbackLower.includes("authentication failed");
|
| 347 |
|
| 348 |
if (isAuthError) {
|
| 349 |
this.terminalPrint("ALERT: Security Handshake Failed. Halting operations.");
|
| 350 |
+
this.stopAutoLoop();
|
| 351 |
title.textContent = "SECURITY_HANDSHAKE_FAILED";
|
| 352 |
title.style.color = "var(--rose-500)";
|
| 353 |
scoreDisplay.textContent = "FAULT";
|
| 354 |
scoreDisplay.style.color = "var(--rose-500)";
|
|
|
|
|
|
|
|
|
|
| 355 |
} else {
|
| 356 |
title.textContent = "Judicial Alignment Captured";
|
| 357 |
title.style.color = "var(--indigo-500)";
|
| 358 |
+
scoreDisplay.textContent = Number.isFinite(rw) ? rw.toFixed(4) : '0.0000';
|
| 359 |
|
| 360 |
// Dynamic Grading Color
|
| 361 |
if (rw >= 0.8) scoreDisplay.style.color = 'var(--emerald-500)';
|
|
|
|
| 365 |
|
| 366 |
document.getElementById('feedback-display').textContent = feedback;
|
| 367 |
document.getElementById('reward-overlay').style.display = 'flex';
|
| 368 |
+
this.setAgentButtonsIdle();
|
|
|
|
|
|
|
| 369 |
|
| 370 |
// HUD Stats Update
|
| 371 |
this.metrics.count++;
|
|
|
|
| 377 |
setTimeout(() => {
|
| 378 |
if (this.isAutoTraining) {
|
| 379 |
this.closeReward();
|
| 380 |
+
if (this.currentTask) {
|
| 381 |
+
this.autoRunAfterReset = true;
|
| 382 |
+
this.startEpisode(this.currentTask);
|
| 383 |
+
} else {
|
| 384 |
+
this.stopAutoLoop('NOTICE: Training loop paused because no tier is selected.');
|
| 385 |
+
}
|
| 386 |
}
|
| 387 |
}, 1800);
|
| 388 |
}
|