Spaces:

mj064
/

ContentGuardEnv

Sleeping

App Files Community

mj064 committed on Apr 12

Commit

d23cbe2

verified ·

1 Parent(s): 1ca8240

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

server/app.py +84 -40
server/env/environment.py +0 -1
server/env/graders.py +8 -1
server/static/app.js +120 -20

server/app.py CHANGED Viewed

@@ -14,7 +14,7 @@ Key Features:
 import os
 import json
-from typing import Dict, Any
 from dotenv import load_dotenv
 load_dotenv()
@@ -46,23 +46,67 @@ STATIC_DIR = os.path.join(BASE_DIR, "static")
 if os.path.exists(STATIC_DIR):
     app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
-@app.get("/", response_class=HTMLResponse, tags=["UI"])
-async def read_dashboard():
-    """Serves the ContentGuard monitoring dashboard."""
-    index_path = os.path.join(STATIC_DIR, "index.html")
-    if os.path.exists(index_path):
-        with open(index_path, "r", encoding="utf-8") as f:
-            return f.read()
-    return "Dashboard UI source missing."
 sessions: Dict[str, ContentGuardEnv] = {}
 # LLM Inference Client (Defaulting to Hackathon standard endpoints)
-aclient = AsyncOpenAI(
-    api_key=os.environ.get("HF_TOKEN", "sk-placeholder"),
-    base_url=os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
-)
 MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
 class ResetRequest(BaseModel):
     task_id: str = Field(default="easy", description="Difficulty tier: easy | medium | hard")
@@ -115,7 +159,7 @@ async def policy_trace_socket(websocket: WebSocket):
     """Streams real-time reasoning traces and environment telemetry."""
     await websocket.accept()
     env: ContentGuardEnv | None = None
-    session_client: AsyncOpenAI | None = None
     session_model: str = MODEL_NAME
     try:
@@ -129,20 +173,8 @@ async def policy_trace_socket(websocket: WebSocket):
             cmd = msg.get("action")
             # Universal Credential Injector (Session-based)
-            cfg = msg.get("config", {})
-            if cfg.get("api_key"):
-                api_key = cfg["api_key"]
-                base_url = cfg.get("base_url") or "https://api.openai.com/v1"
-                # Intelligent Router logic
-                if api_key.startswith("hf_") and "openai.com" in base_url:
-                    base_url = "https://api-inference.huggingface.co/v1"
-                session_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-                session_model = cfg.get("model") or (MODEL_NAME if "openai.com" in base_url else "meta-llama/Llama-3-70b-instruct")
-            elif session_client is None:
-                session_client = aclient
-                session_model = MODEL_NAME
             if cmd == "reset":
                 env = ContentGuardEnv()
@@ -167,8 +199,14 @@ async def policy_trace_socket(websocket: WebSocket):
                 if not env:
                     await websocket.send_json({"type": "error", "message": "Session inactive."})
                     continue
                 try:
                     await websocket.send_json({"type": "stream", "content": f"[START] ep={env.episode_id} task={env.task_id}\n"})
                     sys_prompt = "Expert Safety Moderator. Respond with JSON only. Strictly align with platform policies."
                     user_prompt = f"Policy Task: {env._task_config['description']}\n\nEvidence:\n{json.dumps(env.case)}\n\nSubmit ruling in JSON."
@@ -186,6 +224,9 @@ async def policy_trace_socket(websocket: WebSocket):
                         if content:
                             full_response += content
                             await websocket.send_json({"type": "stream", "content": content})
                     # Clean/Parse Output
                     js_str = full_response.strip()
@@ -201,22 +242,25 @@ async def policy_trace_socket(websocket: WebSocket):
                     await websocket.send_json({"type": "stream", "content": f"[END] Result: Success. Reward: {result['reward']:.4f}\n"})
                 except Exception as e:
-                    await websocket.send_json({"type": "stream", "content": f"\n\n[NOTICE] Inference Unavailable: {str(e)}\nInitiating Passive Grader demo...\n"})
-                    # Deterministic Demo Mode: Sustains the visual loop for grading without active tokens
-                    sim_action = {
-                        "violation": env.case.get("detected_violation", "safe"),
-                        "severity": 4,
-                        "action": env.case.get("action_taken", "remove")
-                    }
-                    if env.task_id == "easy": sim_action = {"violation": sim_action["violation"]}
                     try:
-                        result = await env.step(sim_action, client=session_client, model=session_model)
                         await websocket.send_json({"type": "step", "result": result})
                         await websocket.send_json({"type": "stream", "content": f"\n[DEMO] Passive Ruling Emitted. Final Reward: {result['reward']:.4f}\n"})
-                    except RuntimeError:
-                        await websocket.send_json({"type": "error", "message": "Episode concluded."})
             elif cmd == "state":
                 if env: await websocket.send_json({"type": "state", "state": env.state()})

 import os
 import json
+from typing import Dict, Any, Optional
 from dotenv import load_dotenv
 load_dotenv()
 if os.path.exists(STATIC_DIR):
     app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 sessions: Dict[str, ContentGuardEnv] = {}
 # LLM Inference Client (Defaulting to Hackathon standard endpoints)
+DEFAULT_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
 MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
+DEFAULT_API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("OPENAI_API_KEY")
+def _is_placeholder_api_key(api_key: Optional[str]) -> bool:
+    if not api_key:
+        return True
+    lowered = api_key.strip().lower()
+    return lowered in {"sk-placeholder", "your_api_key", "changeme"}
+def _resolve_session_client(cfg: Optional[Dict[str, Any]]) -> tuple[Optional[AsyncOpenAI], str]:
+    """Build a runtime client from UI config with provider-aware routing rules."""
+    if not cfg:
+        return aclient, MODEL_NAME
+    api_key = (cfg.get("api_key") or "").strip()
+    base_url = (cfg.get("base_url") or "").strip() or DEFAULT_BASE_URL
+    model = (cfg.get("model") or "").strip() or MODEL_NAME
+    if _is_placeholder_api_key(api_key):
+        # No runtime key provided: use server default if configured, otherwise deterministic grading fallback.
+        return aclient, MODEL_NAME
+    if api_key.startswith("hf_") and "openai.com" in base_url:
+        base_url = "https://api-inference.huggingface.co/v1"
+    elif api_key.startswith("sk-") and "huggingface.co" in base_url:
+        base_url = "https://api.openai.com/v1"
+    if api_key.startswith("hf_") and not (cfg.get("model") or "").strip():
+        model = "meta-llama/Llama-3-70b-instruct"
+    return AsyncOpenAI(api_key=api_key, base_url=base_url), model
+def _build_demo_action(env: ContentGuardEnv) -> Dict[str, Any]:
+    """Generate a task-valid deterministic action when live inference is unavailable."""
+    gt = env.ground_truth or {}
+    if env.task_id == "easy":
+        return {"violation": gt.get("violation", "safe")}
+    if env.task_id == "medium":
+        return {
+            "action": gt.get("action", "no_action"),
+            "severity": int(gt.get("severity", 3)),
+            "reasoning": "Deterministic demo fallback due unavailable inference credentials.",
+        }
+    return {
+        "ruling": gt.get("ruling", "upheld"),
+        "policy_references": gt.get("policy_references", []),
+        "explanation": "Deterministic fallback path used because model inference is unavailable.",
+        "user_guidance": "Review platform standards and avoid repeating flagged behavior.",
+    }
+aclient: Optional[AsyncOpenAI] = None
+if not _is_placeholder_api_key(DEFAULT_API_KEY):
+    aclient = AsyncOpenAI(api_key=DEFAULT_API_KEY.strip(), base_url=DEFAULT_BASE_URL)
 class ResetRequest(BaseModel):
     task_id: str = Field(default="easy", description="Difficulty tier: easy | medium | hard")
     """Streams real-time reasoning traces and environment telemetry."""
     await websocket.accept()
     env: ContentGuardEnv | None = None
+    session_client: AsyncOpenAI | None = aclient
     session_model: str = MODEL_NAME
     try:
             cmd = msg.get("action")
             # Universal Credential Injector (Session-based)
+            if "config" in msg:
+                session_client, session_model = _resolve_session_client(msg.get("config"))
             if cmd == "reset":
                 env = ContentGuardEnv()
                 if not env:
                     await websocket.send_json({"type": "error", "message": "Session inactive."})
                     continue
+                if env.done:
+                    await websocket.send_json({"type": "error", "message": "Episode finished. Call reset() for a new case."})
+                    continue
                 try:
                     await websocket.send_json({"type": "stream", "content": f"[START] ep={env.episode_id} task={env.task_id}\n"})
+                    if session_client is None:
+                        raise RuntimeError("No API credentials configured.")
                     sys_prompt = "Expert Safety Moderator. Respond with JSON only. Strictly align with platform policies."
                     user_prompt = f"Policy Task: {env._task_config['description']}\n\nEvidence:\n{json.dumps(env.case)}\n\nSubmit ruling in JSON."
                         if content:
                             full_response += content
                             await websocket.send_json({"type": "stream", "content": content})
+                    if not full_response.strip():
+                        raise ValueError("Model returned an empty response.")
                     # Clean/Parse Output
                     js_str = full_response.strip()
                     await websocket.send_json({"type": "stream", "content": f"[END] Result: Success. Reward: {result['reward']:.4f}\n"})
                 except Exception as e:
+                    err_text = str(e)
+                    lowered_err = err_text.lower()
+                    if "invalid_api_key" in lowered_err or "incorrect api key" in lowered_err or "api key" in lowered_err or "401" in lowered_err:
+                        err_text = "Authentication failed for the configured provider."
+                    await websocket.send_json({"type": "stream", "content": f"\n\n[NOTICE] Inference Unavailable: {err_text}\nInitiating Passive Grader demo...\n"})
+                    if env.done:
+                        await websocket.send_json({"type": "error", "message": "Episode finished. Call reset() for a new case."})
+                        continue
+                    sim_action = _build_demo_action(env)
                     try:
+                        result = await env.step(sim_action, client=None, model=session_model)
                         await websocket.send_json({"type": "step", "result": result})
                         await websocket.send_json({"type": "stream", "content": f"\n[DEMO] Passive Ruling Emitted. Final Reward: {result['reward']:.4f}\n"})
+                    except RuntimeError as step_error:
+                        await websocket.send_json({"type": "error", "message": str(step_error)})
             elif cmd == "state":
                 if env: await websocket.send_json({"type": "state", "state": env.state()})

server/env/environment.py CHANGED Viewed

@@ -6,7 +6,6 @@ social media content moderation — the same challenge Meta faces at
 100 billion+ content items per week.
 """
-import uuid
 import uuid
 import random
 from typing import Any, Dict, List, Optional, Tuple

 100 billion+ content items per week.
 """
 import uuid
 import random
 from typing import Any, Dict, List, Optional, Tuple

server/env/graders.py CHANGED Viewed

@@ -286,8 +286,15 @@ async def grade_action_async(
     except Exception as e:
         # Robust Fallback to Heuristic Grader
         reward, feedback, rationale = grade_action(action, ground_truth, task_id, case)
-        return reward, f"[FALLBACK] {feedback} (AI Judge Error: {str(e)})", rationale
 def grade_action(

     except Exception as e:
         # Robust Fallback to Heuristic Grader
+        err_text = str(e)
+        lowered = err_text.lower()
+        if "invalid_api_key" in lowered or "incorrect api key" in lowered or "api key" in lowered or "401" in lowered:
+            err_text = "Authentication failed for the configured provider."
+        elif "model" in lowered and "not found" in lowered:
+            err_text = "Configured model is unavailable for the selected provider."
         reward, feedback, rationale = grade_action(action, ground_truth, task_id, case)
+        return reward, f"[FALLBACK] {feedback} (AI Judge Error: {err_text})", rationale
 def grade_action(

server/static/app.js CHANGED Viewed

@@ -10,6 +10,8 @@ const app = {
     terminalActiveLine: null,
     typewriterTimeout: null,
     isAutoTraining: false,
     metrics: { count: 0, sumReward: 0 },
     scrollPending: false,
@@ -52,7 +54,14 @@ const app = {
         };
         this.ws.onmessage = (e) => {
-            const data = JSON.parse(e.data);
             if (data.type === 'reset') {
                 this.handleReset(data.observation);
             } else if (data.type === 'stream') {
@@ -60,9 +69,7 @@ const app = {
             } else if (data.type === 'step') {
                 this.handleStep(data.result);
             } else if (data.type === 'error') {
-                this.terminalPrint(`[ALERT] Internal Error: ${data.message}`);
-                // Professional Toast instead of alert
-                console.error("Vault Error:", data.message);
             }
         };
     },
@@ -70,6 +77,9 @@ const app = {
     // ===== OPERATIONAL FLOW =====
     startEpisode: function(taskId) {
         this.currentTask = taskId;
         // Update Sidebar States
         document.querySelectorAll('.nav-item[data-task]').forEach(btn => {
@@ -80,6 +90,13 @@ const app = {
         const labels = { easy: 'Tier I: Detection', medium: 'Tier II: Action', hard: 'Tier III: Adjudication' };
         document.getElementById('breadcrumb-task').textContent = labels[taskId] || taskId;
         const config = JSON.parse(sessionStorage.getItem('env_config') || '{}');
         this.ws.send(JSON.stringify({
             action: "reset",
@@ -97,6 +114,7 @@ const app = {
     handleReset: function(obs) {
         this.currentEpisodeId = obs.episode_id;
         const c = obs.content_case;
         const b = obs.policy_briefing;
@@ -122,6 +140,13 @@ const app = {
         this.typeWriterEffect('val-content', `"${c.content}"`, 10);
         this.renderActionForm(obs.action_space);
         this.terminalPrint(`INFO: Ingesting context payload. Case ID: ${c.post_id}`);
     },
     updateMetric: function(id, val) {
@@ -160,6 +185,15 @@ const app = {
     },
     submitAction: function() {
         const payload = {};
         const inputs = document.querySelectorAll('[id^="input-"]');
         inputs.forEach(input => {
@@ -181,16 +215,40 @@ const app = {
             btn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
             btn.style.background = 'var(--zinc-50)';
             btn.style.color = 'var(--zinc-950)';
             this.runAgent(true);
         } else {
-            btn.innerHTML = '<i class="fa-solid fa-bolt"></i> Training Loop';
-            btn.style.background = '';
-            btn.style.color = '';
-            this.terminalPrint(`\nNOTICE: Autonomous training loop halted.`);
         }
     },
     runAgent: function(isLooping) {
         this.terminalPrint(`\nINFO: AI Judge invoked for Judicial Evaluation.`);
         const term = document.getElementById('terminal-output');
@@ -207,6 +265,44 @@ const app = {
         // Kinetic Active State
         document.querySelectorAll('.card').forEach(c => c.style.borderColor = 'var(--indigo-500)');
     },
     handleStreamChunk: function(content) {
         if (!this.terminalActiveLine) {
@@ -234,31 +330,32 @@ const app = {
             this.terminalActiveLine = null;
         }
         // Reset Kinetic States
         document.querySelectorAll('.card').forEach(c => c.style.borderColor = '');
         const scoreDisplay = document.getElementById('reward-display');
         const title = document.getElementById('diagnostic-title');
-        const rw = result.reward;
-        const feedback = result.info.feedback || "";
         // --- Credential Interceptor (Vanguard Logic) ---
-        const isAuthError = feedback.includes("401") || feedback.includes("Invalid API key") || feedback.includes("Incorrect API key");
         if (isAuthError) {
             this.terminalPrint("ALERT: Security Handshake Failed. Halting operations.");
-            this.isAutoTraining = false; // Kill loop
             title.textContent = "SECURITY_HANDSHAKE_FAILED";
             title.style.color = "var(--rose-500)";
             scoreDisplay.textContent = "FAULT";
             scoreDisplay.style.color = "var(--rose-500)";
-            document.getElementById('btn-auto-loop').innerHTML = '<i class="fa-solid fa-bolt"></i> Training Loop';
-            document.getElementById('btn-auto-loop').style.background = '';
-            document.getElementById('btn-auto-loop').style.color = '';
         } else {
             title.textContent = "Judicial Alignment Captured";
             title.style.color = "var(--indigo-500)";
-            scoreDisplay.textContent = rw.toFixed(4);
             // Dynamic Grading Color
             if (rw >= 0.8) scoreDisplay.style.color = 'var(--emerald-500)';
@@ -268,9 +365,7 @@ const app = {
         document.getElementById('feedback-display').textContent = feedback;
         document.getElementById('reward-overlay').style.display = 'flex';
-        document.getElementById('btn-run-agent').disabled = false;
-        document.getElementById('btn-auto-loop').disabled = false;
         // HUD Stats Update
         this.metrics.count++;
@@ -282,7 +377,12 @@ const app = {
             setTimeout(() => {
                 if (this.isAutoTraining) {
                     this.closeReward();
-                    setTimeout(() => { if (this.isAutoTraining) this.runAgent(true); }, 400);
                 }
             }, 1800);
         }

     terminalActiveLine: null,
     typewriterTimeout: null,
     isAutoTraining: false,
+    autoRunAfterReset: false,
+    episodeDone: false,
     metrics: { count: 0, sumReward: 0 },
     scrollPending: false,
         };
         this.ws.onmessage = (e) => {
+            let data;
+            try {
+                data = JSON.parse(e.data);
+            } catch (_) {
+                this.terminalPrint('[ALERT] Invalid telemetry packet received from gateway.');
+                return;
+            }
             if (data.type === 'reset') {
                 this.handleReset(data.observation);
             } else if (data.type === 'stream') {
             } else if (data.type === 'step') {
                 this.handleStep(data.result);
             } else if (data.type === 'error') {
+                this.handleServerError(data.message || 'Unspecified gateway error.');
             }
         };
     },
     // ===== OPERATIONAL FLOW =====
     startEpisode: function(taskId) {
         this.currentTask = taskId;
+        this.autoRunAfterReset = false;
+        this.episodeDone = false;
+        this.currentEpisodeId = null;
         // Update Sidebar States
         document.querySelectorAll('.nav-item[data-task]').forEach(btn => {
         const labels = { easy: 'Tier I: Detection', medium: 'Tier II: Action', hard: 'Tier III: Adjudication' };
         document.getElementById('breadcrumb-task').textContent = labels[taskId] || taskId;
+        if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
+            this.terminalPrint('WARNING: Gateway link is not ready. Wait for LINK ACTIVE before starting.');
+            return;
+        }
+        this.setAgentButtonsIdle();
         const config = JSON.parse(sessionStorage.getItem('env_config') || '{}');
         this.ws.send(JSON.stringify({
             action: "reset",
     handleReset: function(obs) {
         this.currentEpisodeId = obs.episode_id;
+        this.episodeDone = false;
         const c = obs.content_case;
         const b = obs.policy_briefing;
         this.typeWriterEffect('val-content', `"${c.content}"`, 10);
         this.renderActionForm(obs.action_space);
         this.terminalPrint(`INFO: Ingesting context payload. Case ID: ${c.post_id}`);
+        if (this.autoRunAfterReset && this.isAutoTraining) {
+            this.autoRunAfterReset = false;
+            setTimeout(() => {
+                if (this.isAutoTraining) this.runAgent(true);
+            }, 180);
+        }
     },
     updateMetric: function(id, val) {
     },
     submitAction: function() {
+        if (!this.currentEpisodeId || this.episodeDone) {
+            this.terminalPrint('NOTICE: Active episode required. Start a tier to submit a ruling.');
+            return;
+        }
+        if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
+            this.terminalPrint('WARNING: Gateway link is not ready.');
+            return;
+        }
         const payload = {};
         const inputs = document.querySelectorAll('[id^="input-"]');
         inputs.forEach(input => {
             btn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
             btn.style.background = 'var(--zinc-50)';
             btn.style.color = 'var(--zinc-950)';
+            if (!this.currentTask) {
+                this.stopAutoLoop('NOTICE: Pick an environment tier before enabling training loop.');
+                return;
+            }
+            if (!this.currentEpisodeId || this.episodeDone) {
+                this.autoRunAfterReset = true;
+                this.startEpisode(this.currentTask);
+                return;
+            }
             this.runAgent(true);
         } else {
+            this.stopAutoLoop('\nNOTICE: Autonomous training loop halted.');
         }
     },
     runAgent: function(isLooping) {
+        if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
+            this.stopAutoLoop('WARNING: Gateway link unavailable. Reconnect and retry.');
+            return;
+        }
+        if (!this.currentEpisodeId || this.episodeDone) {
+            if (isLooping && this.currentTask) {
+                this.autoRunAfterReset = true;
+                this.startEpisode(this.currentTask);
+            } else {
+                this.terminalPrint('NOTICE: Episode finished. Start a tier to create a new case.');
+            }
+            return;
+        }
         this.terminalPrint(`\nINFO: AI Judge invoked for Judicial Evaluation.`);
         const term = document.getElementById('terminal-output');
         // Kinetic Active State
         document.querySelectorAll('.card').forEach(c => c.style.borderColor = 'var(--indigo-500)');
     },
+    setAgentButtonsIdle: function() {
+        const runBtn = document.getElementById('btn-run-agent');
+        const loopBtn = document.getElementById('btn-auto-loop');
+        if (runBtn) runBtn.disabled = false;
+        if (loopBtn) {
+            loopBtn.disabled = false;
+            if (this.isAutoTraining) {
+                loopBtn.innerHTML = '<i class="fa-solid fa-stop"></i> Terminate Loop';
+                loopBtn.style.background = 'var(--zinc-50)';
+                loopBtn.style.color = 'var(--zinc-950)';
+            } else {
+                loopBtn.innerHTML = '<i class="fa-solid fa-bolt"></i> Training Loop';
+                loopBtn.style.background = '';
+                loopBtn.style.color = '';
+            }
+        }
+    },
+    stopAutoLoop: function(message) {
+        this.isAutoTraining = false;
+        this.autoRunAfterReset = false;
+        this.setAgentButtonsIdle();
+        if (message) this.terminalPrint(message);
+    },
+    handleServerError: function(message) {
+        const text = message || 'Unknown internal error';
+        this.terminalPrint(`[ALERT] Internal Error: ${text}`);
+        console.error('ContentGuard Error:', text);
+        const lowered = text.toLowerCase();
+        if (lowered.includes('episode finished') || lowered.includes('reset()') || lowered.includes('api key') || lowered.includes('authentication')) {
+            this.stopAutoLoop('NOTICE: Loop paused due to gateway guard. Resetting case is required.');
+        } else {
+            this.setAgentButtonsIdle();
+        }
+    },
     handleStreamChunk: function(content) {
         if (!this.terminalActiveLine) {
             this.terminalActiveLine = null;
         }
+        this.episodeDone = true;
         // Reset Kinetic States
         document.querySelectorAll('.card').forEach(c => c.style.borderColor = '');
         const scoreDisplay = document.getElementById('reward-display');
         const title = document.getElementById('diagnostic-title');
+        const parsedReward = Number(result.reward);
+        const rw = Number.isFinite(parsedReward) ? parsedReward : 0;
+        const feedback = (result.info && result.info.feedback) ? result.info.feedback : "";
         // --- Credential Interceptor (Vanguard Logic) ---
+        const feedbackLower = feedback.toLowerCase();
+        const isAuthError = feedbackLower.includes("401") || feedbackLower.includes("invalid api key") || feedbackLower.includes("incorrect api key") || feedbackLower.includes("authentication failed");
         if (isAuthError) {
             this.terminalPrint("ALERT: Security Handshake Failed. Halting operations.");
+            this.stopAutoLoop();
             title.textContent = "SECURITY_HANDSHAKE_FAILED";
             title.style.color = "var(--rose-500)";
             scoreDisplay.textContent = "FAULT";
             scoreDisplay.style.color = "var(--rose-500)";
         } else {
             title.textContent = "Judicial Alignment Captured";
             title.style.color = "var(--indigo-500)";
+            scoreDisplay.textContent = Number.isFinite(rw) ? rw.toFixed(4) : '0.0000';
             // Dynamic Grading Color
             if (rw >= 0.8) scoreDisplay.style.color = 'var(--emerald-500)';
         document.getElementById('feedback-display').textContent = feedback;
         document.getElementById('reward-overlay').style.display = 'flex';
+        this.setAgentButtonsIdle();
         // HUD Stats Update
         this.metrics.count++;
             setTimeout(() => {
                 if (this.isAutoTraining) {
                     this.closeReward();
+                    if (this.currentTask) {
+                        this.autoRunAfterReset = true;
+                        this.startEpisode(this.currentTask);
+                    } else {
+                        this.stopAutoLoop('NOTICE: Training loop paused because no tier is selected.');
+                    }
                 }
             }, 1800);
         }