Spaces:

Otter21
/

OPENENV_RL_01

Running

App Files Files Community

Siddharaj Shirke committed 26 days ago

Commit

3eae4cc

0 Parent(s):

deploy: fresh snapshot to Hugging Face Space

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +65 -0
.env.example +144 -0
.gitignore +59 -0
Blog.md +314 -0
Dockerfile +51 -0
GovWorkflow_RL_ENV.ipynb +0 -0
README.md +191 -0
app/README.md +23 -0
app/__init__.py +20 -0
app/api_gateway.py +257 -0
app/baselines.py +161 -0
app/config.py +87 -0
app/engine.py +1712 -0
app/env.py +553 -0
app/event_engine.py +101 -0
app/graders.py +176 -0
app/main.py +0 -0
app/models.py +509 -0
app/persistence.py +304 -0
app/reward.py +108 -0
app/sector_profiles.py +183 -0
app/signal_computer.py +81 -0
app/simulator.py +1106 -0
app/state_machine.py +107 -0
app/story_router.py +407 -0
app/tasks.py +144 -0
app/training_jobs.py +634 -0
app/utils.py +25 -0
app/web/app.js +380 -0
app/web/index.html +27 -0
app/web/react_app.js +933 -0
app/web/styles.css +256 -0
audit.py +367 -0
baseline_openai.py +983 -0
client.py +134 -0
docs/FRONTEND_WORKFLOW.md +48 -0
docs/PHASE2_IMPLEMENTATION.md +41 -0
docs/PHASE3_IMPLEMENTATION.md +39 -0
docs/PROJECT_STRUCTURE.md +41 -0
examples/sample_actions.json +0 -0
examples/sample_observations.json +0 -0
frontend/README.md +33 -0
frontend/react/.gitignore +2 -0
frontend/react/README.md +24 -0
frontend/react/index.html +16 -0
frontend/react/package-lock.json +2050 -0
frontend/react/package.json +22 -0
frontend/react/postcss.config.js +6 -0
frontend/react/src/App.jsx +21 -0
frontend/react/src/api/client.js +131 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,65 @@

+# VCS / local env
+.git/
+.gitignore
+.venv/
+.venv313/
+.env
+.env.*
+!.env.example
+# Python cache/build
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+# Frontend cache/deps
+frontend/react/node_modules/
+frontend/react/.vite/
+frontend/react/.vite-temp/
+frontend/react/dist/
+.npm-cache/
+.vite/
+# Runtime/generated data not needed in image build context
+logs/
+reports/
+outputs/
+data/
+results/training_runs/
+results/runs/
+results/eval_logs/
+results/best_model/archived/
+artifacts/
+results/prevalidation_*.log
+# Test/dev-only assets
+.pytest_cache/
+.tmp/
+docs/
+examples/
+tests/
+gov_workflow_openenv_tests/
+pip_bootstrap/
+test_results.txt
+test_rl_output*.txt
+tests/test_output*.txt
+tests/test_run.txt
+phase1_validation.py
+test_phase2.py
+old_simulator.py
+restore_simulator.py
+# Non-runtime docs/notebooks
+GovWorkflow_RL_ENV.ipynb
+Blog.md
+uv.lock
+# IDE/OS noise
+.vscode/
+.idea/
+*.swp
+Thumbs.db
+.DS_Store

.env.example ADDED Viewed

	@@ -0,0 +1,144 @@

+# Gov Workflow OpenEnv
+# Detailed environment template for local run, E2E validation, Docker preflight,
+# and release deployment.
+#
+# Usage:
+# 1) Copy this file to .env
+# 2) Fill only the auth/provider values you use
+# 3) Keep defaults unless you intentionally need different behavior
+# -----------------------------------------------------------------------------
+# 1) LLM Provider Endpoints and Auth
+# -----------------------------------------------------------------------------
+# Primary OpenAI-compatible endpoint used by inference/simulation runtime.
+API_BASE_URL=https://integrate.api.nvidia.com/v1
+# OpenAI-compatible model used for LLM inference mode.
+MODEL_NAME=meta/llama-3.3-70b-instruct
+# Auth precedence in runtime:
+#   HF_TOKEN -> OPENAI_API_KEY -> API_KEY
+HF_TOKEN=
+OPENAI_API_KEY=
+API_KEY=
+# Optional image tag used by inference / utility flows.
+LOCAL_IMAGE_NAME=gov-workflow-openenv:latest
+# Inference acceptance criteria (inference.py).
+MAX_STEPS=80
+SUCCESS_SCORE_THRESHOLD=0.50
+# -----------------------------------------------------------------------------
+# 2) Provider-Specific API Base URLs
+# -----------------------------------------------------------------------------
+# OpenAI-compatible provider URL (fallback path in engine/simulator).
+OPENAI_API_BASE_URL=https://api.openai.com/v1
+# NVIDIA provider URL for NIM calls.
+NVIDIA_API_BASE_URL=https://integrate.api.nvidia.com/v1
+# -----------------------------------------------------------------------------
+# 3) Model Routing and Fallback Pools
+# -----------------------------------------------------------------------------
+# Optional CSV fallback models for OpenAI-compatible runtime.
+# Example:
+# MODEL_FALLBACKS=meta/llama-3.1-8b-instruct,microsoft/phi-4-mini-instruct
+MODEL_FALLBACKS=
+# Optional CSV fallback models for NVIDIA runtime.
+NVIDIA_MODEL_FALLBACKS=
+# Primary NVIDIA model for NVIDIA-key runtime path.
+NVIDIA_MODEL=meta/llama-3.3-70b-instruct
+# NVIDIA keys for baseline and simulation fallback behavior.
+# Get keys at: https://build.nvidia.com/explore/discover
+NVIDIA_API_KEY=
+NVIDIA_API_KEY_2=
+# -----------------------------------------------------------------------------
+# 4) Environment Transport (Direct vs HTTP)
+# -----------------------------------------------------------------------------
+# Used by inference / gateway code.
+# Allowed: auto, http, direct
+OPENENV_ENV_TRANSPORT=auto
+# Base URL for HTTP transport path.
+OPENENV_ENV_BASE_URL=http://127.0.0.1:7860
+# Optional explicit API prefix for /reset /step /grade calls.
+# Typical values: (empty), /api, /api/v1
+OPENENV_ENV_API_PREFIX=
+# Optional candidate prefixes (CSV) tried before built-in fallback sequence.
+# Example: /api/v1,/api
+OPENENV_ENV_API_PREFIX_CANDIDATES=
+# Force HTTP/FastAPI gateway even when direct transport is available.
+# Allowed truthy values: 1, true, yes, on
+FORCE_FASTAPI_GATEWAY=0
+# -----------------------------------------------------------------------------
+# 5) Structured API Alias Controls (app.main)
+# -----------------------------------------------------------------------------
+# Enables automatic aliasing from source prefix to versioned prefix.
+ENABLE_STRUCTURED_V1_API=1
+OPENENV_API_SOURCE_PREFIX=/api
+OPENENV_API_V1_PREFIX=/api/v1
+# -----------------------------------------------------------------------------
+# 6) FastAPI Server Settings (SERVER_* in app/config.py)
+# -----------------------------------------------------------------------------
+SERVER_HOST=0.0.0.0
+SERVER_PORT=7860
+SERVER_LOG_LEVEL=info
+# Keep 1 for in-memory session store unless external shared state is added.
+SERVER_WORKERS=1
+# JSON list string expected by Pydantic settings.
+SERVER_CORS_ORIGINS=["*"]
+# -----------------------------------------------------------------------------
+# 7) Environment Defaults (ENV_* in app/config.py)
+# -----------------------------------------------------------------------------
+ENV_DEFAULT_TASK_ID=district_backlog_easy
+ENV_DEFAULT_SEED=11
+ENV_MAX_SESSIONS=100
+ENV_MAX_STEPS_PER_EPISODE=500
+# -----------------------------------------------------------------------------
+# 8) Runtime Throttling
+# -----------------------------------------------------------------------------
+# Delay between LLM calls used by baseline_openai.py.
+LLM_CALL_DELAY=12.0
+# -----------------------------------------------------------------------------
+# 9) Persistence and Storage
+# -----------------------------------------------------------------------------
+# Enables SQLite/filesystem persistence.
+STORAGE_ENABLED=true
+# Preferred persistence root (used by app/persistence.py).
+# Local example: C:/Users/your-user/OPENENV_RL/outputs/persist
+# HF Spaces example: /data/openenv_rl
+OPENENV_DATA_DIR=
+# Legacy fallback path key still supported by code.
+STORAGE_DATA_DIR=
+# -----------------------------------------------------------------------------
+# 10) Frontend Dev Proxy (Vite)
+# -----------------------------------------------------------------------------
+# Used by frontend/react/vite.config.js for local /api proxy target.
+VITE_DEV_API_TARGET=http://127.0.0.1:7860

.gitignore ADDED Viewed

	@@ -0,0 +1,59 @@

+# Environment secrets - NEVER commit .env
+.env
+.env.local
+.env.production
+# Python
+__pycache__/
+*.pyc
+*.pyo
+.venv/
+.venv313/
+*.egg-info/
+dist/
+build/
+# pytest
+.pytest_cache/
+# Local temp/bootstrap
+.tmp/
+pip_bootstrap/
+# Runtime outputs
+outputs/
+logs/
+reports/
+data/
+results/training_runs/
+results/runs/
+results/eval_logs/
+results/best_model/archived/
+artifacts/
+# Frontend build cache/deps
+frontend/react/node_modules/
+frontend/react/.vite/
+frontend/react/.vite-temp/
+frontend/react/dist/
+.vite/
+.npm-cache/
+# Docker/local deployment overrides
+docker-compose.override.yml
+*.local.env
+# Local test artifacts
+test_results.txt
+test_rl_output*.txt
+tests/test_output*.txt
+tests/test_run.txt
+# Pre-submission validation artifacts
+scripts/validate-submission.sh
+results/prevalidation_docker_build.log
+results/prevalidation_*.log
+# Keep benchmark Phase 1 model in Git for Colab/Kaggle transfer
+!results/best_model/phase1/phase1_final.zip

Blog.md ADDED Viewed

	@@ -0,0 +1,314 @@

+# 🏛️ Gov Workflow OpenEnv — Teaching Machines to Manage Real-World Bureaucracy
+---
+## 🚨 The Problem Nobody Talks About
+Every day, thousands of applications flow into government systems:
+* Passports
+* Income certificates
+* Land records
+* Licenses
+But the system handling them?
+```text
+Rigid. Static. Fragile.
+```
+Most workflows rely on simple rules like:
+* First-Come-First-Serve
+* Urgent-first prioritization
+And that’s where things break.
+---
+### ⚠️ What goes wrong?
+* If you prioritize **old cases**, new easy ones pile up → backlog explodes
+* If you prioritize **fast cases**, complex ones miss deadlines → SLA breaches
+* If you follow **fixed rules**, you ignore real-time system state
+This is not a sorting problem.
+```text
+This is a decision-making problem under uncertainty.
+```
+---
+## 💡 Our Idea
+What if instead of **hardcoding rules**,
+we let a system **learn how to manage workflows**?
+That’s exactly what we built.
+---
+## 🌍 What is the Environment?
+At the heart of this project is a **simulation environment** that mimics a real government office.
+Think of it as:
+```text
+A virtual district office running in code
+```
+It includes:
+* Multiple services (passport, certificates, etc.)
+* Multi-stage workflows (submission → approval → issuance)
+* Limited officers (resources)
+* Delays due to missing documents
+* SLA deadlines and penalties
+* Fairness constraints across services
+Every “step” in this environment represents **one unit of time** (a working day).
+---
+## 🧠 The Core Concept
+We model this system as a **Reinforcement Learning problem**.
+```text
+Environment → Government workflow simulation
+Agent       → Decision-maker
+Goal        → Optimize system performance over time
+```
+---
+## ⚙️ How RL Works Here
+At every step, the agent interacts with the environment using three core components:
+---
+### 🔹 1. State (What the agent sees)
+The **state** is a snapshot of the system at a given time.
+It includes:
+* Number of pending applications per service
+* Average waiting time
+* SLA pressure (how close deadlines are)
+* Missing document backlog
+* Officer allocation across services
+```text
+State = Current condition of the entire workflow system
+```
+---
+### 🔹 2. Action (What the agent can do)
+The agent chooses **one action per step** to influence the system.
+Examples:
+* Change prioritization strategy (urgent-first, fairness-based, etc.)
+* Allocate more officers to a service
+* Request missing documents
+* Escalate high-priority cases
+* Reallocate resources
+* Advance time (do nothing)
+```text
+Action = A decision that changes how the system evolves
+```
+---
+### 🔹 3. Reward (How the agent learns)
+After each action, the agent receives a **reward signal**.
+This reward tells the agent how good or bad its decision was.
+---
+#### Reward is based on:
+* ✅ Applications progressing through stages
+* ✅ Completed applications
+* ❌ SLA breaches (penalty)
+* ❌ Long waiting times
+* ❌ Unfair distribution across services
+* ❌ Idle resources
+---
+### Simplified reward intuition:
+```text
+Good decisions → positive reward
+Bad decisions  → negative reward
+```
+Over time, the agent learns:
+```text
+“How to maximize long-term reward”
+```
+---
+## 🔁 Why Reinforcement Learning?
+Because this system is:
+```text
+✔ Dynamic (state keeps changing)
+✔ Multi-objective (speed vs fairness vs deadlines)
+✔ Sequential (each decision affects future)
+✔ Uncertain (random delays, missing docs)
+```
+This makes RL a natural fit.
+---
+## 🏗️ What We Built
+---
+### 🔹 1. Simulation Environment
+A realistic, controllable system that models:
+* Workflow pipelines
+* Resource constraints
+* Delays and uncertainties
+* Policy decisions
+---
+### 🔹 2. RL Training Pipeline
+We trained an agent using **PPO (Proximal Policy Optimization)**:
+* Runs through thousands of simulated steps
+* Learns via trial and error
+* Improves decision-making over time
+---
+### 🔹 3. Baseline vs RL Comparison
+We compared against:
+```text
+Heuristic Systems:
+- FIFO
+- Urgent-first
+```
+---
+## 📊 What Did We Observe?
+Across all scenarios:
+```text
+✔ Reduced backlog
+✔ Fewer SLA breaches
+✔ Better completion rates
+```
+The RL agent consistently **outperformed static policies**.
+---
+## 🎬 Making AI Explainable
+AI systems often act like black boxes.
+We solved this using a **storytelling frontend**:
+* Timeline of decisions
+* Agent reasoning (why a decision was taken)
+* Impact indicators (what changed after each action)
+---
+```text
+The system doesn’t just act — it explains.
+```
+---
+## 🧠 Addressing the Big Question
+> “Is this just coded logic?”
+---
+### ❌ Static System
+```text
+if backlog > X → do Y
+```
+---
+### ✅ RL System
+```text
+policy(state) → action
+```
+* Learns from experience
+* Adapts to changing conditions
+* Balances trade-offs dynamically
+---
+## 🌍 Why This Matters
+This approach applies to:
+* Government services
+* Public infrastructure systems
+* Large-scale workflow automation
+It demonstrates:
+```text
+Adaptive systems can outperform rule-based systems
+```
+---
+## 🚀 Final Thought
+We didn’t just build a model.
+We built a system that learns:
+```text
+“How to make better decisions in complex workflows”
+```
+---
+## 📌 TL;DR
+* Government workflows fail due to rigid rules
+* We simulate them as an RL environment
+* Train an agent to make adaptive decisions
+* Result: improved efficiency, fairness, and scalability
+---
+> From rules → to learning
+> From static → to adaptive intelligence
+---

Dockerfile ADDED Viewed

	@@ -0,0 +1,51 @@

+# Gov Workflow OpenEnv
+# Multi-stage image:
+# 1) build Vite frontend assets
+# 2) run FastAPI backend and serve built UI under /ui
+# ---- Stage 1: frontend build (Node 20) ----
+FROM node:20-slim AS frontend-build
+WORKDIR /web
+# Copy only the package manifests first so the dependency-install layer is
+# rebuilt only when package.json / package-lock.json change.
+COPY frontend/react/package.json frontend/react/package-lock.json ./frontend/react/
+RUN cd frontend/react && npm ci --no-audit --no-fund
+# Now bring in the frontend sources and produce the production bundle (dist/).
+COPY frontend/react ./frontend/react
+RUN cd frontend/react && npm run build
+# ---- Stage 2: Python runtime ----
+FROM python:3.11-slim AS runtime
+# Defaults baked into the image: persistence is enabled and rooted at /data/openenv_rl.
+# NOTE(review): PORT is exported here but the CMD below hard-codes 7860 — confirm whether
+# anything else reads PORT.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    OPENENV_DATA_DIR=/data/openenv_rl \
+    STORAGE_ENABLED=true \
+    PORT=7860
+WORKDIR /app
+# Runtime OS dependencies (torch/sb3 commonly require libgomp at runtime)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python requirements before copying the source tree so this layer is
+# reused when only application code changes.
+COPY requirements.txt requirements_rl.txt ./
+RUN python -m pip install --upgrade pip \
+    && python -m pip install -r requirements.txt \
+    && python -m pip install -r requirements_rl.txt
+# Application source (filtered by .dockerignore), then the built UI from stage 1.
+COPY . .
+COPY --from=frontend-build /web/frontend/react/dist ./app/web/vite_dist
+# Create the data directory and an unprivileged user that owns both the app
+# tree and the data directory; the server runs as that user.
+RUN mkdir -p /data/openenv_rl \
+    && useradd --create-home --uid 10001 appuser \
+    && chown -R appuser:appuser /app /data/openenv_rl
+USER appuser
+EXPOSE 7860
+# Container health = GET /health on the local server answers within the 3s urlopen timeout.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/health', timeout=3)" || exit 1
+# Single uvicorn worker on port 7860.
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

GovWorkflow_RL_ENV.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md ADDED Viewed

	@@ -0,0 +1,191 @@

+---
+title: Gov Workflow OpenEnv
+sdk: docker
+app_port: 7860
+pinned: false
+---
+# Gov Workflow OpenEnv
+## Quick Links
+- Hugging Face Space URL (Dummy, update later): [https://huggingface.co/spaces/your-username/your-space-name](https://huggingface.co/spaces/your-username/your-space-name)
+  This placeholder will be replaced with the final deployed demo link.
+- Blog path in codebase: `OPENENV_RL/Blog.md`
+  Project write-up and narrative documentation for design choices and outcomes.
+- Notebook path: `OPENENV_RL/GovWorkflow_RL_ENV.ipynb`
+  Main OpenEnv RL government workflow notebook used as the judge-facing criteria book. It contains the practical judging context, environment setup, and the full end-to-end flow in one place.
+- Notebook Colab URL: [https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing](https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing)
+  Cloud version of the same notebook so judges can run and review the complete workflow without local setup.
+- GRPO Phase 1 training link: [https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing](https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing)
+  First-stage GRPO training run where the LLM agent starts learning policy behavior inside the RL environment.
+- GRPO Phase 2 training link: [https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing](https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing)
+  Second-stage GRPO continuation where the same LLM agent is further trained and refined on the RL environment.
+- PPO Phase 1 training (local): `rl/train_ppo.py`
+  Phase 1 PPO baseline training was executed on the local system to establish the RL algorithm baseline before phase-2 progression.
+- PPO Phase 2 training link: [https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing](https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing)
+  PPO phase 2 training notebook where the RL algorithm is further trained on the same environment for improved policy performance.
+Gov Workflow OpenEnv is a FastAPI-first simulation environment for public service workflow operations.
+It models queue prioritization, officer allocation, missing-document recovery, escalation usage, and fairness-aware SLA management across government services.
+This repository is productionized for:
+- local development (FastAPI + Vite)
+- Docker runtime
+- Hugging Face Spaces (Docker SDK)
+## Current Main-Branch Status
+This README is aligned to the current `main` branch code paths, including:
+- `app.main:app` as primary server runtime
+- React UI served at `/ui` from built Vite assets when available
+- OpenEnv contract endpoints (`/reset`, `/step`, `/state`, `/grade`)
+- frontend API aliases (`/api/*`) and versioned aliases (`/api/v1/*`)
+- training story endpoints (`/training/*`)
+- simulation, RL, persistence, compliance, and history endpoints
+## End-to-End Architecture
+```mermaid
+flowchart LR
+  UI["React UI"] --> API["FastAPI app.main"]
+  API --> ENV["GovWorkflowEnv app/env.py"]
+  API --> SIM["Simulation runtime app/simulator.py"]
+  API --> RL["RL train/eval rl/*"]
+  API --> STORE["PersistenceStore SQLite + filesystem"]
+  API --> STORY["Training Story router /training/*"]
+  API --> OPENENV["Optional OpenEnv adapter /openenv/*"]
+```
+## Core Runtime Components
+- API server: `app/main.py`
+- Environment kernel: `app/env.py`
+- Typed models: `app/models.py`
+- Task registry: `app/tasks.py`
+- Reward shaping: `app/reward.py`
+- Deterministic graders: `app/graders.py`
+- Simulation runtime: `app/simulator.py`
+- Training jobs manager: `app/training_jobs.py`
+- Persistence layer: `app/persistence.py`
+- Transport gateway: `app/api_gateway.py`
+- React frontend: `frontend/react`
+## Task Set (Current Runtime)
+Configured in `app/tasks.py`:
+- `district_backlog_easy`
+- `mixed_urgency_medium`
+- `cross_department_hard`
+- `district_backlog_easy_extreme`
+Benchmark list used by APIs:
+- `district_backlog_easy`
+- `mixed_urgency_medium`
+- `cross_department_hard`
+## Service Coverage
+`ServiceType` includes:
+- `passport`
+- `driving_license`
+- `aadhaar_card`
+- `gst_registration`
+- `income_certificate`
+- `caste_certificate`
+- `birth_certificate`
+- `land_registration`
+Medium and hard tasks currently run with:
+- `income_certificate`
+- `land_registration`
+- `passport`
+- `driving_license`
+- `aadhaar_card`
+## Local Development
+### Prerequisites
+- Python 3.11+
+- Node 20+
+- Docker
+### Install dependencies
+```bash
+pip install -r requirements.txt
+pip install -r requirements_rl.txt
+pip install pytest pytest-asyncio
+npm --prefix frontend/react install
+```
+### Configure environment
+```bash
+cp .env.example .env   # Windows (cmd): copy .env.example .env
+```
+Populate as needed:
+- `API_BASE_URL`
+- `MODEL_NAME`
+- `HF_TOKEN` or `OPENAI_API_KEY`/`API_KEY`
+- optional NVIDIA keys (`NVIDIA_API_KEY`, `NVIDIA_API_KEY_2`)
+- storage settings (`STORAGE_ENABLED`, `OPENENV_DATA_DIR`)
+### Run backend
+```bash
+python scripts/run_local.py --host 127.0.0.1 --port 7860 --reload
+```
+### Run frontend
+```bash
+npm --prefix frontend/react run dev
+```
+Open:
+- UI: `http://127.0.0.1:5173/ui`
+- API docs: `http://127.0.0.1:7860/docs`
+## Repository Layout
+```text
+app/
+  main.py               FastAPI app + API routing + compatibility aliases
+  env.py                GovWorkflowEnv kernel
+  models.py             Typed Pydantic contracts
+  tasks.py              Runtime task registry
+  reward.py             Reward shaping
+  graders.py            Deterministic graders
+  simulator.py          Simulation runtime and live sessions
+  training_jobs.py      Background RL training manager
+  persistence.py        SQLite/filesystem persistence
+  api_gateway.py        direct/http/auto environment transport layer
+  story_router.py       training story endpoints
+rl/
+  gov_workflow_env.py   Gym adapter
+  train_ppo.py          PPO phase training entrypoint
+  evaluate.py           Checkpoint evaluator
+  feature_builder.py    RL feature engineering
+  action_mask.py        Action mask logic
+frontend/react/
+  src/                  React modules/components/api hooks
+scripts/
+  run_local.py          Local FastAPI launcher
+  convert_grpo_csv.py   Training CSV to JSON converter for story endpoints
+openenv.yaml            OpenEnv manifest metadata
+baseline_openai.py      Baseline and LLM runner
+inference.py            Submission-style inference runner
+Dockerfile              Docker image definition
+```
+## License
+BSD-3-Clause

app/README.md ADDED Viewed

	@@ -0,0 +1,23 @@

+# app/
+Core environment and API layer.
+- `main.py`: FastAPI app and endpoints
+- `env.py`: GovWorkflowEnv simulation kernel
+- `models.py`: Pydantic action/observation/reward/state models
+- `tasks.py`: easy/medium/hard deterministic task configs
+- `graders.py`: deterministic task scoring (0.0 to 1.0)
+- `reward.py`: dense reward breakdown
+- `baselines.py`: heuristic baseline policies
+- `web/`: frontend assets served by FastAPI at `/ui`
+  - `vite_dist/`: production Vite build output copied during Docker build
+  - legacy files (`index.html`, `react_app.js`, `styles.css`) remain as local fallback
+Additional frontend-focused APIs in `main.py`:
+- `/api/workflows/components`
+- `/api/workflows/run`
+- `/api/rl/models`
+- `/api/rl/run`
+- `/api/rl/evaluate`
+- `/api/simulation/run`
+- `/api/training/jobs`

app/__init__.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# Package entry point: re-exports the core Pydantic models plus OpenEnv-style alias names.
+# NOTE(review): the GovWorkflowEnv re-export is disabled here and in __all__ below;
+# the reason is not visible in this file — confirm before re-enabling.
+# from app.env import GovWorkflowEnv
+from app.models import ActionModel, ObservationModel, RewardModel
+# The top-level `client` module is optional: when it cannot be imported the
+# name is still defined (as None) so `from app import GovWorkflowClient` keeps working.
+try:
+    from client import GovWorkflowClient
+except ModuleNotFoundError:
+    GovWorkflowClient = None  # type: ignore[assignment]
+# Alias names that point at the same model classes.
+GovWorkflowAction = ActionModel
+GovWorkflowObservation = ObservationModel
+# Names exported by `from app import *`.
+__all__ = [
+    "ActionModel",
+    "ObservationModel",
+    "RewardModel",
+    "GovWorkflowAction",
+    "GovWorkflowObservation",
+#     "GovWorkflowEnv",
+    "GovWorkflowClient",
+]

app/api_gateway.py ADDED Viewed

	@@ -0,0 +1,257 @@

+"""
+Unified environment transport layer.
+This module centralizes environment access so callers can use:
+  - FastAPI HTTP transport
+  - direct in-process transport
+  - dynamic auto selection
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import os
+from typing import Literal, Protocol
+from app.env import GovWorkflowEnv
+from app.graders import grade_episode
+from app.models import ActionModel, ObservationModel, StepInfoModel
+# Transport selector: "auto" picks HTTP or direct at creation time, the other
+# two force a specific transport.
+TransportMode = Literal["auto", "http", "direct"]
+class EnvGateway(Protocol):
+    """Structural interface implemented by the gateway classes in this module.
+    Callers drive an episode through reset() / step() / grade() / close()
+    without caring whether the environment lives in-process or behind HTTP.
+    """
+    # Transport label this gateway was created with.
+    transport: TransportMode
+    # Flags mirrored from the most recent step(); the implementations in this
+    # module clear both on reset().
+    terminated: bool
+    truncated: bool
+    # Start a new episode and return its first observation.
+    def reset(self) -> ObservationModel: ...
+    # Apply one action; returns (observation, reward, terminated, truncated, info).
+    def step(
+        self, action: ActionModel
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]: ...
+    # Score the current episode; returns (score, grader_name, metrics).
+    def grade(self) -> tuple[float, str, dict[str, float]]: ...
+    # Release whatever the gateway holds (environment instance or server session).
+    def close(self) -> None: ...
+@dataclass
+class DirectEnvGateway:
+    task_id: str
+    seed: int
+    transport: TransportMode = "direct"
+    def __post_init__(self) -> None:
+        self._env = GovWorkflowEnv(task_id=self.task_id)
+        self.terminated = False
+        self.truncated = False
+    def reset(self) -> ObservationModel:
+        obs, _ = self._env.reset(seed=self.seed)
+        self.terminated = False
+        self.truncated = False
+        return obs
+    def step(
+        self, action: ActionModel
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
+        obs, reward, terminated, truncated, info = self._env.step(action)
+        self.terminated = bool(terminated)
+        self.truncated = bool(truncated)
+        return obs, float(reward), bool(terminated), bool(truncated), info
+    def grade(self) -> tuple[float, str, dict[str, float]]:
+        result = grade_episode(self._env.state())
+        return float(result.score), str(result.grader_name), dict(result.metrics)
+    def close(self) -> None:
+        close_fn = getattr(self._env, "close", None)
+        if callable(close_fn):
+            close_fn()
+@dataclass
+class HttpEnvGateway:
+    task_id: str
+    seed: int
+    base_url: str
+    api_prefix: str | None = None
+    transport: TransportMode = "http"
+    def __post_init__(self) -> None:
+        try:
+            import requests as _requests
+        except ImportError as exc:
+            raise ImportError("requests is required for HTTP transport.") from exc
+        self._requests = _requests
+        self._session_id: str | None = None
+        self.terminated = False
+        self.truncated = False
+        self.base_url = self.base_url.rstrip("/")
+        self._resolved_prefix = self._normalize_prefix(self.api_prefix)
+    @staticmethod
+    def _normalize_prefix(prefix: str | None) -> str:
+        if prefix is None:
+            return ""
+        p = str(prefix).strip()
+        if not p:
+            return ""
+        if not p.startswith("/"):
+            p = "/" + p
+        return p.rstrip("/")
+    @staticmethod
+    def _candidate_prefixes(explicit_prefix: str | None) -> list[str]:
+        normalized_explicit = HttpEnvGateway._normalize_prefix(explicit_prefix)
+        if normalized_explicit:
+            return [normalized_explicit]
+        env_prefix = HttpEnvGateway._normalize_prefix(os.getenv("OPENENV_ENV_API_PREFIX", ""))
+        configured_candidates = os.getenv("OPENENV_ENV_API_PREFIX_CANDIDATES", "")
+        candidates: list[str] = []
+        for item in [env_prefix, *configured_candidates.split(",")]:
+            normalized = HttpEnvGateway._normalize_prefix(item)
+            if normalized not in candidates:
+                candidates.append(normalized)
+        # Ordered fallbacks: versioned API -> frontend API -> root OpenEnv API.
+        for fallback in ["/api/v1", "/api", ""]:
+            if fallback not in candidates:
+                candidates.append(fallback)
+        return candidates
+    def _resolve_prefix(self) -> str:
+        if self._resolved_prefix:
+            return self._resolved_prefix
+        for prefix in self._candidate_prefixes(self.api_prefix):
+            try:
+                response = self._requests.get(
+                    f"{self.base_url}{prefix}/health",
+                    timeout=3,
+                )
+                if response.ok:
+                    self._resolved_prefix = prefix
+                    return self._resolved_prefix
+            except Exception:
+                continue
+        self._resolved_prefix = ""
+        return self._resolved_prefix
+    def _url(self, path: str) -> str:
+        return f"{self.base_url}{self._resolve_prefix()}{path}"
+    def _post(self, path: str, body: dict) -> dict:
+        response = self._requests.post(
+            self._url(path),
+            json=body,
+            timeout=30,
+        )
+        response.raise_for_status()
+        return response.json()
+    def reset(self) -> ObservationModel:
+        payload = {"task_id": self.task_id, "seed": self.seed}
+        data = self._post("/reset", payload)
+        self._session_id = str(data["session_id"])
+        self.terminated = False
+        self.truncated = False
+        return ObservationModel(**data["observation"])
+    def step(
+        self, action: ActionModel
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
+        if not self._session_id:
+            raise RuntimeError("Session is not initialized. Call reset() first.")
+        data = self._post(
+            "/step",
+            {
+                "session_id": self._session_id,
+                "action": action.model_dump(exclude_none=True, mode="json"),
+            },
+        )
+        obs = ObservationModel(**data["observation"])
+        info = StepInfoModel(**data["info"])
+        self.terminated = bool(data["terminated"])
+        self.truncated = bool(data["truncated"])
+        return (
+            obs,
+            float(data["reward"]),
+            bool(data["terminated"]),
+            bool(data["truncated"]),
+            info,
+        )
+    def grade(self) -> tuple[float, str, dict[str, float]]:
+        if not self._session_id:
+            raise RuntimeError("Session is not initialized. Call reset() first.")
+        data = self._post("/grade", {"session_id": self._session_id})
+        return (
+            float(data["score"]),
+            str(data["grader_name"]),
+            dict(data.get("metrics", {})),
+        )
+    def close(self) -> None:
+        if not self._session_id:
+            return
+        try:
+            self._requests.delete(self._url(f"/sessions/{self._session_id}"), timeout=10)
+        except Exception:
+            pass
+        self._session_id = None
+def _http_reachable(base_url: str) -> bool:
+    try:
+        import requests
+        r = requests.get(f"{base_url.rstrip('/')}/health", timeout=3)
+        return bool(r.ok)
+    except Exception:
+        return False
+def create_env_gateway(
+    *,
+    task_id: str,
+    seed: int,
+    mode: TransportMode = "auto",
+    base_url: str = "http://127.0.0.1:7860",
+    api_prefix: str | None = None,
+    enforce_fastapi: bool = False,
+) -> EnvGateway:
+    """
+    Create environment gateway with dynamic transport selection.
+    Behavior:
+      - mode=http    -> always HTTP (no reachability probe is performed)
+      - mode=direct  -> always direct; raises RuntimeError if enforce_fastapi=True
+      - any other mode (i.e. auto) -> HTTP if /health is reachable, else direct
+        fallback; raises RuntimeError instead of falling back if enforce_fastapi=True
+    Args:
+      task_id, seed: forwarded to whichever gateway is constructed.
+      base_url, api_prefix: forwarded to HttpEnvGateway only.
+      enforce_fastapi: forbid the direct (in-process) transport.
+    Raises:
+      RuntimeError: when enforce_fastapi=True and the direct transport would be used.
+    """
+    # Reject the explicit contradiction first, before any network probe.
+    if enforce_fastapi and mode == "direct":
+        raise RuntimeError("Direct transport is disabled. Set mode to 'http' or 'auto'.")
+    if mode == "http":
+        return HttpEnvGateway(task_id=task_id, seed=seed, base_url=base_url, api_prefix=api_prefix)
+    if mode == "direct":
+        return DirectEnvGateway(task_id=task_id, seed=seed)
+    # Auto mode: probe the server and tag the resulting gateway with transport="auto".
+    if _http_reachable(base_url):
+        return HttpEnvGateway(
+            task_id=task_id,
+            seed=seed,
+            base_url=base_url,
+            api_prefix=api_prefix,
+            transport="auto",
+        )
+    if enforce_fastapi:
+        raise RuntimeError(
+            f"FastAPI gateway is required but unavailable at {base_url}. "
+            "Start the API server or disable FORCE_FASTAPI_GATEWAY."
+        )
+    return DirectEnvGateway(task_id=task_id, seed=seed, transport="auto")

app/baselines.py ADDED Viewed

	@@ -0,0 +1,161 @@

+from __future__ import annotations
+from collections.abc import Callable
+from types import SimpleNamespace
+from app.env import GovWorkflowEnv
+from app.graders import grade_episode
+from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
+PolicyFn = Callable[[ObservationModel], ActionModel]
+def _snapshots(obs: ObservationModel):
+    """Return queue snapshots as a list regardless of Phase 1 (list) or Phase 2 (dict)."""
+    qs = obs.queue_snapshots
+    if isinstance(qs, dict):
+        return list(qs.values())
+    return list(qs)
+def _service_attr(q, *attrs):
+    """Return the first attribute that exists on a QueueSnapshot (Phase 1 vs Phase 2 names)."""
+    for attr in attrs:
+        val = getattr(q, attr, None)
+        if val is not None:
+            return val
+    return 0
+def _service_name(q) -> ServiceType:
+    """Return ServiceType regardless of Phase 1 (.service) or Phase 2 (.service_type)."""
+    return getattr(q, "service_type", None) or getattr(q, "service", None)
+def _service_with_max(obs: ObservationModel, *attrs) -> ServiceType | None:
+    snaps = _snapshots(obs)
+    ranked = sorted(snaps, key=lambda s: _service_attr(s, *attrs), reverse=True)
+    if ranked and _service_attr(ranked[0], *attrs) > 0:
+        return _service_name(ranked[0])
+    return None
+def _reserve_officers(obs: ObservationModel) -> int:
+    pool = obs.officer_pool
+    # Phase 2: idle_officers property
+    if hasattr(pool, "idle_officers"):
+        return int(pool.idle_officers)
+    # Phase 1 fallback
+    return int(getattr(pool, "reserve_officers", 0))
+def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
+    pool = obs.officer_pool
+    # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
+    alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
+    raw = alloc_dict.get(service)
+    if raw is None:
+        raw = alloc_dict.get(service.value if hasattr(service, "value") else str(service), 0)
+    return int(raw or 0)
+def urgent_first_policy(obs: ObservationModel) -> ActionModel:
+    """Request missing documents for the service with the most urgent cases, else advance time.
+    NOTE(review): the name ``urgent_first_policy`` is rebound to
+    ``greedy_sla_policy`` further down in this module, so after import this
+    definition is no longer reachable under this name.
+    """
+    target = _service_with_max(obs, "urgent_pending", "urgent_cases")
+    if target:
+        return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=target)
+    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+def oldest_first_policy(obs: ObservationModel) -> ActionModel:
+    """Baseline that ignores the observation and always advances time."""
+    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+def backlog_clearance_policy(obs: ObservationModel) -> ActionModel:
+    """Heuristic policy that picks the first applicable action, in this order:
+    1. assign one idle officer to the service with the largest backlog;
+    2. request missing documents for the service with the most blocked cases;
+    3. move one officer from the least-loaded to the most-loaded service;
+    4. otherwise advance time.
+    """
+    snaps = _snapshots(obs)
+    # Assign idle officers to the most backlogged service
+    if _reserve_officers(obs) > 0:
+        target = _service_with_max(obs, "total_pending", "active_cases")
+        if target:
+            return ActionModel(
+                action_type=ActionType.ASSIGN_CAPACITY,
+                service_target=target,
+                capacity_assignment={target.value: 1},
+            )
+    # Clear missing-doc bottlenecks
+    target = _service_with_max(obs, "blocked_missing_docs", "missing_docs_cases")
+    if target:
+        return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=target)
+    # Reallocate from least-loaded to most-loaded
+    if len(snaps) >= 2:
+        hot = sorted(snaps, key=lambda s: _service_attr(s, "total_pending", "active_cases"), reverse=True)
+        cold = sorted(snaps, key=lambda s: _service_attr(s, "total_pending", "active_cases"))
+        hot_svc = _service_name(hot[0])
+        cold_svc = _service_name(cold[0])
+        hot_load = _service_attr(hot[0], "total_pending", "active_cases")
+        cold_load = _service_attr(cold[0], "total_pending", "active_cases")
+        # Only rebalance when the gap is at least 3 cases and the donor service
+        # keeps at least one officer after giving one away.
+        if (
+            hot_svc and cold_svc and hot_svc != cold_svc
+            and hot_load - cold_load >= 3
+            and _alloc_for(obs, cold_svc) > 1
+        ):
+            return ActionModel(
+                action_type=ActionType.REALLOCATE_OFFICERS,
+                service_target=cold_svc,
+                reallocation_delta={cold_svc.value: -1, hot_svc.value: 1},
+            )
+    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+def greedy_sla_policy(obs: ObservationModel) -> ActionModel:
+    """SLA-focused fallback policy used by historical aliases.
+    Requests missing documents for the service ranked highest by
+    ``urgent_pending`` / ``urgent_cases`` / ``breached_cases``; when no service
+    has a positive value it defers to ``backlog_clearance_policy``.
+    NOTE(review): ``_service_attr`` returns the first attribute that is not
+    None, so ``breached_cases`` is only consulted when both urgent attributes
+    are missing or None, not when they are 0.
+    """
+    target = _service_with_max(obs, "urgent_pending", "urgent_cases", "breached_cases")
+    if target:
+        return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=target)
+    return backlog_clearance_policy(obs)
+def random_policy(obs: ObservationModel) -> ActionModel:
+    import random
+    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+# Historical aliases. Note that this rebinds ``urgent_first_policy``, replacing
+# the function of the same name defined earlier in this module.
+urgent_first_policy = greedy_sla_policy
+fairness_aware_policy = backlog_clearance_policy
+# Registry of policy name -> callable, looked up by ``run_policy_episode``.
+# Several names intentionally map to the same implementation.
+POLICIES: dict[str, PolicyFn] = {
+    "urgent_first":      greedy_sla_policy,
+    "oldest_first":      oldest_first_policy,
+    "backlog_clearance": backlog_clearance_policy,
+    "random_policy":     random_policy,
+    "greedy_sla_policy": greedy_sla_policy,
+    "fairness_aware_policy": fairness_aware_policy,
+}
+def run_policy_episode(task_id: str, policy_name: str, seed: int | None = None, max_steps: int = 500) -> dict:
+    env = GovWorkflowEnv(task_id=task_id)
+    obs, _ = env.reset(seed=seed)
+    policy = POLICIES[policy_name]
+    reward_sum = 0.0
+    for _ in range(max_steps):
+        action = policy(obs)
+        obs, reward, terminated, truncated, _ = env.step(action)
+        reward_sum += reward
+        if terminated or truncated:
+            break
+    state = env.state()
+    grade = grade_episode(state)
+    # Return a SimpleNamespace so attribute access (result.score) works in main.py
+    return SimpleNamespace(
+        task_id=task_id,
+        policy=policy_name,
+        seed=state.seed,
+        reward_sum=round(reward_sum, 4),
+        score=float(grade.score),
+        grader=grade.grader_name,
+        metrics=grade.metrics,
+        steps=int(state.total_steps),
+        completed=int(state.total_completed),
+        backlog=int(state.total_backlog),
+    )

app/config.py ADDED Viewed

	@@ -0,0 +1,87 @@

+# ── Path bootstrap ─────────────────────────────────────────────────────────────
+from __future__ import annotations
+from pathlib import Path
+# Load .env file if it exists — must happen before Pydantic Settings reads env vars
+try:
+    from dotenv import load_dotenv
+except (ImportError, AttributeError):
+    # Keep runtime functional even when python-dotenv is not installed
+    # or when a conflicting `dotenv` package is present.
+    # The stand-in accepts any arguments and reports that nothing was loaded.
+    def load_dotenv(*args, **kwargs):  # type: ignore[no-redef]
+        return False
+# The .env file is looked up one directory above the directory containing this module.
+_ENV_FILE = Path(__file__).resolve().parent.parent / ".env"
+load_dotenv(dotenv_path=_ENV_FILE, override=False)
+# override=False means real environment variables always win over .env values
+# ──────────────────────────────────────────────────────────────────────────────
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class ServerSettings(BaseSettings):
+    """
+    HTTP-server configuration.
+    Read from environment variables prefixed SERVER_.
+    Example: SERVER_PORT=8080  SERVER_LOG_LEVEL=debug
+    Intentionally isolated from EnvSettings — changing server bind
+    options never affects simulation behaviour, and vice-versa.
+    Both classes are instantiated once at import and treated as
+    read-only singletons for the lifetime of the process.
+    """
+    # Network binding.
+    host: str = Field("0.0.0.0", description="Bind host")
+    port: int = Field(7860, description="Bind port — HF Spaces default is 7860")
+    log_level: str = Field(
+        "info", description="Uvicorn log level: debug | info | warning | error"
+    )
+    # Defaults to allowing every origin.
+    cors_origins: list[str] = Field(
+        default=["*"],
+        description="Allowed CORS origins. '*' is required for HF Spaces embedding.",
+    )
+    # NOTE: Keep at 1 when using the in-memory session store.
+    # Multiple workers do NOT share process memory.
+    # Use Redis + a shared store before increasing workers in production.
+    workers: int = Field(
+        1, description="Uvicorn worker count — keep at 1 for in-memory sessions"
+    )
+    # Unknown SERVER_* variables are ignored rather than rejected.
+    model_config = SettingsConfigDict(env_prefix="SERVER_", extra="ignore")
+class EnvSettings(BaseSettings):
+    """
+    Simulation-environment defaults.
+    Read from environment variables prefixed ENV_.
+    Example: ENV_DEFAULT_TASK_ID=mixed_urgency_medium  ENV_MAX_SESSIONS=50
+    Controls the environment kernel only. No effect on network
+    binding, logging, or CORS — those belong to ServerSettings.
+    """
+    # Fallbacks applied when a reset request omits task_id / seed.
+    default_task_id: str = Field(
+        "district_backlog_easy",
+        description="Task used when POST /reset is called without an explicit task_id",
+    )
+    default_seed: int = Field(
+        11,
+        description="Seed used when POST /reset is called without an explicit seed",
+    )
+    # Resource limits.
+    max_steps_per_episode: int = Field(
+        500,
+        description="Hard cap on step() calls per session before episode is truncated",
+    )
+    max_sessions: int = Field(
+        100,
+        description="Maximum concurrent in-memory sessions. Oldest is evicted when exceeded.",
+    )
+    # Unknown ENV_* variables are ignored rather than rejected.
+    model_config = SettingsConfigDict(env_prefix="ENV_", extra="ignore")
+# ── Singletons ────────────────────────────────────────────────────────────────
+# Loaded exactly once at import time. Never mutated at runtime.
+# Tests may monkeypatch individual fields after import if needed.
+server_settings = ServerSettings()
+env_settings = EnvSettings()

app/engine.py ADDED Viewed

	@@ -0,0 +1,1712 @@

+from __future__ import annotations
+import json
+import os
+import random
+import re
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Literal, Optional
+from openai import OpenAI
+from app.event_engine import EventEngine
+from app.models import (
+    ActionModel,
+    ActionType,
+    ApplicationCase,
+    DelayedEffect,
+    EventType,
+    IntakeChannel,
+    InternalSubstate,
+    ObservationModel,
+    PriorityMode,
+    QueueSnapshot,
+    ServiceType,
+    StageType,
+)
+from app.sector_profiles import get_sector_profile
+from app.state_machine import can_advance
+if TYPE_CHECKING:
+    from app.models import TaskConfig
+# Ordered list of model identifiers.
+# NOTE(review): presumably a fallback pool tried in order for LLM inference;
+# the consumer is not visible in this part of the file, so confirm before relying on order.
+LEGACY_NVIDIA_MODEL_POOL = [
+    "meta/llama-3.3-70b-instruct",
+    "qwen/qwen3-next-80b-a3b-instruct",
+    "moonshotai/kimi-k2-instruct-0905",
+    "meta/llama-3.1-405b-instruct",
+    "deepseek-ai/deepseek-v3.2",
+    "qwen/qwq-32b",
+    "mistralai/mixtral-8x22b-instruct-v0.1",
+    "google/gemma-3-27b-it",
+    "microsoft/phi-4-mini-instruct",
+    "meta/llama-3.1-8b-instruct",
+]
+# Module-level cache keyed by a pair of strings.
+# NOTE(review): the meaning of the two key components is not shown here; verify at the use site.
+_MODEL_CACHE: dict[tuple[str, str], Any] = {}
+# ─────────────────────────────────────────────
+# DAY RESULT
+# ─────────────────────────────────────────────
+class DayResult:
+    """Mutable per-day counters filled in by ``DaySimulator.simulate_day`` and its helpers."""
+    def __init__(self) -> None:
+        # Cases created today (set by _spawn_arrivals).
+        self.new_arrivals: int = 0
+        # Cases that reached their final stage today.
+        self.new_completions: int = 0
+        # Cases whose SLA deadline was passed for the first time today.
+        self.new_sla_breaches: int = 0
+        # Sum of the effective officer capacity over all enabled services.
+        self.total_capacity_days: int = 0
+        # Capacity of services that had no open case at all today.
+        self.idle_officer_days: int = 0
+        # Number of successful advance_case calls.
+        self.stage_advances: int = 0
+        # Cases released from BLOCKED_MISSING_DOCS today.
+        self.newly_unblocked_missing: int = 0
+        # NOTE(review): not written by any code visible in this part of the file.
+        self.newly_blocked_missing: int = 0
+        # NOTE(review): not written by any code visible in this part of the file.
+        self.newly_unblocked_enrich: int = 0
+        # Cases released from FIELD_VERIFICATION_PENDING today.
+        self.field_verif_completed: int = 0
+        # Completed cases that were flagged urgent.
+        self.urgent_completed: int = 0
+        # New cases that arrived through the digital intake channel.
+        self.digital_arrivals: int = 0
+        # Events reported as active by the event engine for this day.
+        self.active_events: list[EventType] = []
+# ─────────────────────────────────────────────
+# DAY SIMULATOR
+# ─────────────────────────────────────────────
+class DaySimulator:
+    """
+    Core daily simulation engine.
+    Accepts TWO calling conventions so both env.py and tests work:
+    Convention A (tests):
+        DaySimulator(task_config=task, rng=rng, event_engine=engine)
+    Convention B (env.py legacy):
+        DaySimulator(seed=42, task_config=task, sector_registry={})
+        — in this case rng and event_engine are built internally.
+    """
+    def __init__(
+        self,
+        task_config: "TaskConfig",
+        rng: Optional[random.Random] = None,
+        event_engine: Optional[EventEngine] = None,
+        seed: Optional[int] = None,
+        sector_registry: Optional[dict] = None,
+    ) -> None:
+        self.task_config = task_config
+        self.task = task_config
+        if rng is not None:
+            self.rng = rng
+        elif seed is not None:
+            self.rng = random.Random(seed)
+        else:
+            self.rng = random.Random(task_config.seed)
+        if event_engine is not None:
+            self.event_engine = event_engine
+        else:
+            _seed = seed if seed is not None else task_config.seed
+            self.event_engine = EventEngine(
+                seed=_seed,
+                scenario_mode=task_config.scenario_mode,
+            )
+        self.sector_registry = sector_registry or {}
+        self.active_cases: list[ApplicationCase] = []
+        self.pending_effects: list[DelayedEffect] = []
+        self.case_counter: int = 0
+    def simulate_day(
+        self,
+        day: int,
+        active_cases: list[ApplicationCase],
+        completed_cases: list[ApplicationCase],
+        priority_mode: PriorityMode,
+        officer_allocations: dict,
+    ) -> DayResult:
+        result = DayResult()
+        events = self.event_engine.get_events_for_day(day, self.task_config)
+        params = self.event_engine.apply_events(events, self.task_config)
+        result.active_events = list(params.active_events)
+        new_cases = self._spawn_arrivals(day, params, result)
+        active_cases.extend(new_cases)
+        effective_alloc = self._apply_officer_reduction(officer_allocations, params)
+        self._resolve_field_verification(day, active_cases, result)
+        self._resolve_doc_requests(day, active_cases, result)
+        newly_completed: list[ApplicationCase] = []
+        for service in self.task_config.enabled_services:
+            capacity = effective_alloc.get(service, effective_alloc.get(service.value, 0))
+            result.total_capacity_days += int(capacity)
+            service_cases = [
+                c
+                for c in active_cases
+                if c.service_type == service and not c.completed and not c.rejected
+            ]
+            if not service_cases:
+                result.idle_officer_days += int(capacity)
+                continue
+            sorted_cases = self._sort_queue(service_cases, priority_mode)
+            for case in sorted_cases:
+                if capacity <= 0:
+                    break
+                from app.state_machine import advance_case
+                advanced, final = advance_case(case, day)
+                if advanced:
+                    capacity -= 1
+                    result.stage_advances += 1
+                    if final:
+                        newly_completed.append(case)
+                        if case.is_urgent:
+                            result.urgent_completed += 1
+        if newly_completed:
+            done_ids = {c.case_id for c in newly_completed}
+            still_active = [c for c in active_cases if c.case_id not in done_ids]
+            active_cases.clear()
+            active_cases.extend(still_active)
+            completed_cases.extend(newly_completed)
+            result.new_completions = len(newly_completed)
+        for case in active_cases:
+            case.current_day = day
+            case.waiting_days += 1
+            if day > case.sla_deadline_day and not case.sla_breached:
+                case.sla_breached = True
+                result.new_sla_breaches += 1
+        return result
+    def _apply_officer_reduction(self, allocations: dict, params: Any) -> dict:
+        reduction = int(getattr(params, "officer_reduction", 0))
+        if reduction <= 0:
+            return dict(allocations)
+        effective = dict(allocations)
+        for _ in range(reduction):
+            target = max(effective, key=lambda k: effective[k], default=None)
+            if target is None or effective[target] <= 0:
+                break
+            effective[target] -= 1
+        return effective
+    def _spawn_arrivals(
+        self,
+        day: int,
+        params: Any,
+        result: DayResult,
+    ) -> list[ApplicationCase]:
+        """Create today's new cases for every enabled service.
+        The per-service rate is the configured daily rate times
+        ``params.arrival_multiplier`` (default 1.0). The integer part is always
+        spawned and one extra case is added with probability equal to the
+        fractional part. Updates ``result.new_arrivals`` and
+        ``result.digital_arrivals`` and returns the new cases.
+        """
+        new_cases: list[ApplicationCase] = []
+        for service in self.task_config.enabled_services:
+            # The rate table may be keyed by the enum member or by its string value.
+            base_rate = self.task_config.arrival_rate_per_day.get(
+                service,
+                self.task_config.arrival_rate_per_day.get(service.value, 0.0),
+            )
+            effective_rate = float(base_rate) * float(getattr(params, "arrival_multiplier", 1.0))
+            count = int(effective_rate)
+            # Exactly one RNG draw per service, whether or not there is a fractional part.
+            if self.rng.random() < (effective_rate - count):
+                count += 1
+            for _ in range(count):
+                case = self._new_case(service, day, params)
+                new_cases.append(case)
+                if case.intake_channel == IntakeChannel.DIGITAL:
+                    result.digital_arrivals += 1
+        result.new_arrivals = len(new_cases)
+        return new_cases
+    def _new_case(self, service: ServiceType, day: int, params: Any) -> ApplicationCase:
+        """Build one new ``ApplicationCase`` for ``service`` arriving on ``day``.
+        Random attributes are drawn from ``self.rng`` in a fixed order: intake
+        channel, missing documents, field verification, then urgency. Changing
+        that order changes the outcome for a given seed.
+        """
+        self.case_counter += 1
+        profile = get_sector_profile(service)
+        # SLA window: profile value scaled by the event multiplier, truncated to whole days.
+        sla_days = int(profile.sla_days * getattr(params, "sla_window_multiplier", 1.0))
+        sla_deadline_day = day + sla_days
+        digital_ratio = self.task_config.digital_intake_ratio
+        channel = (
+            IntakeChannel.DIGITAL
+            if self.rng.random() < digital_ratio
+            else IntakeChannel.PAPER
+        )
+        # Missing-docs probability: profile default, optionally overridden per service
+        # (override table may be keyed by the enum member or by its string value).
+        base_missing = profile.missing_docs_probability
+        override = (self.task_config.missing_docs_probability_override or {}).get(
+            service,
+            (self.task_config.missing_docs_probability_override or {}).get(service.value),
+        )
+        if override is not None:
+            base_missing = override
+        defect_rate = (
+            profile.doc_defect_rate_digital
+            if channel == IntakeChannel.DIGITAL
+            else profile.doc_defect_rate_paper
+        )
+        # The channel defect rate only contributes when an event supplies a boost.
+        eff_missing = min(
+            1.0,
+            base_missing + getattr(params, "doc_defect_rate_boost", 0.0) * defect_rate,
+        )
+        has_missing = self.rng.random() < eff_missing
+        # Field-verification probability follows the same default/override/boost scheme.
+        base_fv = profile.field_verification_probability
+        fv_override = (self.task_config.field_verification_probability_override or {}).get(
+            service,
+            (self.task_config.field_verification_probability_override or {}).get(service.value),
+        )
+        if fv_override is not None:
+            base_fv = fv_override
+        eff_fv = min(1.0, base_fv + getattr(params, "field_verification_boost", 0.0))
+        has_fv = self.rng.random() < eff_fv
+        field_completion_day = day + profile.field_verification_days if has_fv else None
+        from app.models import UrgencyProfile
+        urgency_profile = profile.urgency_profile
+        # Short-circuit evaluation: an RNG draw happens only for HIGH or MODERATE
+        # profiles (20% and 8% urgent respectively); other profiles are never urgent.
+        is_urgent = (
+            urgency_profile == UrgencyProfile.HIGH and self.rng.random() < 0.20
+        ) or (
+            urgency_profile == UrgencyProfile.MODERATE and self.rng.random() < 0.08
+        )
+        # The case starts blocked on documents or in pre-scrutiny; the field
+        # verification requirement is recorded but is not the initial substate.
+        return ApplicationCase(
+            case_id=f"case-{self.case_counter:06d}",
+            service_type=service,
+            arrival_day=day,
+            current_day=day,
+            sla_deadline_day=sla_deadline_day,
+            intake_channel=channel,
+            internal_substate=(
+                InternalSubstate.BLOCKED_MISSING_DOCS
+                if has_missing
+                else InternalSubstate.PRE_SCRUTINY
+            ),
+            public_stage=StageType.SUBMISSION,
+            is_urgent=is_urgent,
+            has_missing_docs=has_missing,
+            field_verification_required=has_fv,
+            field_verification_completion_day=field_completion_day,
+        )
+    def _resolve_field_verification(
+        self,
+        day: int,
+        active_cases: list[ApplicationCase],
+        result: DayResult,
+    ) -> None:
+        for case in active_cases:
+            if (
+                case.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
+                and case.field_verification_completion_day is not None
+                and day >= case.field_verification_completion_day
+            ):
+                case.internal_substate = InternalSubstate.PRE_SCRUTINY
+                case.field_verification_completion_day = None
+                result.field_verif_completed += 1
+    def _resolve_doc_requests(
+        self,
+        day: int,
+        active_cases: list[ApplicationCase],
+        result: DayResult,
+    ) -> None:
+        for case in active_cases:
+            if (
+                case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
+                and case.doc_resolution_day is not None
+                and day >= case.doc_resolution_day
+            ):
+                case.internal_substate = InternalSubstate.PRE_SCRUTINY
+                case.doc_resolution_day = None
+                result.newly_unblocked_missing += 1
+    def _sort_queue(
+        self,
+        cases: list[ApplicationCase],
+        priority_mode: PriorityMode,
+    ) -> list[ApplicationCase]:
+        eligible = [c for c in cases if can_advance(c)]
+        if priority_mode == PriorityMode.URGENT_FIRST:
+            return sorted(
+                eligible,
+                key=lambda c: (not c.is_urgent, -c.sla_risk, c.arrival_day),
+            )
+        if priority_mode == PriorityMode.OLDEST_FIRST:
+            return sorted(eligible, key=lambda c: c.arrival_day)
+        if priority_mode == PriorityMode.BACKLOG_CLEARANCE:
+            return sorted(
+                eligible,
+                key=lambda c: (-c.sla_risk, not c.is_urgent, c.arrival_day),
+            )
+        return sorted(
+            eligible,
+            key=lambda c: (
+                -c.sla_risk if c.sla_risk > 0.8 else 0,
+                not c.is_urgent,
+                c.arrival_day,
+            ),
+        )
+    def build_queue_snapshot(
+        self,
+        service: ServiceType,
+        active_cases: list[ApplicationCase],
+        day: int,
+    ) -> QueueSnapshot:
+        cases = [
+            c
+            for c in active_cases
+            if c.service_type == service and not c.completed and not c.rejected
+        ]
+        stage_counts = {s.value: 0 for s in StageType}
+        for c in cases:
+            stage_counts[c.public_stage.value] = stage_counts.get(c.public_stage.value, 0) + 1
+        oldest_age = max((c.waiting_days for c in cases), default=0)
+        avg_wait = sum(c.waiting_days for c in cases) / len(cases) if cases else 0.0
+        sla_risk = sum(c.sla_risk for c in cases) / len(cases) if cases else 0.0
+        return QueueSnapshot(
+            service_type=service,
+            public_stage_counts=stage_counts,
+            total_pending=len(cases),
+            total_completed_today=0,
+            total_sla_breached=sum(1 for c in cases if c.sla_breached),
+            urgent_pending=sum(1 for c in cases if c.is_urgent),
+            blocked_missing_docs=sum(
+                1
+                for c in cases
+                if c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
+            ),
+            field_verification_pending=sum(
+                1
+                for c in cases
+                if c.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
+            ),
+            oldest_case_age_days=oldest_age,
+            avg_waiting_days=round(avg_wait, 2),
+            current_sla_risk=round(min(1.0, sla_risk), 3),
+        )
+# ─────────────────────────────────────────────
+# HIGH-LEVEL SIMULATION ORCHESTRATION
+# ─────────────────────────────────────────────
+class SimulationAgentMode(str, Enum):
+    """Kind of agent driving a simulation run; members are also plain strings."""
+    BASELINE_POLICY = "baseline_policy"
+    LLM_INFERENCE = "llm_inference"
+    TRAINED_RL = "trained_rl"
+@dataclass
+class SimulationRun:
+    """Result record for one simulation run."""
+    # Task identifier and the agent mode that produced the run.
+    task_id: str
+    agent_mode: SimulationAgentMode
+    seed: int
+    # Outcome figures; NOTE(review): exact semantics come from the code that
+    # fills this record, which is not visible in this part of the file.
+    total_reward: float
+    score: float
+    grader_name: str
+    summary: dict[str, Any]
+    trace: list[dict[str, Any]]
+def _dedupe(values: list[str | None]) -> list[str]:
+    out: list[str] = []
+    for value in values:
+        if value is None:
+            continue
+        v = str(value).strip()
+        if v and v not in out:
+            out.append(v)
+    return out
+def _env_csv_list(name: str) -> list[str]:
+    raw = os.getenv(name, "").strip()
+    if not raw:
+        return []
+    return [x.strip() for x in raw.split(",") if x.strip()]
+def _extract_json_object(text: str) -> dict[str, Any] | None:
+    text = (text or "").strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except json.JSONDecodeError:
+        pass
+    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
+    if not match:
+        return None
+    try:
+        parsed = json.loads(match.group(0))
+    except json.JSONDecodeError:
+        return None
+    return parsed if isinstance(parsed, dict) else None
+def _enum_service(value: Any) -> ServiceType | None:
+    if value is None or value == "":
+        return None
+    if isinstance(value, ServiceType):
+        return value
+    try:
+        return ServiceType(str(value))
+    except Exception:
+        return None
+def _enum_priority(value: Any) -> PriorityMode | None:
+    if value is None or value == "":
+        return None
+    if isinstance(value, PriorityMode):
+        return value
+    try:
+        return PriorityMode(str(value))
+    except Exception:
+        return None
+def _action_model_from_kwargs(action_type: ActionType, **kwargs: Any) -> ActionModel:
+    service = _enum_service(kwargs.get("service") or kwargs.get("service_target"))
+    target_service = _enum_service(kwargs.get("target_service"))
+    escalation_target = _enum_service(kwargs.get("escalation_target"))
+    priority_mode = _enum_priority(kwargs.get("priority_mode"))
+    officer_delta = kwargs.get("officer_delta")
+    case_id = kwargs.get("case_id")
+    candidates: list[dict[str, Any]] = []
+    if action_type == ActionType.ADVANCE_TIME:
+        candidates.append({"action_type": action_type})
+    elif action_type == ActionType.SET_PRIORITY_MODE:
+        candidates.extend(
+            [
+                {"action_type": action_type, "priority_mode": priority_mode},
+            ]
+        )
+    elif action_type == ActionType.ASSIGN_CAPACITY:
+        if service is not None:
+            delta = max(1, int(officer_delta or 1))
+            candidates.extend(
+                [
+                    {"action_type": action_type, "service": service, "officer_delta": delta},
+                    {"action_type": action_type, "service_target": service, "officer_delta": delta},
+                    {
+                        "action_type": action_type,
+                        "capacity_assignment": {service.value: delta},
+                    },
+                ]
+            )
+    elif action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
+        if service is not None:
+            candidates.extend(
+                [
+                    {"action_type": action_type, "service": service},
+                    {"action_type": action_type, "service_target": service},
+                ]
+            )
+    elif action_type == ActionType.ESCALATE_SERVICE:
+        svc = escalation_target or service
+        candidates.extend(
+            [
+                {"action_type": action_type, "service": svc, "case_id": case_id},
+                {"action_type": action_type, "service_target": svc, "case_id": case_id},
+                {"action_type": action_type, "escalation_target": svc, "case_id": case_id},
+            ]
+        )
+    elif action_type == ActionType.REALLOCATE_OFFICERS:
+        if service is not None and target_service is not None:
+            delta = max(1, int(officer_delta or 1))
+            candidates.extend(
+                [
+                    {
+                        "action_type": action_type,
+                        "service": service,
+                        "target_service": target_service,
+                        "officer_delta": delta,
+                    },
+                    {
+                        "action_type": action_type,
+                        "reallocation_delta": {
+                            service.value: -delta,
+                            target_service.value: delta,
+                        },
+                    },
+                ]
+            )
+    for candidate in candidates:
+        try:
+            return ActionModel(**candidate)
+        except Exception:
+            continue
+    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
+    if not payload:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME)
+    raw_action_type = payload.get("action_type") or payload.get("actionType")
+    try:
+        action_type = ActionType(str(raw_action_type))
+    except Exception:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME)
+    service = payload.get("service") or payload.get("service_target") or payload.get("serviceTarget")
+    target_service = payload.get("target_service") or payload.get("targetService")
+    escalation_target = payload.get("escalation_target") or payload.get("escalationTarget")
+    priority_mode = payload.get("priority_mode") or payload.get("priorityMode")
+    officer_delta = payload.get("officer_delta") or payload.get("officerDelta")
+    case_id = payload.get("case_id") or payload.get("caseId")
+    if action_type == ActionType.ASSIGN_CAPACITY and not service:
+        assignment = payload.get("capacity_assignment") or {}
+        if isinstance(assignment, dict) and assignment:
+            service, officer_delta = next(iter(assignment.items()))
+    if action_type == ActionType.REALLOCATE_OFFICERS and (not service or not target_service):
+        delta_map = payload.get("reallocation_delta") or {}
+        if isinstance(delta_map, dict) and len(delta_map) >= 2:
+            negatives = [k for k, v in delta_map.items() if int(v) < 0]
+            positives = [k for k, v in delta_map.items() if int(v) > 0]
+            if negatives and positives:
+                service = negatives[0]
+                target_service = positives[0]
+                officer_delta = abs(int(delta_map[service]))
+    return _action_model_from_kwargs(
+        action_type,
+        service=service,
+        target_service=target_service,
+        escalation_target=escalation_target,
+        priority_mode=priority_mode,
+        officer_delta=officer_delta,
+        case_id=case_id,
+    )
+def _recommended_min_steps(task_id: str) -> int:
+    if task_id == "cross_department_hard":
+        return 70
+    if task_id == "mixed_urgency_medium":
+        return 60
+    return 40
+def _queue_snapshot_iter(obs: ObservationModel) -> list[Any]:
+    raw = getattr(obs, "queue_snapshots", [])
+    if isinstance(raw, dict):
+        return list(raw.values())
+    if isinstance(raw, list):
+        return list(raw)
+    try:
+        return list(raw)
+    except Exception:
+        return []
+def _queue_service(q: Any) -> ServiceType | None:
+    return _enum_service(getattr(q, "service", None) or getattr(q, "service_type", None))
+def _queue_active_cases(q: Any) -> int:
+    return int(getattr(q, "active_cases", getattr(q, "total_pending", 0)) or 0)
+def _queue_missing_docs(q: Any) -> int:
+    return int(getattr(q, "missing_docs_cases", getattr(q, "blocked_missing_docs", 0)) or 0)
+def _queue_urgent_cases(q: Any) -> int:
+    return int(getattr(q, "urgent_cases", getattr(q, "urgent_pending", 0)) or 0)
+def _queue_breached_cases(q: Any) -> int:
+    return int(getattr(q, "breached_cases", getattr(q, "total_sla_breached", 0)) or 0)
+def _queue_avg_age(q: Any) -> float:
+    if hasattr(q, "avg_age_days"):
+        return float(getattr(q, "avg_age_days") or 0.0)
+    if hasattr(q, "oldest_case_age_days"):
+        return float(getattr(q, "oldest_case_age_days") or 0.0)
+    return float(getattr(q, "avg_waiting_days", 0.0) or 0.0)
+def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    for q in _queue_snapshot_iter(obs):
+        service = _queue_service(q)
+        if service is None:
+            continue
+        rows.append(
+            {
+                "service": service.value,
+                "active_cases": _queue_active_cases(q),
+                "missing_docs_cases": _queue_missing_docs(q),
+                "urgent_cases": _queue_urgent_cases(q),
+                "breached_cases": _queue_breached_cases(q),
+                "avg_age_days": _queue_avg_age(q),
+            }
+        )
+    return rows
+def _pool_allocations(obs: ObservationModel) -> dict[Any, Any]:
+    pool = getattr(obs, "officer_pool", None)
+    if pool is None:
+        return {}
+    return getattr(pool, "allocations", getattr(pool, "allocated", {})) or {}
+def _reserve_officers(obs: ObservationModel) -> int:
+    pool = getattr(obs, "officer_pool", None)
+    if pool is None:
+        return 0
+    for name in ("reserve_officers", "idle_officers", "available_officers"):
+        if hasattr(pool, name):
+            try:
+                return int(getattr(pool, name) or 0)
+            except Exception:
+                pass
+    return 0
+def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
+    allocs = _pool_allocations(obs)
+    raw = allocs.get(service)
+    if raw is None:
+        raw = allocs.get(service.value, 0)
+    return int(raw or 0)
+def _top_backlog_service(
+    obs: ObservationModel,
+    *,
+    exclude: ServiceType | None = None,
+) -> ServiceType | None:
+    ranked: list[Any] = []
+    for q in _queue_snapshot_iter(obs):
+        service = _queue_service(q)
+        if service is None or service == exclude:
+            continue
+        ranked.append(q)
+    if not ranked:
+        return None
+    ranked.sort(
+        key=lambda q: (
+            _queue_active_cases(q) + (2 * _queue_breached_cases(q)) + _queue_urgent_cases(q),
+            _queue_avg_age(q),
+        ),
+        reverse=True,
+    )
+    return _queue_service(ranked[0])
+def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
+    candidates = [q for q in _queue_snapshot_iter(obs) if _queue_missing_docs(q) > 0]
+    if not candidates:
+        return None
+    candidates.sort(key=lambda q: (_queue_missing_docs(q), _queue_active_cases(q)), reverse=True)
+    return _queue_service(candidates[0])
+def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
+    services = [s for s in (_queue_service(q) for q in _queue_snapshot_iter(obs)) if s is not None]
+    services.sort(key=lambda s: _alloc_for(obs, s), reverse=True)
+    for service in services:
+        if _alloc_for(obs, service) > 0:
+            return service
+    return None
+def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
+    has_reserve = _reserve_officers(obs) > 0
+    snapshots = _queue_snapshot_iter(obs)
+    has_missing = any(_queue_missing_docs(q) > 0 for q in snapshots)
+    has_backlog = any(_queue_active_cases(q) > 0 for q in snapshots)
+    has_budget = int(getattr(obs, "escalation_budget_remaining", 0) or 0) > 0
+    staffed_services = [q for q in snapshots if (_queue_service(q) is not None and _alloc_for(obs, _queue_service(q)) > 0)]
+    can_reallocate = len(staffed_services) >= 1 and len(snapshots) >= 2
+    return {
+        ActionType.SET_PRIORITY_MODE: True,
+        ActionType.ADVANCE_TIME: True,
+        ActionType.ASSIGN_CAPACITY: has_reserve and has_backlog,
+        ActionType.REQUEST_MISSING_DOCUMENTS: has_missing,
+        ActionType.ESCALATE_SERVICE: has_budget and has_backlog,
+        ActionType.REALLOCATE_OFFICERS: can_reallocate,
+    }
+def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
+    mask = _compute_action_mask(obs)
+    allowed = [k.value for k, ok in mask.items() if ok]
+    blocked = [k.value for k, ok in mask.items() if not ok]
+    return allowed, blocked
+def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
+    """Pick a heuristic action for ``obs`` together with a short reason.
+
+    The candidates are tried in a fixed priority order and the first one
+    that applies is returned:
+
+    1. assign one reserve officer to the top-backlog service,
+    2. request missing documents for the service with the most of them,
+    3. escalate the queue with the most breached/active/urgent cases
+       (only while escalation budget remains),
+    4. move one officer from the best-staffed service to the top-backlog
+       service among the others,
+    5. otherwise advance time.
+
+    Returns:
+        ``(action, reason)`` where ``reason`` is a human-readable note
+        describing which rule fired.
+    """
+    top_backlog = _top_backlog_service(obs)
+    top_missing = _service_with_missing_docs(obs)
+    # 1) Spare capacity goes to the most loaded service first.
+    if _reserve_officers(obs) > 0 and top_backlog is not None:
+        return (
+            _action_model_from_kwargs(
+                ActionType.ASSIGN_CAPACITY,
+                service=top_backlog,
+                officer_delta=1,
+            ),
+            "high-impact: assign reserve capacity to top backlog service",
+        )
+    # 2) Next, chase missing documents.
+    if top_missing is not None:
+        return (
+            _action_model_from_kwargs(
+                ActionType.REQUEST_MISSING_DOCUMENTS,
+                service=top_missing,
+            ),
+            "high-impact: clear missing-document bottleneck",
+        )
+    # 3) Escalate while budget remains; queues are ranked by breached, then
+    #    active, then urgent cases.
+    if int(getattr(obs, "escalation_budget_remaining", 0) or 0) > 0:
+        hot = sorted(
+            _queue_snapshot_iter(obs),
+            key=lambda q: (_queue_breached_cases(q), _queue_active_cases(q), _queue_urgent_cases(q)),
+            reverse=True,
+        )
+        if hot and (_queue_breached_cases(hot[0]) > 0 or _queue_active_cases(hot[0]) > 0):
+            service = _queue_service(hot[0])
+            # Only the top-ranked queue is considered; if its service cannot
+            # be resolved the escalation rule is skipped entirely.
+            if service is not None:
+                return (
+                    _action_model_from_kwargs(
+                        ActionType.ESCALATE_SERVICE,
+                        service=service,
+                    ),
+                    "high-impact: escalate highest SLA-risk service",
+                )
+    # 4) Shift one officer from the best-staffed service to the neediest
+    #    other service.
+    source = _service_with_officers(obs)
+    if source is not None and _alloc_for(obs, source) > 0:
+        target = _top_backlog_service(obs, exclude=source)
+        if target is not None and target != source:
+            return (
+                _action_model_from_kwargs(
+                    ActionType.REALLOCATE_OFFICERS,
+                    service=source,
+                    target_service=target,
+                    officer_delta=1,
+                ),
+                "high-impact: reallocate one officer toward highest backlog",
+            )
+    # 5) Nothing applicable: let the simulation clock move on.
+    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
+def _repair_action_for_observation(
+    action: ActionModel,
+    obs: ObservationModel,
+) -> tuple[ActionModel, str | None]:
+    """Validate ``action`` against ``obs`` and repair or replace it if needed.
+
+    Args:
+        action: The proposed action.
+        obs: The observation the action will be applied to.
+
+    Returns:
+        ``(action_to_run, note)``. ``note`` is ``None`` when the original
+        action is returned untouched (``advance_time``, a
+        ``set_priority_mode`` that already carries a mode, or an unknown
+        action type). Otherwise it describes the repair or explains why the
+        heuristic from ``_best_high_impact_action`` was substituted.
+
+    NOTE(review): the assign/request/escalate/reallocate branches always
+    rebuild the action and return a "repaired ..." note, even when the
+    rebuilt payload may be identical to the input — confirm callers that
+    count repairs expect this.
+    """
+    mask = _compute_action_mask(obs)
+    at = action.action_type
+    # Action type currently masked out: replace it wholesale.
+    if not bool(mask.get(at, True)):
+        fallback, why = _best_high_impact_action(obs)
+        return fallback, f"masked {at.value}; {why}"
+    if at == ActionType.ADVANCE_TIME:
+        return action, None
+    if at == ActionType.SET_PRIORITY_MODE:
+        # Only a missing mode needs fixing; any provided mode is passed through.
+        if getattr(action, "priority_mode", None) is None:
+            return (
+                _action_model_from_kwargs(
+                    ActionType.SET_PRIORITY_MODE,
+                    priority_mode=PriorityMode.BACKLOG_CLEARANCE,
+                ),
+                "missing priority_mode, defaulted to backlog_clearance",
+            )
+        return action, None
+    if at == ActionType.ASSIGN_CAPACITY:
+        reserve = _reserve_officers(obs)
+        if reserve <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"reserve officers exhausted; {why}"
+        # Prefer the service named in the action; otherwise the top backlog one.
+        service = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _top_backlog_service(obs)
+        if service is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no service available for assign_capacity; {why}"
+        # Clamp the delta into [1, reserve].
+        delta = max(1, int(getattr(action, "officer_delta", 1) or 1))
+        delta = min(delta, reserve)
+        repaired = _action_model_from_kwargs(
+            ActionType.ASSIGN_CAPACITY,
+            service=service,
+            officer_delta=delta,
+        )
+        return repaired, "repaired assign_capacity payload"
+    if at == ActionType.REQUEST_MISSING_DOCUMENTS:
+        service = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _service_with_missing_docs(obs)
+        if service is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no missing-doc queue available; {why}"
+        repaired = _action_model_from_kwargs(
+            ActionType.REQUEST_MISSING_DOCUMENTS,
+            service=service,
+        )
+        return repaired, "repaired request_missing_documents payload"
+    if at == ActionType.ESCALATE_SERVICE:
+        if int(getattr(obs, "escalation_budget_remaining", 0) or 0) <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"escalation budget exhausted; {why}"
+        # The target may arrive under several field names; fall back to the
+        # top backlog service when none resolves.
+        service = (
+            _enum_service(getattr(action, "service", None))
+            or _enum_service(getattr(action, "service_target", None))
+            or _enum_service(getattr(action, "escalation_target", None))
+            or _top_backlog_service(obs)
+        )
+        case_id = getattr(action, "case_id", None)
+        # A case id alone is accepted as an escalation target.
+        if service is None and case_id is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no escalation target available; {why}"
+        repaired = _action_model_from_kwargs(
+            ActionType.ESCALATE_SERVICE,
+            service=service,
+            case_id=case_id,
+        )
+        return repaired, "repaired escalate_service payload"
+    if at == ActionType.REALLOCATE_OFFICERS:
+        source = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _service_with_officers(obs)
+        if source is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no staffed source service; {why}"
+        source_alloc = _alloc_for(obs, source)
+        # Requested source has no officers: switch to the best-staffed service.
+        if source_alloc <= 0:
+            source = _service_with_officers(obs)
+            source_alloc = _alloc_for(obs, source) if source is not None else 0
+        if source is None or source_alloc <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"insufficient source officers; {why}"
+        target = _enum_service(getattr(action, "target_service", None))
+        # Target must differ from the source; otherwise pick the top backlog
+        # service among the remaining ones.
+        if target is None or target == source:
+            target = _top_backlog_service(obs, exclude=source)
+        if target is None or target == source:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"missing distinct target_service; {why}"
+        # Clamp the delta into [1, source_alloc].
+        delta = max(1, int(getattr(action, "officer_delta", 1) or 1))
+        delta = min(delta, source_alloc)
+        repaired = _action_model_from_kwargs(
+            ActionType.REALLOCATE_OFFICERS,
+            service=source,
+            target_service=target,
+            officer_delta=delta,
+        )
+        return repaired, "repaired reallocate_officers payload"
+    # Unknown action type: pass through unchanged.
+    return action, None
+def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
+    if agent_mode == SimulationAgentMode.BASELINE_POLICY:
+        return "baseline_policy"
+    if agent_mode == SimulationAgentMode.TRAINED_RL:
+        return "trained_rl"
+    return os.getenv("MODEL_NAME", "llm_inference")
+def _log_step_line(step_row: dict[str, Any]) -> str:
+    done = "true" if bool(step_row.get("done")) else "false"
+    error = step_row.get("last_action_error") or "null"
+    action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
+    source = step_row.get("decision_source") or "unknown"
+    model = step_row.get("model_used") or "null"
+    repair = step_row.get("repair_note") or "null"
+    switch_note = step_row.get("switch_note") or "null"
+    return (
+        f"[STEP] step={step_row.get('step', 0)} action={action} "
+        f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
+        f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
+    )
+def _resolve_model_path_or_raise(model_path: str) -> str:
+    p = Path(model_path).expanduser()
+    if not p.is_absolute():
+        p = (Path.cwd() / p).resolve()
+    if p.is_dir():
+        candidates = [
+            p / "best_model.zip",
+            p / "model.zip",
+            p / "checkpoint.zip",
+        ]
+        zip_files = sorted(p.glob("*.zip"))
+        candidates.extend(zip_files)
+        for candidate in candidates:
+            if candidate.exists():
+                return str(candidate)
+    if p.exists():
+        return str(p)
+    raise FileNotFoundError(f"Model path not found: {model_path}")
+def _load_model_cached_or_raise(model_abs: str, model_type: Literal["maskable", "recurrent"]) -> Any:
+    key = (model_abs, model_type)
+    if key in _MODEL_CACHE:
+        return _MODEL_CACHE[key]
+    if model_type == "recurrent":
+        from sb3_contrib import RecurrentPPO
+        model = RecurrentPPO.load(model_abs)
+    else:
+        try:
+            from sb3_contrib import MaskablePPO
+            model = MaskablePPO.load(model_abs)
+        except Exception:
+            from stable_baselines3 import PPO
+            model = PPO.load(model_abs)
+    _MODEL_CACHE[key] = model
+    return model
+def _safe_invalid_action_count(final_state: Any) -> int:
+    if hasattr(final_state, "total_invalid_actions"):
+        return int(getattr(final_state, "total_invalid_actions") or 0)
+    metrics = getattr(final_state, "metrics", None)
+    if metrics is not None and hasattr(metrics, "total_invalid_actions"):
+        return int(getattr(metrics, "total_invalid_actions") or 0)
+    return 0
+class LiveSimulationSession:
+    def __init__(
+        self,
+        *,
+        task_id: str,
+        agent_mode: SimulationAgentMode,
+        max_steps: int,
+        seed: int | None,
+        policy_name: str | None = None,
+        model_path: str | None = None,
+        model_type: Literal["maskable", "recurrent"] = "maskable",
+    ) -> None:
+        """Create a session and initialise the environment for ``agent_mode``.
+
+        Args:
+            task_id: Task identifier handed to the environment.
+            agent_mode: Selects the decision maker (baseline policy, trained
+                RL model, or LLM inference).
+            max_steps: Step budget. In LLM-inference mode it is raised to at
+                least ``_recommended_min_steps(task_id)``.
+            seed: Environment seed; a random one in ``[1, 999999]`` is drawn
+                when ``None``.
+            policy_name: Baseline policy name; defaults to
+                ``"backlog_clearance"``.
+            model_path: Path of the trained model (used in trained-RL mode).
+            model_type: Which trained-model loader to use.
+        """
+        self.task_id = task_id
+        self.agent_mode = agent_mode
+        recommended = _recommended_min_steps(task_id)
+        # Only LLM runs get the recommended floor; other modes use max_steps as given.
+        self.max_steps = max(int(max_steps), int(recommended)) if agent_mode == SimulationAgentMode.LLM_INFERENCE else int(max_steps)
+        self.seed = int(seed if seed is not None else random.randint(1, 999999))
+        self.policy_name = policy_name or "backlog_clearance"
+        self.model_path = model_path
+        self.model_type = model_type
+        # Episode bookkeeping.
+        self.trace: list[dict[str, Any]] = []
+        self.total_reward = 0.0
+        self.step_idx = 0
+        self.done = False
+        self.summary: dict[str, Any] | None = None
+        self.score: float | None = None
+        self.grader_name: str | None = None
+        # Core-environment handles (baseline / LLM modes).
+        self.env: Any = None
+        self.obs: ObservationModel | Any = None
+        self.policy: Any = None
+        # Trained-RL handles.
+        self.rl_env: Any = None
+        self.rl_model: Any = None
+        self.rl_lstm_state: Any = None
+        self.rl_episode_start: Any = None
+        # LLM routing state, filled in by _init_llm_runtimes().
+        self.llm_runtimes: list[dict[str, Any]] = []
+        self.llm_route: list[str] = []
+        self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
+        # Failure / recovery counters.
+        self.consecutive_failure_steps = 0
+        self.recovery_steps_remaining = 0
+        self.auto_switch_count = 0
+        self.last_switch_reason: str | None = None
+        # All attributes above must exist before the mode-specific setup runs.
+        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
+            self._init_trained()
+        else:
+            self._init_core()
+    def start_line(self) -> dict[str, Any]:
+        return {
+            "log": (
+                f"[START] task={self.task_id} env=gov-workflow-openenv "
+                f"model={_model_label_for_mode(self.agent_mode)}"
+            ),
+            "observation": self.obs
+        }
+    def _init_core(self) -> None:
+        from app.baselines import POLICIES, backlog_clearance_policy
+        from app.env import GovWorkflowEnv
+        self.env = GovWorkflowEnv(task_id=self.task_id)
+        self.obs, _ = self.env.reset(seed=self.seed)
+        if self.agent_mode == SimulationAgentMode.BASELINE_POLICY:
+            self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
+        else:
+            self.policy = self._llm_action_with_meta
+            self._init_llm_runtimes()
+    def _init_llm_runtimes(self) -> None:
+        openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
+        nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")
+        openai_keys = _dedupe(
+            [
+                os.getenv("HF_TOKEN"),
+                os.getenv("OPENAI_API_KEY"),
+                os.getenv("API_KEY"),
+            ]
+        )
+        nvidia_keys = _dedupe(
+            [
+                os.getenv("NVIDIA_API_KEY"),
+                os.getenv("NVIDIA_API_KEY_2"),
+            ]
+        )
+        openai_models = _dedupe(
+            [
+                os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct"),
+                *_env_csv_list("MODEL_FALLBACKS"),
+            ]
+        )
+        nvidia_models = _dedupe(
+            [
+                os.getenv("NVIDIA_MODEL"),
+                *_env_csv_list("NVIDIA_MODEL_FALLBACKS"),
+                *LEGACY_NVIDIA_MODEL_POOL,
+            ]
+        )
+        runtimes: list[dict[str, Any]] = []
+        if openai_keys and openai_models:
+            clients: list[tuple[OpenAI, str]] = []
+            for idx, key in enumerate(openai_keys, start=1):
+                try:
+                    clients.append(
+                        (
+                            OpenAI(base_url=openai_base, api_key=key, timeout=8.0, max_retries=0),
+                            f"openai_key_{idx}",
+                        )
+                    )
+                except Exception:
+                    continue
+            if clients:
+                runtimes.append(
+                    {
+                        "provider": "openai-compatible",
+                        "base_url": openai_base,
+                        "clients": clients,
+                        "models": openai_models,
+                    }
+                )
+        if nvidia_keys and nvidia_models:
+            clients = []
+            for idx, key in enumerate(nvidia_keys, start=1):
+                try:
+                    clients.append(
+                        (
+                            OpenAI(base_url=nvidia_base, api_key=key, timeout=8.0, max_retries=0),
+                            f"nvidia_key_{idx}",
+                        )
+                    )
+                except Exception:
+                    continue
+            if clients:
+                runtimes.append(
+                    {
+                        "provider": "nvidia",
+                        "base_url": nvidia_base,
+                        "clients": clients,
+                        "models": nvidia_models,
+                    }
+                )
+        self.llm_runtimes = runtimes
+        self.llm_model_stats = {}
+        for runtime in runtimes:
+            provider = str(runtime.get("provider"))
+            for model in runtime.get("models", []):
+                self.llm_model_stats[(provider, str(model))] = {
+                    "calls": 0,
+                    "invalid": 0,
+                    "repaired": 0,
+                    "failures": 0,
+                    "cooldown_until_step": 0,
+                }
+        openai_runtime = next((rt for rt in runtimes if rt.get("provider") == "openai-compatible"), None)
+        nvidia_runtime = next((rt for rt in runtimes if rt.get("provider") == "nvidia"), None)
+        openai_route = (
+            f"openai-compatible ({len(openai_runtime['clients'])} keys, {len(openai_runtime['models'])} models)"
+            if openai_runtime is not None
+            else "openai-compatible (unavailable: missing API key/model)"
+        )
+        nvidia_route = (
+            f"nvidia ({len(nvidia_runtime['clients'])} keys, {len(nvidia_runtime['models'])} models)"
+            if nvidia_runtime is not None
+            else "nvidia (unavailable: missing API key/model)"
+        )
+        self.llm_route = [
+            openai_route,
+            nvidia_route,
+            "adaptive ranking: prefer models with lower invalid/repaired rates",
+            "heuristic fallback (backlog_clearance_policy)",
+        ]
+    def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
+        def _score(model_name: str) -> tuple[float, int]:
+            stat = self.llm_model_stats.get((provider, model_name), {})
+            calls = max(1, int(stat.get("calls", 0)))
+            invalid_rate = float(stat.get("invalid", 0)) / calls
+            repaired_rate = float(stat.get("repaired", 0)) / calls
+            fail_rate = float(stat.get("failures", 0)) / calls
+            cooldown = int(stat.get("cooldown_until_step", 0))
+            cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
+            return (
+                invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty,
+                -calls,
+            )
+        return sorted([str(m) for m in models], key=_score)
+    def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
+        """Ask the configured LLMs for the next action, with heuristic fallback.
+
+        While ``recovery_steps_remaining`` is positive the LLM is bypassed and
+        ``_best_high_impact_action`` decides. Otherwise every runtime, client
+        and ranked model is tried in turn until one returns content that can
+        be parsed and coerced into an action. If all attempts fail (or no
+        runtime is configured) the heuristic is used.
+
+        Args:
+            obs: Current observation; serialised into the user prompt.
+
+        Returns:
+            ``(action, meta)`` where ``meta`` records the decision source,
+            provider, model, number of LLM attempts, last error text and the
+            key label that served the request.
+        """
+        # Recovery window: consume one step and decide heuristically.
+        if self.recovery_steps_remaining > 0:
+            self.recovery_steps_remaining -= 1
+            action, why = _best_high_impact_action(obs)
+            return action, {
+                "decision_source": "auto_recovery_policy",
+                "provider": "heuristic",
+                "model_used": "backlog_clearance_policy",
+                "llm_attempts": 0,
+                "llm_error": None,
+                "llm_key_label": None,
+                "repair_note": why,
+            }
+        attempts = 0
+        last_error = ""
+        allowed_actions, blocked_actions = _masked_action_type_hints(obs)
+        # Per-action required fields and enum values, sent to the model as a hint.
+        schema_hint = {
+            "required_fields": {
+                "set_priority_mode": ["action_type", "priority_mode"],
+                "assign_capacity": ["action_type", "service", "officer_delta"],
+                "request_missing_documents": ["action_type", "service"],
+                "escalate_service": ["action_type", "service"],
+                "advance_time": ["action_type"],
+                "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
+            },
+            "allowed_priority_mode": [m.value for m in PriorityMode],
+            "allowed_services": [s.value for s in ServiceType],
+        }
+        system_prompt = (
+            "You are controlling a government workflow simulator. "
+            "Return exactly one JSON object only. No markdown. No explanation. "
+            "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents, "
+            "escalate_service, advance_time, reallocate_officers. "
+            "Rules: "
+            "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target. "
+            "2) assign_capacity requires service + officer_delta>0. "
+            "3) request_missing_documents requires service with missing_docs_cases>0. "
+            "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance]. "
+            "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops. "
+            "Use lowercase enum values."
+        )
+        # The observation is dumped via model_dump_json() when available,
+        # otherwise via a dict() method if the object has one.
+        user_prompt = (
+            "Observation:\n"
+            f"{obs.model_dump_json() if hasattr(obs, 'model_dump_json') else json.dumps(getattr(obs, 'dict', lambda: {})())}\n"
+            f"Allowed action types now: {allowed_actions}\n"
+            f"Blocked action types now: {blocked_actions}\n"
+            f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}\n"
+            f"Last action validity: {getattr(obs, 'last_action_valid', True)}\n"
+            f"Last action message: {getattr(obs, 'last_action_message', '')}\n"
+            "Return action JSON."
+        )
+        # Try runtimes in configured order; within a runtime, each key tries
+        # the models from best- to worst-ranked.
+        for runtime in self.llm_runtimes:
+            provider = str(runtime["provider"])
+            ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
+            for client, key_label in runtime["clients"]:
+                for model in ranked_models:
+                    attempts += 1
+                    stat_key = (provider, model)
+                    try:
+                        out = client.chat.completions.create(
+                            model=model,
+                            messages=[
+                                {"role": "system", "content": system_prompt},
+                                {"role": "user", "content": user_prompt},
+                            ],
+                            temperature=0.0,
+                            max_tokens=200,
+                            stream=False,
+                        )
+                        content = (out.choices[0].message.content or "").strip()
+                        action = _coerce_action(_extract_json_object(content))
+                        if stat_key in self.llm_model_stats:
+                            self.llm_model_stats[stat_key]["calls"] += 1
+                        return action, {
+                            "decision_source": "llm",
+                            "provider": provider,
+                            "model_used": model,
+                            "llm_attempts": attempts,
+                            "llm_error": None,
+                            "llm_key_label": key_label,
+                        }
+                    except Exception as exc:
+                        # Request or parsing failed: record it and move on to
+                        # the next model/key.
+                        last_error = str(exc)
+                        stat = self.llm_model_stats.get(stat_key)
+                        if stat is not None:
+                            stat["calls"] += 1
+                            stat["failures"] += 1
+                            # From the second recorded failure on, the model is
+                            # put on cooldown until five steps from now.
+                            if stat["failures"] >= 2:
+                                stat["cooldown_until_step"] = self.step_idx + 5
+                        continue
+        # Every attempt failed (or nothing is configured): decide heuristically.
+        action, why = _best_high_impact_action(obs)
+        if not self.llm_runtimes:
+            last_error = "No LLM credentials configured."
+        return action, {
+            "decision_source": "heuristic_fallback",
+            "provider": "heuristic",
+            "model_used": "backlog_clearance_policy",
+            "llm_attempts": attempts,
+            "llm_error": last_error or None,
+            "llm_key_label": None,
+            "repair_note": why,
+        }
+    def _init_trained(self) -> None:
+        import numpy as np
+        from rl.gov_workflow_env import GovWorkflowGymEnv
+        if not self.model_path:
+            raise ValueError("model_path is required for trained_rl simulation.")
+        model_abs = _resolve_model_path_or_raise(self.model_path)
+        self.rl_model = _load_model_cached_or_raise(model_abs, self.model_type)
+        self.rl_env = GovWorkflowGymEnv(
+            task_id=self.task_id,
+            seed=self.seed,
+            hard_action_mask=True,
+        )
+        self.obs, _ = self.rl_env.reset(seed=self.seed)
+        self.rl_lstm_state = None
+        self.rl_episode_start = np.array([True], dtype=bool)
+    def step_once(self) -> tuple[dict[str, Any], str, bool]:
+        if self.done:
+            raise RuntimeError("Simulation already finished.")
+        self.step_idx += 1
+        row = self._step_trained() if self.agent_mode == SimulationAgentMode.TRAINED_RL else self._step_core()
+        self.trace.append(row)
+        self.total_reward += float(row["reward"])
+        step_log = _log_step_line(row)
+        if row["done"] or self.step_idx >= self.max_steps:
+            self._finalize()
+            row["done"] = True
+            return row, step_log, True
+        return row, step_log, False
+    def end_line(self) -> str:
+        if self.score is None:
+            return "[END] success=false steps=0 score=0.00 rewards="
+        rewards = ",".join(f"{float(x.get('reward', 0.0)):.2f}" for x in self.trace)
+        success = "true" if self.score >= 0.5 else "false"
+        return f"[END] success={success} steps={len(self.trace)} score={self.score:.2f} rewards={rewards}"
+    def step_line(self, action: dict | ActionModel) -> dict[str, Any]:
+        """Test wrapper for executing an action and returning observation + reward."""
+        if isinstance(action, dict):
+            action = _coerce_action(action)
+        self.obs, reward, terminated, truncated, info = self.env.step(action)
+        return {"observation": self.obs, "reward": reward}
+    def snapshot(self) -> dict[str, Any]:
+        return {
+            "task_id": self.task_id,
+            "agent_mode": self.agent_mode.value,
+            "seed": self.seed,
+            "max_steps": self.max_steps,
+            "step_idx": self.step_idx,
+            "done": self.done,
+            "total_reward": float(self.total_reward),
+            "score": self.score,
+            "grader_name": self.grader_name,
+            "summary": self.summary,
+            "trace_len": len(self.trace),
+            "llm_route": list(self.llm_route),
+        }
+    def close(self) -> None:
+        try:
+            if self.env is not None and hasattr(self.env, "close"):
+                self.env.close()
+        except Exception:
+            pass
+        try:
+            if self.rl_env is not None and hasattr(self.rl_env, "close"):
+                self.rl_env.close()
+        except Exception:
+            pass
+    def _step_core(self) -> dict[str, Any]:
+        if self.env is None:
+            raise RuntimeError("Core simulation env not initialized.")
+        if self.agent_mode == SimulationAgentMode.BASELINE_POLICY:
+            action = self.policy(self.obs)
+            meta = {
+                "decision_source": "baseline_policy",
+                "provider": "local_policy",
+                "model_used": self.policy_name,
+                "llm_attempts": 0,
+                "llm_error": None,
+                "llm_key_label": None,
+            }
+        else:
+            raw_decision = self.policy(self.obs)
+            if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
+                action, meta = raw_decision
+            else:
+                action, meta = raw_decision, {}
+            if not isinstance(meta, dict):
+                meta = {}
+            if not isinstance(action, ActionModel):
+                if isinstance(action, dict):
+                    action = _coerce_action(action)
+                else:
+                    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+                    meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"
+            allowed_mask = _compute_action_mask(self.obs)
+            if not bool(allowed_mask.get(action.action_type, True)):
+                masked_fallback, why = _best_high_impact_action(self.obs)
+                action = masked_fallback
+                if meta.get("decision_source") == "llm":
+                    meta["decision_source"] = "llm_repaired"
+                meta["repair_note"] = f"action masked at runtime; {why}"
+            repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
+            if repair_note:
+                action = repaired_action
+                if meta.get("decision_source") == "llm":
+                    meta["decision_source"] = "llm_repaired"
+                meta["repair_note"] = repair_note
+        self.obs, reward, terminated, truncated, info = self.env.step(action)
+        done = bool(terminated or truncated)
+        last_action_error = getattr(info, "last_action_error", None)
+        if last_action_error is None:
+            last_action_error = getattr(info, "action_explanation", None)
+        row = {
+            "step": self.step_idx,
+            "day": self.obs.day,
+            "action_type": action.action_type.value,
+            "action_payload": action.model_dump(exclude_none=True, mode="json"),
+            "reward": float(reward),
+            "done": done,
+            "backlog": getattr(self.obs, "total_backlog", 0),
+            "completed": getattr(self.obs, "total_completed", 0),
+            "sla_breaches": getattr(self.obs, "total_sla_breaches", 0),
+            "fairness_gap": float(
+                getattr(self.obs, "fairness_gap", getattr(self.obs, "fairness_index", 0.0)) or 0.0
+            ),
+            "escalation_budget_remaining": getattr(self.obs, "escalation_budget_remaining", 0),
+            "invalid_action": bool(getattr(info, "invalid_action", False)),
+            "last_action_error": last_action_error,
+            "queue_rows": _queue_rows(self.obs),
+        }
+        row.update(meta)
+        if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
+            is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
+            is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
+            model_used = str(row.get("model_used") or "")
+            provider = str(row.get("provider") or "")
+            stat_key = (provider, model_used)
+            stat = self.llm_model_stats.get(stat_key)
+            if stat is not None:
+                if is_repaired:
+                    stat["repaired"] += 1
+                if is_invalid:
+                    stat["invalid"] += 1
+                    stat["failures"] += 1
+                else:
+                    stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)
+            is_failure_pattern = is_invalid or is_repaired
+            self.consecutive_failure_steps = self.consecutive_failure_steps + 1 if is_failure_pattern else 0
+            if self.consecutive_failure_steps >= 4:
+                if stat is not None:
+                    stat["cooldown_until_step"] = self.step_idx + 6
+                self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
+                self.auto_switch_count += 1
+                self.last_switch_reason = "repeated invalid/repaired pattern detected"
+                row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
+                self.consecutive_failure_steps = 0
+        return row
+    def _step_trained(self) -> dict[str, Any]:
+        """Advance the RL env by one step using the trained model and return its trace row.
+        The action index is predicted deterministically. For the "recurrent"
+        model type the LSTM state and episode-start flag are threaded through
+        ``predict`` and the predicted index is checked against the action mask
+        by hand; for every other model type the masks are handed to
+        ``predict`` directly. Metrics in the returned row are read from an
+        observation rebuilt on the wrapped core env.
+        """
+        import numpy as np
+        masks = self.rl_env.action_masks()
+        if self.model_type == "recurrent":
+            action, self.rl_lstm_state = self.rl_model.predict(
+                self.obs,
+                state=self.rl_lstm_state,
+                episode_start=self.rl_episode_start,
+                deterministic=True,
+            )
+            action_idx = int(action.item() if hasattr(action, "item") else action)
+            # The recurrent predict call receives no mask, so enforce it here:
+            # fall back to the first allowed index when the prediction is
+            # out of range or masked out.
+            if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
+                valid = np.flatnonzero(masks)
+                # NOTE(review): 18 is used when no action is allowed; presumably
+                # the index of a safe default such as advance_time — confirm
+                # against the action decode table.
+                action_idx = int(valid[0]) if valid.size > 0 else 18
+        else:
+            from sb3_contrib.common.maskable.utils import get_action_masks
+            action, _ = self.rl_model.predict(
+                self.obs,
+                action_masks=get_action_masks(self.rl_env),
+                deterministic=True,
+            )
+            action_idx = int(action.item() if hasattr(action, "item") else action)
+        self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
+        done = bool(terminated or truncated)
+        if self.model_type == "recurrent":
+            # Feed the episode-end flag back into the next recurrent predict call.
+            self.rl_episode_start = np.array([done], dtype=bool)
+        core_env = self.rl_env.core_env
+        # Rebuild an observation from the core env to report its metrics; this
+        # relies on the core env's private _build_observation helper.
+        core_obs = core_env._build_observation()
+        action_model, action_label = _decode_action_idx(action_idx)
+        return {
+            "step": self.step_idx,
+            "day": core_obs.day,
+            "action_type": action_label,
+            "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
+            "action_index": action_idx,
+            "reward": float(reward),
+            "done": done,
+            "backlog": core_obs.total_backlog,
+            "completed": core_obs.total_completed,
+            "sla_breaches": core_obs.total_sla_breaches,
+            "fairness_gap": float(
+                getattr(core_obs, "fairness_gap", getattr(core_obs, "fairness_index", 0.0)) or 0.0
+            ),
+            "escalation_budget_remaining": core_obs.escalation_budget_remaining,
+            "invalid_action": bool(info.get("invalid_action", False)),
+            "last_action_error": info.get("last_action_error") or info.get("action_explanation"),
+            "queue_rows": _queue_rows(core_obs),
+            "decision_source": "trained_rl",
+            "provider": "rl",
+            "model_used": self.model_path or "trained_rl",
+            "llm_attempts": 0,
+            "llm_error": None,
+            "llm_key_label": None,
+        }
+    def _finalize(self) -> None:
+        if self.done:
+            return
+        self.done = True
+        from app.graders import grade_episode
+        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
+            final_state = self.rl_env.core_env.state()
+        else:
+            final_state = self.env.state()
+        gr = grade_episode(final_state)
+        self.score = float(gr.score)
+        self.grader_name = gr.grader_name
+        llm_steps = sum(1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"})
+        fallback_steps = sum(
+            1 for row in self.trace if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
+        )
+        repaired_steps = sum(
+            1 for row in self.trace if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
+        )
+        total_steps = max(1, len(self.trace))
+        invalid_actions = _safe_invalid_action_count(final_state)
+        invalid_rate = float(invalid_actions) / float(total_steps)
+        repaired_rate = float(repaired_steps) / float(total_steps)
+        ranked_models: list[dict[str, Any]] = []
+        if self.llm_model_stats:
+            for (provider, model), stat in self.llm_model_stats.items():
+                calls = int(stat.get("calls", 0))
+                if calls <= 0:
+                    continue
+                ranked_models.append(
+                    {
+                        "provider": provider,
+                        "model": model,
+                        "calls": calls,
+                        "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
+                        "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
+                    }
+                )
+            ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))
+        self.summary = {
+            "total_steps": getattr(final_state, "total_steps", len(self.trace)),
+            "total_completed": getattr(final_state, "total_completed", 0),
+            "total_backlog": getattr(final_state, "total_backlog", 0),
+            "total_sla_breaches": getattr(final_state, "total_sla_breaches", 0),
+            "fairness_gap": float(getattr(final_state, "fairness_gap", 0.0) or 0.0),
+            "total_invalid_actions": invalid_actions,
+            "invalid_action_rate": invalid_rate,
+            "llm_steps": llm_steps,
+            "heuristic_fallback_steps": fallback_steps,
+            "llm_repaired_steps": repaired_steps,
+            "repaired_action_rate": repaired_rate,
+            "auto_switch_count": self.auto_switch_count,
+            "last_switch_reason": self.last_switch_reason,
+            "effective_max_steps": self.max_steps,
+            "recommended_min_steps": _recommended_min_steps(self.task_id),
+        }
+        if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
+            self.summary["llm_route"] = list(self.llm_route)
+            self.summary["llm_model_performance"] = ranked_models
+        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
+            self.summary["model_path"] = self.model_path
+            self.summary["model_type"] = self.model_type
+def run_simulation(
+    *,
+    task_id: str,
+    agent_mode: SimulationAgentMode,
+    max_steps: int,
+    seed: int | None,
+    policy_name: str | None = None,
+    model_path: str | None = None,
+    model_type: Literal["maskable", "recurrent"] = "maskable",
+) -> SimulationRun:
+    session = LiveSimulationSession(
+        task_id=task_id,
+        agent_mode=agent_mode,
+        max_steps=max_steps,
+        seed=seed,
+        policy_name=policy_name,
+        model_path=model_path,
+        model_type=model_type,
+    )
+    try:
+        while not session.done:
+            session.step_once()
+        return SimulationRun(
+            task_id=session.task_id,
+            agent_mode=session.agent_mode,
+            seed=session.seed,
+            total_reward=float(session.total_reward),
+            score=float(session.score or 0.0),
+            grader_name=str(session.grader_name or "unknown"),
+            summary=dict(session.summary or {}),
+            trace=list(session.trace),
+        )
+    finally:
+        session.close()
+def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
+    try:
+        from rl.feature_builder import ACTION_DECODE_TABLE
+    except Exception:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
+    row = ACTION_DECODE_TABLE.get(int(action_idx))
+    if row is None:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
+    action_type, service, priority_mode, delta = row
+    try:
+        at = ActionType(str(action_type))
+    except Exception:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
+    if at == ActionType.SET_PRIORITY_MODE:
+        action = _action_model_from_kwargs(at, priority_mode=priority_mode)
+    elif at == ActionType.ASSIGN_CAPACITY:
+        action = _action_model_from_kwargs(at, service=service, officer_delta=delta or 1)
+    elif at == ActionType.REQUEST_MISSING_DOCUMENTS:
+        action = _action_model_from_kwargs(at, service=service)
+    elif at == ActionType.ESCALATE_SERVICE:
+        action = _action_model_from_kwargs(at, service=service)
+    elif at == ActionType.REALLOCATE_OFFICERS:
+        src = _enum_service(service)
+        action = (
+            _action_model_from_kwargs(at, service=src, target_service=src, officer_delta=delta or 1)
+            if src is not None
+            else ActionModel(action_type=ActionType.ADVANCE_TIME)
+        )
+    else:
+        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+    return action, at.value

app/env.py ADDED Viewed

	@@ -0,0 +1,553 @@

+"""
+env.py — Gov Workflow OpenEnv
+Gymnasium/OpenEnv-compatible environment aligned with Phase 1 schemas.
+"""
+from __future__ import annotations
+import random
+from uuid import uuid4
+from app.event_engine import EventEngine
+from app.models import (
+    ActionModel,
+    ActionType,
+    ApplicationCase,
+    EpisodeStateModel,
+    InternalSubstate,
+    ObservationModel,
+    OfficerPool,
+    PriorityMode,
+    QueueSnapshot,
+    RewardModel,
+    ScenarioMode,
+    ServiceType,
+    StepInfoModel,
+    TaskConfig,
+)
+from app.reward import compute_reward
+from app.signal_computer import SignalComputer
+from app.engine import DayResult, DaySimulator
+from app.tasks import get_task
+def completion_fairness_gap(
+    arrived_by_service: dict[ServiceType, int],
+    completed_by_service: dict[ServiceType, int],
+) -> float:
+    services = list(arrived_by_service.keys())
+    if len(services) < 2:
+        return 0.0
+    rates = []
+    for svc in services:
+        arrived = max(1, arrived_by_service.get(svc, 0))
+        completed = completed_by_service.get(svc, 0)
+        rates.append(completed / arrived)
+    return max(rates) - min(rates) if rates else 0.0
+class EpisodeMetrics:
+    """Mutable running counters accumulated over a single episode."""
+    def __init__(self) -> None:
+        """Start every counter at zero."""
+        # Case flow.
+        self.total_arrived: int = 0
+        self.total_completed: int = 0
+        self.total_sla_breaches: int = 0
+        self.total_rejected: int = 0
+        # Agent action bookkeeping.
+        self.total_invalid_actions: int = 0
+        self.total_escalations_used: int = 0
+        self.total_wasted_escalations: int = 0
+        # Missing-document handling.
+        self.total_docs_requested: int = 0
+        self.total_docs_cleared: int = 0
+        # Officer capacity usage.
+        self.total_idle_officer_days: int = 0
+        self.total_capacity_days: int = 0
+        # Urgent cases.
+        self.total_urgent_arrived: int = 0
+        self.total_urgent_completed: int = 0
+        # Sum of per-step rewards.
+        self.cumulative_reward: float = 0.0
+    def to_reward_model(self) -> RewardModel:
+        """Wrap the cumulative reward in a ``RewardModel``; no other field is set here."""
+        return RewardModel(total_reward=self.cumulative_reward)
+class GovWorkflowEnv:
+    def __init__(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> None:
+        """Load the task configuration and set up placeholder episode state.
+        Args:
+            task_id: Identifier passed to ``get_task`` to load the task config.
+            seed: Stored on ``self.seed``, but see the note below.
+        """
+        self.task_id = task_id
+        self.task: TaskConfig = get_task(task_id)
+        # NOTE(review): this value is overwritten with the task's own seed by
+        # _init_episode_state() below, and reset() also falls back to the task
+        # seed when called without one, so the constructor `seed` currently
+        # has no lasting effect — confirm whether that is intended.
+        self.seed = seed
+        # Default step cap: ten steps per simulated day, at least one.
+        self.max_steps_per_episode = max(1, int(self.task.max_days) * 10)
+        self._init_episode_state()
+    def reset(
+        self,
+        seed: int | None = None,
+        options: dict | None = None,
+    ) -> tuple[ObservationModel, dict]:
+        task_id = (options or {}).get("task_id", self.task_id)
+        self.task = get_task(task_id)
+        self.task_id = self.task.task_id
+        self.seed = self.task.seed if seed is None else int(seed)
+        self.rng = random.Random(self.seed)
+        max_steps_override = (options or {}).get("max_steps_per_episode")
+        if max_steps_override is None:
+            self.max_steps_per_episode = max(1, int(self.task.max_days) * 10)
+        else:
+            self.max_steps_per_episode = max(1, int(max_steps_override))
+        self.episode_id = f"{self.task_id}-s{self.seed}-{uuid4().hex[:6]}"
+        self.day = 0
+        self.total_steps = 0
+        self.terminated = False
+        self.truncated = False
+        self.priority_mode = PriorityMode.BALANCED
+        pool = self.task.initial_officer_pool
+        self.officer_pool = OfficerPool(
+            total_officers=pool.total_officers,
+            available_officers=pool.available_officers,
+            allocated=dict(pool.allocated),
+            pending_reallocation=dict(getattr(pool, "pending_reallocation", {})),
+        )
+        self.active_cases: list[ApplicationCase] = []
+        self.completed_cases: list[ApplicationCase] = []
+        self.escalation_budget_remaining = self.task.escalation_budget
+        self.arrived_by_service = {s: 0 for s in self.task.enabled_services}
+        self.completed_by_service = {s: 0 for s in self.task.enabled_services}
+        self.metrics = EpisodeMetrics()
+        self.action_history: list[dict] = []
+        self.last_action_valid = True
+        self.last_action_message = "reset"
+        self.last_action_explanation = ""
+        self.event_engine = EventEngine(
+            seed=self.seed,
+            scenario_mode=self.task.scenario_mode,
+        )
+        self.simulator = DaySimulator(
+            task_config=self.task,
+            rng=self.rng,
+            event_engine=self.event_engine,
+        )
+        self.signal_computer = SignalComputer()
+        obs = self._build_observation(active_events=[])
+        info = {
+            "task_id": self.task_id,
+            "seed": self.seed,
+            "episode_id": self.episode_id,
+            "max_days": self.task.max_days,
+        }
+        return obs, info
+    def step(
+        self,
+        action: ActionModel | dict,
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
+        if isinstance(action, dict):
+            from app.models import ActionModel
+            action = ActionModel(**action)
+        if self.terminated or self.truncated:
+            raise RuntimeError("Episode ended — call reset() before stepping.")
+        self.total_steps += 1
+        invalid_action = False
+        day_result = DayResult()
+        try:
+            notes, day_result = self._apply_action(action, day_result)
+            self.last_action_valid = True
+            self.last_action_message = notes[-1] if notes else "ok"
+            self.last_action_explanation = self.last_action_message
+        except ValueError as exc:
+            invalid_action = True
+            self.metrics.total_invalid_actions += 1
+            self.last_action_valid = False
+            self.last_action_message = str(exc)
+            self.last_action_explanation = f"Invalid: {exc}"
+        fairness_gap = completion_fairness_gap(
+            self.arrived_by_service,
+            self.completed_by_service,
+        )
+        reward: RewardModel = compute_reward(
+            stage_advances=day_result.stage_advances,
+            completions=day_result.new_completions,
+            active_backlog=len(self.active_cases),
+            new_sla_breaches=day_result.new_sla_breaches,
+            fairness_gap=fairness_gap,
+            fairness_threshold=self.task.fairness_threshold or 0.0,
+            invalid_action=invalid_action,
+            idle_capacity=day_result.idle_officer_days,
+            award_stability_bonus=(action.action_type == ActionType.ADVANCE_TIME),
+        )
+        self.metrics.cumulative_reward += reward.total_reward
+        self.terminated = (
+            len(self.active_cases) == 0
+            and self.day > 0
+            and not invalid_action
+        )
+        self.truncated = (
+            (self.day >= self.task.max_days or self.total_steps >= self.max_steps_per_episode)
+            and not self.terminated
+        )
+        info = StepInfoModel(
+            reward_breakdown=reward,
+            newly_arrived_cases=day_result.new_arrivals,
+            newly_completed_cases=day_result.new_completions,
+            newly_sla_breached_cases=day_result.new_sla_breaches,
+            newly_resolved_doc_cases=day_result.newly_unblocked_missing,
+            invalid_action=invalid_action,
+            action_explanation=self.last_action_explanation,
+            active_events=day_result.active_events,
+            grader_preview_score=0.0,
+            effects_resolved_this_step=[],
+        )
+        self.action_history.append({
+            "step": self.total_steps,
+            "day": self.day,
+            "action": action.model_dump(mode="json"),
+            "invalid": invalid_action,
+            "message": self.last_action_message,
+            "reward": reward.total_reward,
+        })
+        obs = self._build_observation(active_events=day_result.active_events)
+        return obs, reward.total_reward, self.terminated, self.truncated, info
+    def count_pending_effects(self) -> int:
+        """Count all pending delayed effects waiting to resolve."""
+        if hasattr(self, '_pending_effects') and self._pending_effects:
+            return len(self._pending_effects)
+        if hasattr(self, 'simulator') and hasattr(self.simulator, 'pending_effects'):
+            return len(self.simulator.pending_effects)
+        if hasattr(self, 'pending_effects'):
+            return len(self.pending_effects)
+        return 0
+    def state(self) -> EpisodeStateModel:
+        fairness_gap = completion_fairness_gap(
+            self.arrived_by_service, self.completed_by_service
+        )
+        # Compute average waiting days across completed cases
+        avg_wait = (
+            sum(c.waiting_days for c in self.completed_cases) / len(self.completed_cases)
+            if self.completed_cases else 0.0
+        )
+        return EpisodeStateModel(
+            episode_id=self.episode_id,
+            task_id=self.task_id,
+            seed=self.seed,
+            scenario_mode=self.task.scenario_mode,
+            day=self.day,
+            max_days=self.task.max_days,
+            terminated=self.terminated,
+            truncated=self.truncated,
+            total_steps=self.total_steps,
+            total_completed=len(self.completed_cases),
+            total_backlog=len(self.active_cases),
+            total_sla_breaches=self.metrics.total_sla_breaches,
+            total_rejected=self.metrics.total_rejected,
+            action_history_count=len(self.action_history),
+            cumulative_reward=self.metrics.cumulative_reward,
+            officer_pool=self.officer_pool.model_copy(deep=True),
+            pending_effects_count=self.count_pending_effects(),
+            active_events_today=[],
+            # ── Grader-facing fields ──────────────────────────────────
+            fairness_gap=round(fairness_gap, 4),
+            total_arrived=self.metrics.total_arrived,
+            total_docs_requested=self.metrics.total_docs_requested,
+            total_docs_cleared=self.metrics.total_docs_cleared,
+            total_idle_officer_days=self.metrics.total_idle_officer_days,
+            total_capacity_days=self.metrics.total_capacity_days,
+            total_urgent_arrived=self.metrics.total_urgent_arrived,
+            total_urgent_completed=self.metrics.total_urgent_completed,
+            total_escalations_used=self.metrics.total_escalations_used,
+            total_wasted_escalations=self.metrics.total_wasted_escalations,
+            total_invalid_actions=self.metrics.total_invalid_actions,
+            avg_waiting_days=round(avg_wait, 2),
+            # Full action log — populated but stripped by API unless requested
+            action_history=list(self.action_history),
+        )
+    def _apply_action(
+        self,
+        action: ActionModel,
+        day_result: DayResult,
+    ) -> tuple[list[str], DayResult]:
+        notes: list[str] = []
+        if action.action_type == ActionType.SET_PRIORITY_MODE:
+            if action.priority_mode is None:
+                raise ValueError("priority_mode required for set_priority_mode")
+            old_mode = self.priority_mode
+            self.priority_mode = action.priority_mode
+            notes.append(f"Priority mode changed: {old_mode.value} -> {action.priority_mode.value}")
+            return notes, day_result
+        if action.action_type == ActionType.ASSIGN_CAPACITY:
+            cap = action.capacity_assignment
+            if not cap:
+                raise ValueError("capacity_assignment dict required for assign_capacity")
+            for svc_key, delta in cap.items():
+                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
+                if svc not in self.task.enabled_services:
+                    raise ValueError(f"{svc.value} is not enabled in this task")
+                if delta <= 0:
+                    raise ValueError("capacity delta must be positive")
+                idle = self.officer_pool.idle_officers
+                if delta > idle:
+                    raise ValueError(f"Only {idle} idle officers available; requested {delta}")
+                self.officer_pool.allocated[svc] = self.officer_pool.allocated.get(svc, 0) + delta
+                notes.append(f"Assigned {delta} officer(s) to {svc.value}")
+            return notes, day_result
+        if action.action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
+            svc = action.service_target
+            if svc is None:
+                raise ValueError("service_target required for request_missing_documents")
+            candidates = [
+                c for c in self.active_cases
+                if c.service_type == svc
+                and c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
+            ]
+            if not candidates:
+                raise ValueError(f"No BLOCKED_MISSING_DOCS cases for {svc.value}")
+            candidates.sort(key=lambda c: (-c.sla_risk, c.arrival_day))
+            resolved = 0
+            for case in candidates[:3]:
+                case.doc_request_sent_day = self.day
+                case.doc_resolution_day = self.day + self.rng.randint(2, 3)
+                self.metrics.total_docs_requested += 1
+                resolved += 1
+            notes.append(f"Sent missing-doc requests for {resolved} case(s) in {svc.value}")
+            return notes, day_result
+        if action.action_type == ActionType.ESCALATE_SERVICE:
+            if self.escalation_budget_remaining <= 0:
+                self.metrics.total_wasted_escalations += 1
+                raise ValueError("Escalation budget exhausted")
+            svc = action.escalation_target or action.service_target
+            candidates = [
+                c for c in self.active_cases
+                if (svc is None or c.service_type == svc) and not c.is_urgent
+            ]
+            if not candidates:
+                self.metrics.total_wasted_escalations += 1
+                raise ValueError("No eligible non-urgent cases to escalate")
+            best = max(candidates, key=lambda c: (c.sla_risk, -c.arrival_day))
+            best.is_urgent = True
+            self.escalation_budget_remaining -= 1
+            self.metrics.total_escalations_used += 1
+            notes.append(f"Escalated case {best.case_id} ({best.service_type.value})")
+            return notes, day_result
+        if action.action_type == ActionType.ADVANCE_TIME:
+            day_result = self._advance_one_day()
+            notes.append(f"Day {self.day} simulated")
+            return notes, day_result
+        if action.action_type == ActionType.REALLOCATE_OFFICERS:
+            delta = action.reallocation_delta
+            if not delta or len(delta) < 2:
+                raise ValueError("reallocation_delta must have at least 2 entries")
+            total = sum(delta.values())
+            if total != 0:
+                raise ValueError(f"reallocation_delta must sum to 0 (got {total})")
+            for svc_key, change in delta.items():
+                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
+                if svc not in self.task.enabled_services:
+                    raise ValueError(f"{svc.value} not in enabled services")
+                current = self.officer_pool.allocated.get(svc, 0)
+                if current + change < 0:
+                    raise ValueError(
+                        f"Cannot reduce {svc.value} below 0 (current={current}, change={change})"
+                    )
+            for svc_key, change in delta.items():
+                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
+                self.officer_pool.allocated[svc] = self.officer_pool.allocated.get(svc, 0) + change
+            changes = ", ".join(f"{k}:{'+' if v > 0 else ''}{v}" for k, v in delta.items())
+            notes.append(f"Officers reallocated: {changes}")
+            return notes, day_result
+        raise ValueError(f"Unsupported action_type: {action.action_type.value}")
+    def _advance_one_day(self) -> DayResult:
+        """Advance the clock by one day, run the simulator and fold its result into the metrics.
+        Returns:
+            The ``DayResult`` produced by the simulator for the new day.
+        """
+        self.day += 1
+        # Pass a copy of the allocation so the simulator cannot alter the pool's dict.
+        alloc = dict(self.officer_pool.allocated)
+        result = self.simulator.simulate_day(
+            day=self.day,
+            active_cases=self.active_cases,
+            completed_cases=self.completed_cases,
+            priority_mode=self.priority_mode,
+            officer_allocations=alloc,
+        )
+        # Count each completed case once per service; the `_counted` marker on
+        # the case prevents double counting on later days.
+        for case in self.completed_cases:
+            if getattr(case, "_counted", False):
+                continue
+            case._counted = True
+            svc = case.service_type
+            self.completed_by_service[svc] = self.completed_by_service.get(svc, 0) + 1
+        # Likewise count each arrival once, using the `_arrival_counted` marker.
+        # NOTE(review): only cases still in active_cases are seen here, so a
+        # case that is no longer active by this point without having been
+        # marked would never be counted as arrived — confirm against the simulator.
+        for case in self.active_cases:
+            if getattr(case, "_arrival_counted", False):
+                continue
+            case._arrival_counted = True
+            svc = case.service_type
+            self.arrived_by_service[svc] = self.arrived_by_service.get(svc, 0) + 1
+            self.metrics.total_arrived += 1
+            if case.is_urgent:
+                self.metrics.total_urgent_arrived += 1
+        # total_completed is recomputed from the list; the rest accumulate per day.
+        self.metrics.total_completed = len(self.completed_cases)
+        self.metrics.total_sla_breaches += result.new_sla_breaches
+        self.metrics.total_idle_officer_days += result.idle_officer_days
+        self.metrics.total_capacity_days += result.total_capacity_days
+        self.metrics.total_urgent_completed += result.urgent_completed
+        self.metrics.total_docs_cleared += result.newly_unblocked_missing
+        return result
+    def _build_observation(self, active_events: list = None) -> ObservationModel:
+        active_events = active_events or []
+        snapshots: dict[str, QueueSnapshot] = {}
+        todays_digital = 0
+        todays_arrivals = 0
+        today_completed: dict[ServiceType, int] = {}
+        for case in self.completed_cases:
+            today_completed[case.service_type] = today_completed.get(case.service_type, 0) + 1
+        for service in self.task.enabled_services:
+            snap = self.simulator.build_queue_snapshot(service, self.active_cases, self.day)
+            snap.total_completed_today = today_completed.get(service, 0)
+            snapshots[service.value] = snap
+        for case in self.active_cases:
+            if case.arrival_day == self.day:
+                todays_arrivals += 1
+                if case.intake_channel.value == "digital":
+                    todays_digital += 1
+        sigs = self.signal_computer.compute(
+            queue_snapshots=snapshots,
+            officer_pool=self.officer_pool,
+            todays_arrivals=todays_arrivals,
+            digital_arrivals=todays_digital,
+            capacity_per_day=max(1.0, float(self.officer_pool.available_officers)),
+        )
+        pending_doc = sum(
+            1 for c in self.active_cases
+            if c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
+            and c.doc_resolution_day is not None
+        )
+        pending_officer = len(getattr(self.officer_pool, "pending_reallocation", {}))
+        return ObservationModel(
+            task_id=self.task_id,
+            episode_id=self.episode_id,
+            day=self.day,
+            max_days=self.task.max_days,
+            scenario_mode=self.task.scenario_mode,
+            officer_pool=self.officer_pool.model_copy(deep=True),
+            queue_snapshots=snapshots,
+            total_backlog=len(self.active_cases),
+            total_completed=len(self.completed_cases),
+            total_sla_breaches=self.metrics.total_sla_breaches,
+            total_rejected=self.metrics.total_rejected,
+            escalation_budget_remaining=self.escalation_budget_remaining,
+            backlog_pressure=sigs.backlog_pressure,
+            sla_risk_score=sigs.sla_risk_score,
+            fairness_index=sigs.fairness_index,
+            resource_utilization=sigs.resource_utilization,
+            digital_intake_ratio=sigs.digital_intake_ratio,
+            blocked_cases_missing_docs=sigs.blocked_cases_missing_docs,
+            field_verification_load=sigs.field_verification_load,
+            active_events=active_events,
+            last_action_valid=self.last_action_valid,
+            last_action_message=self.last_action_message,
+            last_action_explanation=self.last_action_explanation,
+            pending_doc_resolutions=pending_doc,
+            pending_officer_reallocations=pending_officer,
+        )
+    def _init_episode_state(self) -> None:
+        """
+        Assign a fresh starting value to every per-episode attribute.
+        The seed is read from ``self.task`` and feeds the RNG and the event
+        engine; the RNG and event engine are then handed to the day simulator,
+        so those assignments must stay in this order.
+        """
+        self.seed = self.task.seed
+        self.rng = random.Random(self.seed)
+        self.episode_id = f"{self.task_id}-s{self.seed}-init"
+        self.day = 0
+        self.total_steps = 0
+        self.terminated = False
+        self.truncated = False
+        self.priority_mode = PriorityMode.BALANCED
+        # Single-officer pool with no allocations — presumably a placeholder that
+        # a later reset replaces with the task's own pool (TODO confirm).
+        self.officer_pool = OfficerPool(
+            total_officers=1,
+            available_officers=1,
+            allocated={},
+            pending_reallocation={},
+        )
+        self.active_cases: list[ApplicationCase] = []
+        self.completed_cases: list[ApplicationCase] = []
+        self.escalation_budget_remaining = 0
+        self.arrived_by_service: dict[ServiceType, int] = {}
+        self.completed_by_service: dict[ServiceType, int] = {}
+        self.metrics = EpisodeMetrics()
+        self.action_history: list[dict] = []
+        self.last_action_valid = True
+        self.last_action_message = ""
+        self.last_action_explanation = ""
+        # NOTE(review): built with ScenarioMode.NORMAL rather than the task's
+        # scenario_mode — confirm this is overwritten before the first step.
+        self.event_engine = EventEngine(seed=self.seed, scenario_mode=ScenarioMode.NORMAL)
+        self.simulator = DaySimulator(self.task, self.rng, self.event_engine)
+        self.signal_computer = SignalComputer()
+    def _count_pending_effects(self) -> int:
+        """
+        Count active cases that are waiting on a scheduled follow-up:
+        cases blocked on missing documents that have a resolution day set, plus
+        cases pending field verification that have a completion day set.
+        """
+        pending = 0
+        for case in self.active_cases:
+            substate = case.internal_substate
+            if substate == InternalSubstate.BLOCKED_MISSING_DOCS:
+                if case.doc_resolution_day is not None:
+                    pending += 1
+            elif substate == InternalSubstate.FIELD_VERIFICATION_PENDING:
+                if case.field_verification_completion_day is not None:
+                    pending += 1
+        return pending
+    @property
+    def fairness_gap(self) -> float:
+        """Fairness gap returned by completion_fairness_gap() for the per-service arrival and completion counters."""
+        return completion_fairness_gap(self.arrived_by_service, self.completed_by_service)
+    @property
+    def total_completed(self) -> int:
+        """Number of cases currently held in ``completed_cases``."""
+        return len(self.completed_cases)
+    @property
+    def total_backlog(self) -> int:
+        """Number of cases currently held in ``active_cases``."""
+        return len(self.active_cases)

app/event_engine.py ADDED Viewed

	@@ -0,0 +1,101 @@

+"""
+event_engine.py — Gov Workflow OpenEnv v2.0
+Deterministic daily event system. Same seed + day + scenario = same events always.
+"""
+import random
+from typing import List
+from app.models import EventType, ScenarioMode, TaskConfig
+# Per-scenario factor applied to every BASE_PROBS entry when daily events are drawn.
+SCENARIO_MULTIPLIER = {
+    ScenarioMode.NORMAL:           1.0,
+    ScenarioMode.CRISIS:           2.0,
+    ScenarioMode.EXTREME_OVERLOAD: 3.5,
+}
+# Baseline per-day trigger probability of each event type, before scenario scaling.
+BASE_PROBS = {
+    EventType.SURGE_APPLICATIONS:       0.08,
+    EventType.OFFICER_UNAVAILABLE:      0.07,
+    EventType.DOCUMENT_REJECTION_SPIKE: 0.10,
+    EventType.REVENUE_DB_DELAY:         0.06,
+    EventType.SLA_ESCALATION_ORDER:     0.05,
+}
+# Effect magnitude of each event per scenario mode, as consumed by EventEngine.apply_events:
+#   SURGE_APPLICATIONS       -> factor multiplied into arrival_multiplier
+#   OFFICER_UNAVAILABLE      -> officers added to officer_reduction
+#   DOCUMENT_REJECTION_SPIKE -> amount added to doc_defect_rate_boost
+#   REVENUE_DB_DELAY         -> amount added to system_dependency_boost
+#   SLA_ESCALATION_ORDER     -> candidate sla_window_multiplier (the smallest value wins)
+EVENT_EFFECTS = {
+    EventType.SURGE_APPLICATIONS:
+        {ScenarioMode.NORMAL: 1.3, ScenarioMode.CRISIS: 1.5, ScenarioMode.EXTREME_OVERLOAD: 2.0},
+    EventType.OFFICER_UNAVAILABLE:
+        {ScenarioMode.NORMAL: 1,   ScenarioMode.CRISIS: 1,   ScenarioMode.EXTREME_OVERLOAD: 2},
+    EventType.DOCUMENT_REJECTION_SPIKE:
+        {ScenarioMode.NORMAL: 0.15, ScenarioMode.CRISIS: 0.20, ScenarioMode.EXTREME_OVERLOAD: 0.35},
+    EventType.REVENUE_DB_DELAY:
+        {ScenarioMode.NORMAL: 0.30, ScenarioMode.CRISIS: 0.40, ScenarioMode.EXTREME_OVERLOAD: 0.60},
+    EventType.SLA_ESCALATION_ORDER:
+        {ScenarioMode.NORMAL: 0.50, ScenarioMode.CRISIS: 0.50, ScenarioMode.EXTREME_OVERLOAD: 0.40},
+}
+class DayEventParams:
+    """
+    Mutable bundle of the modifiers one day's events apply to the simulation.
+    A new instance is neutral: multipliers are 1.0, additive boosts and the
+    officer reduction are zero, and no events are recorded.
+    """
+    def __init__(self):
+        # Multiplicative factor on arrivals (1.0 leaves them unchanged).
+        self.arrival_multiplier: float = 1.0
+        # Additive adjustments; zero means "no change".
+        self.officer_reduction: int = 0
+        self.doc_defect_rate_boost: float = 0.0
+        self.system_dependency_boost: float = 0.0
+        # Multiplicative factor on the SLA window (1.0 leaves it unchanged).
+        self.sla_window_multiplier: float = 1.0
+        # Each instance owns its own list.
+        self.active_events: List[EventType] = []
+    def has_events(self) -> bool:
+        """Return True when at least one event is recorded in ``active_events``."""
+        if self.active_events:
+            return True
+        return False
+class EventEngine:
+    """
+    Seeded source of daily disruption events for one episode.
+    Draws depend only on ``seed``, the day number and the order of
+    ``task_config.allowed_events``, so asking for the same day twice returns
+    the same events.
+    """
+    def __init__(self, seed: int, scenario_mode: ScenarioMode):
+        """
+        Args:
+            seed: Base seed mixed with the day number for each daily draw.
+            scenario_mode: Selects the probability multiplier and the effect
+                magnitudes used by ``apply_events``.
+        Raises:
+            KeyError: if ``scenario_mode`` has no entry in SCENARIO_MULTIPLIER.
+        """
+        self.seed = seed
+        self.scenario_mode = scenario_mode
+        self._multiplier = SCENARIO_MULTIPLIER[scenario_mode]
+    def get_events_for_day(self, day: int, task_config: "TaskConfig") -> List[EventType]:
+        """
+        Draw the events active on ``day``.
+        Returns the triggered event types in ``allowed_events`` order, or
+        ``[EventType.NO_EVENT]`` when nothing triggered.
+        """
+        # A fresh RNG per day keeps the result independent of call history.
+        day_rng = random.Random(self.seed + day * 31337)
+        active = []
+        for event_type in task_config.allowed_events:
+            if event_type == EventType.NO_EVENT:
+                continue
+            base_prob = BASE_PROBS.get(event_type, 0.0)
+            # Scenario scaling is capped at 0.80 per event.
+            effective_prob = min(0.80, base_prob * self._multiplier)
+            if day_rng.random() < effective_prob:
+                active.append(event_type)
+        return active if active else [EventType.NO_EVENT]
+    def apply_events(self, events: List[EventType], task_config: "TaskConfig") -> DayEventParams:
+        """
+        Fold ``events`` into a DayEventParams for the current scenario mode.
+        Every event other than NO_EVENT is recorded in ``active_events``. An
+        event with no magnitude configured for this scenario leaves the
+        parameters at their neutral values. ``task_config`` is accepted for
+        interface compatibility and is not read here.
+        """
+        params = DayEventParams()
+        for event in events:
+            if event == EventType.NO_EVENT:
+                continue
+            params.active_events.append(event)
+            magnitude = EVENT_EFFECTS.get(event, {}).get(self.scenario_mode)
+            if magnitude is None:
+                # A missing entry must not act as 0: that would zero the
+                # arrival multiplier or collapse the SLA window.
+                continue
+            if event == EventType.SURGE_APPLICATIONS:
+                params.arrival_multiplier *= magnitude
+            elif event == EventType.OFFICER_UNAVAILABLE:
+                params.officer_reduction += int(magnitude)
+            elif event == EventType.DOCUMENT_REJECTION_SPIKE:
+                params.doc_defect_rate_boost += magnitude
+            elif event == EventType.REVENUE_DB_DELAY:
+                params.system_dependency_boost += magnitude
+            elif event == EventType.SLA_ESCALATION_ORDER:
+                # The tightest (smallest) window multiplier wins.
+                params.sla_window_multiplier = min(params.sla_window_multiplier, magnitude)
+        if not params.active_events:
+            params.active_events = [EventType.NO_EVENT]
+        return params
+    def describe_events(self, events: List[EventType]) -> str:
+        """
+        Return a "; "-joined human-readable summary of ``events``.
+        NO_EVENT entries are ignored; an empty remainder yields
+        "No active events today". Unknown events fall back to ``str(event)``.
+        """
+        descriptions = {
+            EventType.SURGE_APPLICATIONS:       "Digital surge: arrivals increased",
+            EventType.OFFICER_UNAVAILABLE:      "Officer absent: reduced capacity",
+            EventType.DOCUMENT_REJECTION_SPIKE: "Doc rejection spike: higher defect rate",
+            EventType.REVENUE_DB_DELAY:         "Revenue DB delay: land records slower",
+            EventType.SLA_ESCALATION_ORDER:     "SLA escalation order: deadlines tightened",
+            EventType.NO_EVENT:                 "No active events today",
+        }
+        active = [e for e in events if e != EventType.NO_EVENT]
+        if not active:
+            return "No active events today"
+        return "; ".join(descriptions.get(e, str(e)) for e in active)

app/graders.py ADDED Viewed

	@@ -0,0 +1,176 @@

+"""
+graders.py — Gov Workflow OpenEnv: Deterministic Episode Graders
+Rules:
+  - All graders read ONLY from EpisodeStateModel flat fields.
+  - No access to env internals, EpisodeMetrics, or reward breakdown proxies.
+  - GraderResult uses the aligned schema (score, grader_name, named metric fields).
+  - grade_episode() dispatches by task_id.
+Grader weights:
+  Easy   — completion(0.45) + SLA(0.35) + idle_efficiency(0.20)          = 1.00
+  Medium — completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15) = 1.00
+  Hard   — completion(0.28) + SLA(0.24) + doc_rework(0.16)
+           + fairness(0.16) + escalation_discipline(0.16)                 = 1.00
+"""
+from __future__ import annotations
+from app.models import EpisodeStateModel, GraderResult
+# ─────────────────────────────────────────────────────────────────────────────
+# INTERNAL HELPERS
+# ─────────────────────────────────────────────────────────────────────────────
+def _safe_ratio(num: float, den: float, default: float = 1.0) -> float:
+    """Divide ``num`` by ``den`` and bound the result to [0.0, 1.0]; a non-positive ``den`` yields the bounded ``default``."""
+    raw = default if den <= 0 else num / den
+    return max(0.0, min(1.0, raw))
+def _b(value: float) -> float:
+    """Coerce ``value`` to float and bound it to the closed interval [0.0, 1.0]."""
+    capped = min(1.0, float(value))
+    return max(0.0, capped)
+def _extract(state: EpisodeStateModel) -> dict[str, float]:
+    """
+    Derive every grader metric from the flat counters on ``state``.
+    All rates are bounded to [0.0, 1.0]; ``fairness_gap`` is passed through
+    rounded to four decimals. Ratios whose denominator is zero fall back to
+    1.0 for document rework and urgent service.
+    """
+    # Denominators are floored at 1 so the "nothing happened" case divides cleanly.
+    arrived = max(1, state.total_arrived)
+    escalations = max(1.0, float(state.total_escalations_used))
+    capacity = max(1.0, float(state.total_capacity_days))
+    gap = float(state.fairness_gap)
+    completed_share = _safe_ratio(float(state.total_completed), arrived, 0.0)
+    breach_share = _safe_ratio(float(state.total_sla_breaches), arrived, 0.0)
+    docs_share = _safe_ratio(
+        float(state.total_docs_cleared), float(state.total_docs_requested), 1.0
+    )
+    urgent_share = _safe_ratio(
+        float(state.total_urgent_completed), float(state.total_urgent_arrived), 1.0
+    )
+    wasted_share = _safe_ratio(float(state.total_wasted_escalations), escalations, 0.0)
+    idle_share = _safe_ratio(float(state.total_idle_officer_days), capacity, 0.0)
+    metrics: dict[str, float] = {}
+    metrics["completion_rate"] = _b(completed_share)
+    metrics["sla_compliance"] = _b(1.0 - breach_share)
+    metrics["document_rework_quality"] = _b(docs_share)
+    metrics["urgent_served_rate"] = _b(urgent_share)
+    metrics["fairness_score"] = _b(1.0 - gap)
+    metrics["escalation_discipline"] = _b(1.0 - wasted_share)
+    metrics["idle_efficiency"] = _b(1.0 - idle_share)
+    metrics["fairness_gap"] = round(gap, 4)
+    return metrics
+def _build_result(
+    state: EpisodeStateModel,
+    score: float,
+    grader_name: str,
+    m: dict[str, float],
+) -> GraderResult:
+    """
+    Package a weighted score, its component metrics and the episode counters
+    from ``state`` into a GraderResult. The score is bounded to [0.0, 1.0] and
+    the arrival counter is floored at zero.
+    """
+    identity = {
+        "task_id": state.task_id,
+        "episode_id": state.episode_id,
+        "grader_name": grader_name,
+        "score": _b(score),
+    }
+    # Metric dict keys differ from the result field names for SLA compliance.
+    components = {
+        "completion_rate": m["completion_rate"],
+        "sla_compliance_rate": m["sla_compliance"],
+        "idle_efficiency": m["idle_efficiency"],
+        "document_rework_quality": m["document_rework_quality"],
+        "urgent_served_rate": m["urgent_served_rate"],
+        "fairness_score": m["fairness_score"],
+        "escalation_discipline": m["escalation_discipline"],
+        "fairness_gap": m["fairness_gap"],
+    }
+    counters = {
+        "total_cases_arrived": max(0, state.total_arrived),
+        "total_completed": state.total_completed,
+        "total_sla_breached": state.total_sla_breaches,
+        "total_rejected": state.total_rejected,
+        "avg_waiting_days": state.avg_waiting_days,
+    }
+    return GraderResult(**identity, **components, **counters)
+# ─────────────────────────────────────────────────────────────────────────────
+# TASK GRADERS
+# ─────────────────────────────────────────────────────────────────────────────
+def grade_easy(state: EpisodeStateModel) -> GraderResult:
+    """
+    Grader for district_backlog_easy: throughput, SLA compliance and keeping
+    officers busy.
+    Weights: completion(0.45) + SLA(0.35) + idle_efficiency(0.20)
+    """
+    weights = (
+        (0.45, "completion_rate"),
+        (0.35, "sla_compliance"),
+        (0.20, "idle_efficiency"),
+    )
+    metrics = _extract(state)
+    # Accumulate left to right so the float result matches a plain a + b + c.
+    total = 0.0
+    for weight, key in weights:
+        total += weight * metrics[key]
+    return _build_result(state, total, "easy", metrics)
+def grade_medium(state: EpisodeStateModel) -> GraderResult:
+    """
+    Grader for mixed_urgency_medium: throughput, SLA compliance, document
+    rework quality and service of urgent cases.
+    Weights: completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15)
+    """
+    weights = (
+        (0.35, "completion_rate"),
+        (0.30, "sla_compliance"),
+        (0.20, "document_rework_quality"),
+        (0.15, "urgent_served_rate"),
+    )
+    metrics = _extract(state)
+    # Accumulate left to right so the float result matches a plain a + b + c + d.
+    total = 0.0
+    for weight, key in weights:
+        total += weight * metrics[key]
+    return _build_result(state, total, "medium", metrics)
+def grade_hard(state: EpisodeStateModel) -> GraderResult:
+    """
+    Grader for cross_department_hard: throughput, SLA compliance, document
+    rework quality, cross-service fairness and disciplined escalation use.
+    Weights: completion(0.28) + SLA(0.24) + doc_rework(0.16)
+             + fairness(0.16) + escalation_discipline(0.16)
+    """
+    weights = (
+        (0.28, "completion_rate"),
+        (0.24, "sla_compliance"),
+        (0.16, "document_rework_quality"),
+        (0.16, "fairness_score"),
+        (0.16, "escalation_discipline"),
+    )
+    metrics = _extract(state)
+    # Accumulate left to right so the float result matches the plain chained sum.
+    total = 0.0
+    for weight, key in weights:
+        total += weight * metrics[key]
+    return _build_result(state, total, "hard", metrics)
+# ─────────────────────────────────────────────────────────────────────────────
+# DISPATCHER
+# ─────────────────────────────────────────────────────────────────────────────
+# task_id -> grader function; IDs missing here are handled by grade_episode's fallback.
+_GRADER_MAP = {
+    "district_backlog_easy":          grade_easy,
+    "district_backlog_easy_extreme":  grade_easy,
+    "mixed_urgency_medium":           grade_medium,
+    "cross_department_hard":          grade_hard,
+}
+def grade_episode(state: EpisodeStateModel) -> GraderResult:
+    """
+    Grade ``state`` with the grader registered for its task_id.
+    Task IDs absent from _GRADER_MAP are graded with grade_hard.
+    """
+    try:
+        grader_fn = _GRADER_MAP[state.task_id]
+    except KeyError:
+        grader_fn = grade_hard
+    return grader_fn(state)

app/main.py ADDED Viewed

The diff for this file is too large to render. See raw diff

app/models.py ADDED Viewed

	@@ -0,0 +1,509 @@

+"""
+models.py — Gov Workflow OpenEnv v2.0 — Phase 2 FULL FILE
+Adds: DocEnrichmentType, doc_enrichment fields on ApplicationCase,
+      blocked_cases_enrichment / pending_enrichment_lookups on observation,
+      INTERNAL_TO_PUBLIC_STAGE mapping,
+      SectorProfile enrichment fields.
+"""
+from __future__ import annotations
+from enum import Enum
+from typing import Dict, List, Optional
+from pydantic import BaseModel, Field
+import uuid
+# ─────────────────────────────────────────────
+# ENUMS
+# ─────────────────────────────────────────────
+# Services a case can belong to; the str mixin makes each member usable as its string value.
+class ServiceType(str, Enum):
+    PASSPORT            = "passport"
+    DRIVING_LICENSE     = "driving_license"
+    AADHAAR_CARD        = "aadhaar_card"
+    GST_REGISTRATION    = "gst_registration"
+    INCOME_CERTIFICATE  = "income_certificate"
+    CASTE_CERTIFICATE   = "caste_certificate"
+    BIRTH_CERTIFICATE   = "birth_certificate"
+    LAND_REGISTRATION   = "land_registration"
+# Public workflow stages; the values match the targets in INTERNAL_TO_PUBLIC_STAGE.
+class StageType(str, Enum):
+    SUBMISSION            = "submission"
+    DOCUMENT_VERIFICATION = "document_verification"
+    FIELD_VERIFICATION    = "field_verification"
+    APPROVAL              = "approval"
+    ISSUANCE              = "issuance"
+# Fine-grained case states; each value is a key of INTERNAL_TO_PUBLIC_STAGE.
+class InternalSubstate(str, Enum):
+    PRE_SCRUTINY                 = "pre_scrutiny"
+    DOC_VALIDATION               = "doc_validation"
+    SERVICE_SPECIFIC_VALIDATION  = "service_specific_validation"
+    FIELD_VERIFICATION_PENDING   = "field_verification_pending"
+    DECISION_PENDING             = "decision_pending"
+    ISSUANCE_READY               = "issuance_ready"
+    BLOCKED_MISSING_DOCS         = "blocked_missing_docs"
+    BLOCKED_ENRICHMENT           = "blocked_enrichment"
+    COMPLETED                    = "completed"
+    REJECTED                     = "rejected"
+# ── Phase 2 addition ──────────────────────────────────────────────────────────
+class DocEnrichmentType(str, Enum):
+    """External lookup needed for document verification."""
+    # NONE is the default on both SectorProfile and ApplicationCase.
+    NONE                  = "none"
+    PAST_LAND_RECORDS     = "past_land_records"       # Land Registration — Revenue DB
+    FAMILY_CASTE_HISTORY  = "family_caste_history"    # Caste Certificate — Caste Registry
+    POLICE_VERIFICATION   = "police_verification"     # Passport — Police Station
+    TAX_RECORD_CROSS_CHECK= "tax_record_cross_check"  # GST Registration — Tax DB
+# Public stage mapping — used by state_machine.build_public_stage
+# Keys are InternalSubstate values and targets are StageType values, both as plain strings.
+# Both blocked substates surface as document_verification; completed maps to issuance
+# and rejected to approval.
+INTERNAL_TO_PUBLIC_STAGE: dict = {
+    "pre_scrutiny":                  "submission",
+    "doc_validation":                "document_verification",
+    "service_specific_validation":   "document_verification",
+    "field_verification_pending":    "field_verification",
+    "decision_pending":              "approval",
+    "issuance_ready":                "issuance",
+    "blocked_missing_docs":          "document_verification",
+    "blocked_enrichment":            "document_verification",
+    "completed":                     "issuance",
+    "rejected":                      "approval",
+}
+# Queue-ordering policies selectable through ActionModel.priority_mode.
+class PriorityMode(str, Enum):
+    URGENT_FIRST       = "urgent_first"
+    OLDEST_FIRST       = "oldest_first"
+    BALANCED           = "balanced"
+    BACKLOG_CLEARANCE  = "backlog_clearance"
+# Kinds of action an agent can submit; carried in ActionModel.action_type.
+class ActionType(str, Enum):
+    SET_PRIORITY_MODE         = "set_priority_mode"
+    ASSIGN_CAPACITY           = "assign_capacity"
+    REQUEST_MISSING_DOCUMENTS = "request_missing_documents"
+    ESCALATE_SERVICE          = "escalate_service"
+    ADVANCE_TIME              = "advance_time"
+    REALLOCATE_OFFICERS       = "reallocate_officers"
+# Daily disruption events; NO_EVENT is the sentinel for a day with nothing active.
+class EventType(str, Enum):
+    SURGE_APPLICATIONS        = "surge_applications"
+    OFFICER_UNAVAILABLE       = "officer_unavailable"
+    DOCUMENT_REJECTION_SPIKE  = "document_rejection_spike"
+    REVENUE_DB_DELAY          = "revenue_db_delay"
+    SLA_ESCALATION_ORDER      = "sla_escalation_order"
+    NO_EVENT                  = "no_event"
+# Scenario difficulty setting carried by tasks, observations and episode state.
+class ScenarioMode(str, Enum):
+    NORMAL           = "normal"
+    CRISIS           = "crisis"
+    EXTREME_OVERLOAD = "extreme_overload"
+# Urgency category assigned to a service via SectorProfile.urgency_profile.
+class UrgencyProfile(str, Enum):
+    LOW             = "low"
+    MODERATE        = "moderate"
+    HIGH            = "high"
+    LOW_BUT_STICKY  = "low_but_sticky"
+# Channel through which a case was filed; stored in ApplicationCase.intake_channel.
+class IntakeChannel(str, Enum):
+    DIGITAL = "digital"
+    PAPER   = "paper"
+    HYBRID  = "hybrid"
+# Categories of deferred consequence; carried in DelayedEffect.effect_type.
+class DelayedEffectType(str, Enum):
+    DOC_REQUEST_RESOLUTION = "doc_request_resolution"
+    OFFICER_REALLOCATION   = "officer_reallocation"
+    ESCALATION_RELIEF      = "escalation_relief"
+# ─────────────────────────────────────────────
+# SECTOR / SERVICE CONFIGURATION
+# ─────────────────────────────────────────────
+# Static per-service configuration. Probability-like fields are validated to [0.0, 1.0];
+# day counts must be at least 1 and base_processing_rate at least 0.1.
+class SectorProfile(BaseModel):
+    service_type:                   ServiceType
+    sector_name:                    str
+    missing_docs_probability:       float = Field(ge=0.0, le=1.0)
+    doc_defect_rate_digital:        float = Field(ge=0.0, le=1.0)
+    doc_defect_rate_paper:          float = Field(ge=0.0, le=1.0)
+    field_verification_probability: float = Field(ge=0.0, le=1.0)
+    manual_scrutiny_intensity:      float = Field(ge=0.0, le=1.0)
+    decision_backlog_sensitivity:   float = Field(ge=0.0, le=1.0)
+    system_dependency_risk:         float = Field(ge=0.0, le=1.0)
+    sla_days:                       int   = Field(ge=1)
+    urgency_profile:                UrgencyProfile
+    base_processing_rate:           float = Field(ge=0.1)
+    field_verification_days:        int   = Field(ge=1)
+    # ── Phase 2: enrichment ─────────────────────────────────────────
+    # NOTE(review): nothing here enforces delay_days_min <= delay_days_max.
+    doc_enrichment_type:                DocEnrichmentType  = DocEnrichmentType.NONE
+    doc_enrichment_probability:         float              = Field(default=0.0, ge=0.0, le=1.0)
+    doc_enrichment_delay_days_min:      int                = Field(default=1, ge=1)
+    doc_enrichment_delay_days_max:      int                = Field(default=3, ge=1)
+# Officer head-count plus two string-keyed integer maps: current allocations and
+# pending reallocations.
+class OfficerPool(BaseModel):
+    total_officers:       int           = Field(ge=1)
+    available_officers:   int           = Field(ge=0)
+    allocated:            Dict[str, int] = Field(default_factory=dict)
+    pending_reallocation: Dict[str, int] = Field(default_factory=dict)
+    @property
+    def idle_officers(self) -> int:
+        """Available officers minus the sum of all allocations."""
+        # NOTE(review): not floored at zero, so this is negative whenever the
+        # allocations exceed available_officers — confirm callers expect that.
+        return self.available_officers - sum(self.allocated.values())
+# ─────────────────────────────────────────────
+# CASE MODEL  (Phase 2: enrichment fields added)
+# ─────────────────────────────────────────────
+# One application moving through the workflow. case_id defaults to the first
+# eight characters of a random UUID4; all day fields are validated as >= 0.
+class ApplicationCase(BaseModel):
+    case_id:              str               = Field(default_factory=lambda: str(uuid.uuid4())[:8])
+    service_type:         ServiceType
+    internal_substate:    InternalSubstate  = InternalSubstate.PRE_SCRUTINY
+    public_stage:         StageType         = StageType.SUBMISSION
+    arrival_day:          int               = Field(ge=0)
+    current_day:          int               = Field(ge=0)
+    sla_deadline_day:     int               = Field(ge=0)
+    days_in_current_stage:int               = Field(default=0, ge=0)
+    waiting_days:         int               = Field(default=0, ge=0)
+    is_urgent:            bool              = False
+    intake_channel:       IntakeChannel     = IntakeChannel.DIGITAL
+    has_missing_docs:     bool              = False
+    doc_request_sent_day: Optional[int]     = None
+    doc_resolution_day:   Optional[int]     = None
+    field_verification_required:          bool           = False
+    field_verification_completion_day:    Optional[int]  = None
+    sla_breached:         bool              = False
+    completed:            bool              = False
+    rejected:             bool              = False
+    # ── Phase 2: enrichment ─────────────────────────────────────────
+    doc_enrichment_type:     DocEnrichmentType  = DocEnrichmentType.NONE
+    doc_enrichment_triggered:bool               = False
+    enrichment_resolution_day:Optional[int]     = None
+    doc_enrichment_reason:   Optional[str]      = None
+    @property
+    def days_until_sla(self) -> int:
+        """Days left before the SLA deadline, floored at zero once it has passed."""
+        return max(0, self.sla_deadline_day - self.current_day)
+    @property
+    def sla_risk(self) -> float:
+        """Elapsed share of the SLA window, capped at 1.0; 1.0 when the window is not positive."""
+        total_window = self.sla_deadline_day - self.arrival_day
+        if total_window <= 0:
+            return 1.0
+        # NOTE(review): there is no lower bound, so current_day < arrival_day
+        # would give a negative risk — confirm that cannot occur.
+        elapsed = self.current_day - self.arrival_day
+        return min(1.0, elapsed / total_window)
+# Per-service queue summary. Every counter defaults to 0 and is validated as
+# non-negative; current_sla_risk is validated to [0.0, 1.0].
+class QueueSnapshot(BaseModel):
+    service_type:              ServiceType
+    public_stage_counts:       Dict[str, int] = Field(default_factory=dict)
+    total_pending:             int            = Field(default=0, ge=0)
+    total_completed_today:     int            = Field(default=0, ge=0)
+    total_sla_breached:        int            = Field(default=0, ge=0)
+    urgent_pending:            int            = Field(default=0, ge=0)
+    blocked_missing_docs:      int            = Field(default=0, ge=0)
+    blocked_enrichment:        int            = Field(default=0, ge=0)   # Phase 2
+    field_verification_pending:int            = Field(default=0, ge=0)
+    oldest_case_age_days:      int            = Field(default=0, ge=0)
+    avg_waiting_days:          float          = Field(default=0.0, ge=0.0)
+    current_sla_risk:          float          = Field(default=0.0, ge=0.0, le=1.0)
+# ─────────────────────────────────────────────
+# DELAYED EFFECT MODEL
+# ─────────────────────────────────────────────
+# A consequence scheduled for a later day. effect_id defaults to the first eight
+# characters of a random UUID4; both target fields are optional.
+class DelayedEffect(BaseModel):
+    effect_id:       str                    = Field(default_factory=lambda: str(uuid.uuid4())[:8])
+    effect_type:     DelayedEffectType
+    target_service:  Optional[ServiceType]  = None
+    target_case_id:  Optional[str]          = None
+    resolution_day:  int                    = Field(ge=0)
+    magnitude:       float                  = Field(default=1.0)
+    description:     str                    = Field(default="")
+# ─────────────────────────────────────────────
+# OBSERVATION MODEL  (Phase 2: enrichment signals added)
+# ─────────────────────────────────────────────
+# Observation returned to the agent. Counters are validated as non-negative and
+# the compressed signals as [0.0, 1.0]; queue_snapshots is a string-keyed dict
+# of per-service snapshots.
+class ObservationModel(BaseModel):
+    task_id:         str
+    episode_id:      str
+    day:             int                    = Field(ge=0)
+    max_days:        int                    = Field(ge=1)
+    scenario_mode:   ScenarioMode           = ScenarioMode.NORMAL
+    officer_pool:    OfficerPool
+    queue_snapshots: Dict[str, QueueSnapshot] = Field(default_factory=dict)
+    total_backlog:             int          = Field(default=0, ge=0)
+    total_completed:           int          = Field(default=0, ge=0)
+    total_sla_breaches:        int          = Field(default=0, ge=0)
+    total_rejected:            int          = Field(default=0, ge=0)
+    escalation_budget_remaining:int         = Field(default=0, ge=0)
+    # Compressed signals
+    backlog_pressure:          float        = Field(default=0.0, ge=0.0, le=1.0)
+    sla_risk_score:            float        = Field(default=0.0, ge=0.0, le=1.0)
+    fairness_index:            float        = Field(default=1.0, ge=0.0, le=1.0)
+    resource_utilization:      float        = Field(default=0.0, ge=0.0, le=1.0)
+    digital_intake_ratio:      float        = Field(default=0.5, ge=0.0, le=1.0)
+    blocked_cases_missing_docs:int          = Field(default=0, ge=0)
+    blocked_cases_enrichment:  int          = Field(default=0, ge=0)   # Phase 2
+    field_verification_load:   float        = Field(default=0.0, ge=0.0, le=1.0)
+    active_events:             List[EventType] = Field(default_factory=list)
+    last_action_valid:         bool         = True
+    last_action_message:       str          = ""
+    last_action_explanation:   str          = Field(default="")
+    pending_doc_resolutions:   int          = Field(default=0, ge=0)
+    pending_enrichment_lookups:int          = Field(default=0, ge=0)  # Phase 2
+    pending_officer_reallocations:int       = Field(default=0, ge=0)
+# ─────────────────────────────────────────────
+# ACTION / REWARD / STATE MODELS (unchanged)
+# ─────────────────────────────────────────────
+# Agent action payload: action_type is required, every other field is optional
+# and defaults to None.
+class ActionModel(BaseModel):
+    action_type:          ActionType
+    service_target:       Optional[ServiceType]  = None
+    priority_mode:        Optional[PriorityMode] = None
+    reallocation_delta:   Optional[Dict[str, int]] = None
+    escalation_target:    Optional[ServiceType]  = None
+    capacity_assignment:  Optional[Dict[str, int]] = None
+    notes:                Optional[str]           = None
+# Reward breakdown: a total plus named reward, bonus and penalty components,
+# all defaulting to 0.0 and carrying no range validation.
+class RewardModel(BaseModel):
+    total_reward:              float = 0.0
+    progress_reward:           float = 0.0
+    completion_reward:         float = 0.0
+    recovery_reward:           float = 0.0
+    stability_bonus:           float = 0.0
+    waiting_penalty:           float = 0.0
+    sla_penalty:               float = 0.0
+    fairness_penalty:          float = 0.0
+    invalid_action_penalty:    float = 0.0
+    idle_capacity_penalty:     float = 0.0
+    oscillation_penalty:       float = 0.0
+class EpisodeStateModel(BaseModel):
+    """Internal episode state exposed via GET /state and POST /state endpoints."""
+    # Identity and progress of the episode.
+    episode_id: str
+    task_id: str
+    seed: int
+    scenario_mode: ScenarioMode
+    day: int = Field(ge=0)
+    max_days: int = Field(ge=1)
+    terminated: bool = False
+    truncated: bool = False
+    # Running totals; all validated as non-negative.
+    total_steps: int = Field(default=0, ge=0)
+    total_completed: int = Field(default=0, ge=0)
+    total_backlog: int = Field(default=0, ge=0)
+    total_sla_breaches: int = Field(default=0, ge=0)
+    total_rejected: int = Field(default=0, ge=0)
+    action_history_count: int = Field(default=0, ge=0)
+    cumulative_reward: float = 0.0
+    cumulative_reward_breakdown: RewardModel = Field(default_factory=RewardModel)
+    officer_pool: Optional[OfficerPool] = None
+    pending_effects_count: int = Field(default=0, ge=0)
+    active_events_today: List[EventType] = Field(default_factory=list)
+    # ── Grader-facing fields ──────────────────────────────────────
+    # These are populated by env.state() so graders never need to
+    # reach into private EpisodeMetrics.
+    fairness_gap: float = Field(
+        default=0.0, ge=0.0, le=1.0,
+        description="Cross-service completion fairness gap at episode end"
+    )
+    total_arrived: int = Field(
+        default=0, ge=0,
+        description="Total cases that arrived across all services"
+    )
+    total_docs_requested: int = Field(
+        default=0, ge=0,
+        description="Total missing-doc requests sent"
+    )
+    total_docs_cleared: int = Field(
+        default=0, ge=0,
+        description="Total missing-doc cases subsequently resolved"
+    )
+    total_idle_officer_days: int = Field(
+        default=0, ge=0,
+        description="Cumulative officer-days wasted idle"
+    )
+    total_capacity_days: int = Field(
+        default=0, ge=0,
+        description="Cumulative total officer-days available"
+    )
+    total_urgent_arrived: int = Field(
+        default=0, ge=0,
+        description="Total urgent cases that arrived"
+    )
+    total_urgent_completed: int = Field(
+        default=0, ge=0,
+        description="Total urgent cases completed"
+    )
+    total_escalations_used: int = Field(
+        default=0, ge=0,
+        description="Total escalation actions consumed"
+    )
+    total_wasted_escalations: int = Field(
+        default=0, ge=0,
+        description="Escalations used on already-urgent or ineligible cases"
+    )
+    total_invalid_actions: int = Field(
+        default=0, ge=0,
+        description="Total invalid actions submitted by agent"
+    )
+    avg_waiting_days: float = Field(
+        default=0.0, ge=0.0,
+        description="Mean waiting days across all completed cases"
+    )
+    # ── Full action log (optional, stripped by default) ──────────
+    action_history: Optional[List[dict]] = Field(
+        default=None,
+        description="Step-by-step action log. Stripped in normal API responses."
+    )
+# Per-step diagnostics: the reward breakdown, "newly ..." counters, the
+# invalid-action flag with its explanation, and a grader preview score
+# validated to [0.0, 1.0].
+class StepInfoModel(BaseModel):
+    reward_breakdown:              RewardModel  = Field(default_factory=RewardModel)
+    newly_arrived_cases:           int          = Field(default=0, ge=0)
+    newly_completed_cases:         int          = Field(default=0, ge=0)
+    newly_sla_breached_cases:      int          = Field(default=0, ge=0)
+    newly_resolved_doc_cases:      int          = Field(default=0, ge=0)
+    invalid_action:                bool         = False
+    action_explanation:            str          = ""
+    active_events:                 List[EventType] = Field(default_factory=list)
+    grader_preview_score:          float        = Field(default=0.0, ge=0.0, le=1.0)
+    effects_resolved_this_step:    List[str]    = Field(default_factory=list)
+# Static definition of one task: scenario, seed, horizon, enabled services,
+# arrival rates, starting officer pool, optional per-service probability
+# overrides, escalation budget and the events that may occur.
+class TaskConfig(BaseModel):
+    task_id:                str
+    display_name:           str
+    difficulty:             str
+    scenario_mode:          ScenarioMode
+    seed:                   int
+    max_days:               int                    = Field(ge=1)
+    enabled_services:       List[ServiceType]
+    arrival_rate_per_day:   Dict[str, float]
+    digital_intake_ratio:   float                  = Field(default=0.6, ge=0.0, le=1.0)
+    initial_officer_pool:   OfficerPool
+    missing_docs_probability_override:       Optional[Dict[str, float]] = None
+    field_verification_probability_override: Optional[Dict[str, float]] = None
+    escalation_budget:      int                    = Field(ge=0)
+    fairness_threshold:     Optional[float]        = Field(default=None, ge=0.0, le=1.0)
+    # NOTE(review): presumably a per-day event probability — confirm where it is consumed.
+    event_probability:      float                  = Field(default=0.1, ge=0.0, le=1.0)
+    allowed_events:         List[EventType]        = Field(default_factory=list)
+class GraderResult(BaseModel):
+    """
+    Final deterministic score for a completed or in-progress episode.
+    Range: [0.0, 1.0].
+    Design decision: exposes .score and .grader_name as convenience aliases,
+    plus a .metrics dict for easy serialization to JSON by main.py endpoints.
+    The named fields (completion_rate, sla_compliance_rate, etc.) remain
+    for typed access in tests and baselines.
+    """
+    task_id: str = ""
+    episode_id: str = ""
+    grader_name: str = ""                          # "easy" | "medium" | "hard"
+    # Primary scalar — use result.score everywhere
+    score: float = Field(default=0.0, ge=0.0, le=1.0)
+    # Named metric components
+    completion_rate: float = Field(default=0.0, ge=0.0, le=1.0)
+    sla_compliance_rate: float = Field(default=0.0, ge=0.0, le=1.0)
+    idle_efficiency: float = Field(default=1.0, ge=0.0, le=1.0)
+    document_rework_quality: float = Field(default=1.0, ge=0.0, le=1.0)
+    urgent_served_rate: float = Field(default=1.0, ge=0.0, le=1.0)
+    fairness_score: float = Field(default=1.0, ge=0.0, le=1.0)
+    escalation_discipline: float = Field(default=1.0, ge=0.0, le=1.0)
+    fairness_gap: float = Field(default=0.0, ge=0.0, le=1.0)
+    # Episode counters — populated from EpisodeStateModel
+    total_cases_arrived: int = 0
+    total_completed: int = 0
+    total_sla_breached: int = 0
+    total_rejected: int = 0
+    avg_waiting_days: float = 0.0
+    @property
+    def metrics(self) -> dict:
+        """
+        Convenience dict for JSON serialization in API endpoints.
+        main.py uses result.metrics directly in GradeResponse.
+        """
+        return {
+            "completion_rate":         round(self.completion_rate, 4),
+            "sla_compliance_rate":     round(self.sla_compliance_rate, 4),
+            "idle_efficiency":         round(self.idle_efficiency, 4),
+            "document_rework_quality": round(self.document_rework_quality, 4),
+            "urgent_served_rate":      round(self.urgent_served_rate, 4),
+            "fairness_score":          round(self.fairness_score, 4),
+            "escalation_discipline":   round(self.escalation_discipline, 4),
+            "fairness_gap":            round(self.fairness_gap, 4),
+            "total_cases_arrived":     self.total_cases_arrived,
+            "total_completed":         self.total_completed,
+            "total_sla_breached":      self.total_sla_breached,
+            "total_rejected":          self.total_rejected,
+            "avg_waiting_days":        round(self.avg_waiting_days, 2),
+        }
+class ResetRequest(BaseModel):
+    task_id:        str
+    seed:           Optional[int]           = None
+    scenario_mode:  Optional[ScenarioMode]  = None
+class ResetResponse(BaseModel):
+    observation:    ObservationModel
+    info:           dict
+    episode_id:     str
+class StepRequest(BaseModel):
+    episode_id:     str
+    action:         ActionModel
+class StepResponse(BaseModel):
+    observation:    ObservationModel
+    reward:         float
+    terminated:     bool
+    truncated:      bool
+    info:           StepInfoModel
+class StateResponse(BaseModel):
+    state:          EpisodeStateModel
+class HealthResponse(BaseModel):
+    status:         str = "ok"
+    version:        str = "2.0.0"
+    active_episodes:int = 0

app/persistence.py ADDED Viewed

	@@ -0,0 +1,304 @@

+from __future__ import annotations
+import json
+import os
+import sqlite3
+import time
+from contextlib import closing
+from pathlib import Path
+from threading import Lock
+from typing import Any
+from uuid import uuid4
+def _now() -> float:
+    return time.time()
+def _as_json(payload: dict[str, Any]) -> str:
+    return json.dumps(payload, separators=(",", ":"), ensure_ascii=True)
+def _from_json(payload: str) -> dict[str, Any]:
+    data = json.loads(payload)
+    return data if isinstance(data, dict) else {}
+def _resolve_data_dir(repo_root: Path) -> Path:
+    configured = os.getenv("OPENENV_DATA_DIR") or os.getenv("STORAGE_DATA_DIR")
+    if configured:
+        return Path(configured).expanduser().resolve()
+    if Path("/data").exists():
+        return Path("/data/openenv_rl").resolve()
+    return (repo_root / "outputs" / "persist").resolve()
+def _storage_enabled() -> bool:
+    raw = str(os.getenv("STORAGE_ENABLED", "true")).strip().lower()
+    return raw not in {"0", "false", "no", "off"}
+class PersistenceStore:
+    def __init__(self, repo_root: Path) -> None:
+        self.repo_root = repo_root.resolve()
+        self.enabled = _storage_enabled()
+        self.data_dir = _resolve_data_dir(self.repo_root)
+        self.db_path = self.data_dir / "openenv_state.sqlite3"
+        self.training_runs_dir = self.data_dir / "training_runs"
+        self._lock = Lock()
+        if not self.enabled:
+            return
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+        self.training_runs_dir.mkdir(parents=True, exist_ok=True)
+        self._init_schema()
+    def _connect(self) -> sqlite3.Connection:
+        conn = sqlite3.connect(self.db_path, timeout=30)
+        conn.row_factory = sqlite3.Row
+        return conn
+    def _init_schema(self) -> None:
+        with self._connect() as conn:
+            conn.executescript(
+                """
+                CREATE TABLE IF NOT EXISTS training_jobs (
+                    job_id TEXT PRIMARY KEY,
+                    created_at REAL NOT NULL,
+                    updated_at REAL NOT NULL,
+                    payload_json TEXT NOT NULL
+                );
+                CREATE TABLE IF NOT EXISTS simulation_runs (
+                    run_id TEXT PRIMARY KEY,
+                    created_at REAL NOT NULL,
+                    updated_at REAL NOT NULL,
+                    task_id TEXT,
+                    agent_mode TEXT,
+                    status TEXT,
+                    payload_json TEXT NOT NULL
+                );
+                CREATE TABLE IF NOT EXISTS comparison_runs (
+                    comparison_id TEXT PRIMARY KEY,
+                    created_at REAL NOT NULL,
+                    updated_at REAL NOT NULL,
+                    task_id TEXT,
+                    payload_json TEXT NOT NULL
+                );
+                """
+            )
+            conn.commit()
+    # Training jobs ---------------------------------------------------------
+    def upsert_training_job(self, snapshot: dict[str, Any]) -> None:
+        if not self.enabled:
+            return
+        job_id = str(snapshot.get("job_id") or "")
+        if not job_id:
+            return
+        created_at = float(snapshot.get("created_at") or _now())
+        updated_at = float(snapshot.get("updated_at") or _now())
+        with self._lock, self._connect() as conn:
+            conn.execute(
+                """
+                INSERT INTO training_jobs (job_id, created_at, updated_at, payload_json)
+                VALUES (?, ?, ?, ?)
+                ON CONFLICT(job_id) DO UPDATE SET
+                    updated_at = excluded.updated_at,
+                    payload_json = excluded.payload_json
+                """,
+                (job_id, created_at, updated_at, _as_json(snapshot)),
+            )
+            conn.commit()
+    def list_training_jobs(self, limit: int = 500) -> list[dict[str, Any]]:
+        if not self.enabled:
+            return []
+        rows: list[dict[str, Any]] = []
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                """
+                SELECT payload_json FROM training_jobs
+                ORDER BY updated_at DESC
+                LIMIT ?
+                """,
+                (max(1, int(limit)),),
+            )
+            for row in cur.fetchall():
+                rows.append(_from_json(str(row["payload_json"])))
+        return rows
+    def clear_training_jobs(self) -> int:
+        if not self.enabled:
+            return 0
+        with self._lock, self._connect() as conn:
+            cur = conn.execute("DELETE FROM training_jobs")
+            conn.commit()
+            return int(cur.rowcount or 0)
+    def delete_training_job(self, job_id: str) -> int:
+        if not self.enabled:
+            return 0
+        with self._lock, self._connect() as conn:
+            cur = conn.execute("DELETE FROM training_jobs WHERE job_id = ?", (str(job_id),))
+            conn.commit()
+            return int(cur.rowcount or 0)
+    # Simulation runs -------------------------------------------------------
+    def upsert_simulation_run(
+        self,
+        *,
+        run_id: str,
+        task_id: str,
+        agent_mode: str,
+        status: str,
+        payload: dict[str, Any],
+    ) -> None:
+        if not self.enabled:
+            return
+        now = _now()
+        created_at = float(payload.get("created_at") or now)
+        payload = dict(payload)
+        payload["run_id"] = run_id
+        payload["created_at"] = created_at
+        payload["updated_at"] = now
+        payload["task_id"] = task_id
+        payload["agent_mode"] = agent_mode
+        payload["status"] = status
+        with self._lock, self._connect() as conn:
+            conn.execute(
+                """
+                INSERT INTO simulation_runs (run_id, created_at, updated_at, task_id, agent_mode, status, payload_json)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(run_id) DO UPDATE SET
+                    updated_at = excluded.updated_at,
+                    task_id = excluded.task_id,
+                    agent_mode = excluded.agent_mode,
+                    status = excluded.status,
+                    payload_json = excluded.payload_json
+                """,
+                (
+                    run_id,
+                    created_at,
+                    now,
+                    task_id,
+                    agent_mode,
+                    status,
+                    _as_json(payload),
+                ),
+            )
+            conn.commit()
+    def list_simulation_runs(self, limit: int = 50) -> list[dict[str, Any]]:
+        if not self.enabled:
+            return []
+        out: list[dict[str, Any]] = []
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                """
+                SELECT payload_json FROM simulation_runs
+                ORDER BY updated_at DESC
+                LIMIT ?
+                """,
+                (max(1, int(limit)),),
+            )
+            for row in cur.fetchall():
+                data = _from_json(str(row["payload_json"]))
+                if isinstance(data.get("trace"), list):
+                    data["trace_len"] = len(data["trace"])
+                    data["has_trace"] = bool(data["trace"])
+                    data.pop("trace", None)
+                out.append(data)
+        return out
+    def get_simulation_run(self, run_id: str) -> dict[str, Any] | None:
+        if not self.enabled:
+            return None
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                "SELECT payload_json FROM simulation_runs WHERE run_id = ?",
+                (run_id,),
+            )
+            row = cur.fetchone()
+        if row is None:
+            return None
+        return _from_json(str(row["payload_json"]))
+    def clear_simulation_runs(self) -> int:
+        if not self.enabled:
+            return 0
+        with self._lock, self._connect() as conn:
+            cur = conn.execute("DELETE FROM simulation_runs")
+            conn.commit()
+            return int(cur.rowcount or 0)
+    # Comparison runs -------------------------------------------------------
+    def create_comparison_run(self, payload: dict[str, Any]) -> str | None:
+        if not self.enabled:
+            return None
+        comparison_id = str(payload.get("comparison_id") or uuid4())
+        now = _now()
+        body = dict(payload)
+        body["comparison_id"] = comparison_id
+        body["created_at"] = float(body.get("created_at") or now)
+        body["updated_at"] = now
+        task_id = str(body.get("task_id") or "")
+        with self._lock, self._connect() as conn:
+            conn.execute(
+                """
+                INSERT INTO comparison_runs (comparison_id, created_at, updated_at, task_id, payload_json)
+                VALUES (?, ?, ?, ?, ?)
+                ON CONFLICT(comparison_id) DO UPDATE SET
+                    updated_at = excluded.updated_at,
+                    task_id = excluded.task_id,
+                    payload_json = excluded.payload_json
+                """,
+                (
+                    comparison_id,
+                    float(body["created_at"]),
+                    now,
+                    task_id,
+                    _as_json(body),
+                ),
+            )
+            conn.commit()
+        return comparison_id
+    def list_comparison_runs(self, limit: int = 50) -> list[dict[str, Any]]:
+        if not self.enabled:
+            return []
+        out: list[dict[str, Any]] = []
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                """
+                SELECT payload_json FROM comparison_runs
+                ORDER BY updated_at DESC
+                LIMIT ?
+                """,
+                (max(1, int(limit)),),
+            )
+            for row in cur.fetchall():
+                out.append(_from_json(str(row["payload_json"])))
+        return out
+    def get_comparison_run(self, comparison_id: str) -> dict[str, Any] | None:
+        if not self.enabled:
+            return None
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                "SELECT payload_json FROM comparison_runs WHERE comparison_id = ?",
+                (comparison_id,),
+            )
+            row = cur.fetchone()
+        if row is None:
+            return None
+        return _from_json(str(row["payload_json"]))
+    def clear_comparison_runs(self) -> int:
+        if not self.enabled:
+            return 0
+        with self._lock, self._connect() as conn:
+            cur = conn.execute("DELETE FROM comparison_runs")
+            conn.commit()
+            return int(cur.rowcount or 0)

app/reward.py ADDED Viewed

	@@ -0,0 +1,108 @@

+"""
+reward.py — Gov Workflow OpenEnv Phase 4: Dense Reward Shaping
+Formula (per step):
+  R_t = progress_reward + completion_reward + recovery_reward + stability_bonus
+        - waiting_penalty - sla_penalty - fairness_penalty
+        - invalid_action_penalty - idle_capacity_penalty - oscillation_penalty
+All coefficients are named constants — never magic numbers inline.
+"""
+from __future__ import annotations
+from app.models import RewardModel
+# ── Positive coefficients ─────────────────────────────────────────
+COEFF_PROGRESS     = 0.7   # per stage advance
+COEFF_COMPLETION   = 4.0   # per completed case
+COEFF_RECOVERY     = 1.5   # per unblocked missing-doc case resolved
+COEFF_STABILITY    = 0.1   # per step with zero SLA breaches and zero invalid actions
+# ── Negative coefficients ─────────────────────────────────────────
+COEFF_WAITING      = 0.04  # per case per day in backlog
+COEFF_SLA          = 1.5   # per new SLA breach
+COEFF_FAIRNESS     = 2.0   # per unit of fairness excess above threshold
+COEFF_INVALID      = 1.5   # flat penalty per invalid action
+COEFF_IDLE         = 0.05  # per idle officer-day
+COEFF_OSCILLATION  = 0.15  # per oscillation event (repeated contradictory actions)
+# ── Fairness default tolerance (when no threshold set by task) ────
+DEFAULT_FAIRNESS_TOLERANCE = 0.40
+def compute_reward(
+    *,
+    stage_advances: int,
+    completions: int,
+    active_backlog: int,
+    new_sla_breaches: int,
+    fairness_gap: float,
+    fairness_threshold: float | None,
+    invalid_action: bool,
+    idle_capacity: int,
+    newly_unblocked_docs: int = 0,
+    oscillation_detected: bool = False,
+    award_stability_bonus: bool = True,
+) -> RewardModel:
+    """
+    Compute one-step dense reward.
+    Args:
+        stage_advances:       Number of applications that moved forward one stage today.
+        completions:          Number of applications fully completed today.
+        active_backlog:       Total cases still pending (creates waiting pressure).
+        new_sla_breaches:     New SLA deadline violations this step.
+        fairness_gap:         Cross-service completion fairness gap [0.0, 1.0].
+        fairness_threshold:   Task-defined acceptable fairness gap (or None → default).
+        invalid_action:       Whether the submitted action was invalid.
+        idle_capacity:        Officer-days wasted idle while backlog exists.
+        newly_unblocked_docs: Cases unblocked after missing-doc resolution (positive signal).
+        oscillation_detected: True if agent is rapidly reversing recent decisions.
+    Returns:
+        RewardModel with all components filled and total_reward as the scalar.
+    """
+    # ── Positive components ───────────────────────────────────────
+    progress_reward   = COEFF_PROGRESS   * stage_advances
+    completion_reward = COEFF_COMPLETION * completions
+    recovery_reward   = COEFF_RECOVERY   * newly_unblocked_docs
+    stability_bonus = (
+        COEFF_STABILITY
+        if (award_stability_bonus and new_sla_breaches == 0 and not invalid_action)
+        else 0.0
+    )
+    # ── Negative components ───────────────────────────────────────
+    waiting_penalty = COEFF_WAITING * active_backlog
+    sla_penalty = COEFF_SLA * new_sla_breaches
+    tolerance = fairness_threshold if fairness_threshold is not None else DEFAULT_FAIRNESS_TOLERANCE
+    unfairness_excess = max(0.0, fairness_gap - tolerance)
+    fairness_penalty = COEFF_FAIRNESS * unfairness_excess
+    invalid_action_penalty = COEFF_INVALID if invalid_action else 0.0
+    idle_capacity_penalty = COEFF_IDLE * idle_capacity
+    oscillation_penalty = COEFF_OSCILLATION if oscillation_detected else 0.0
+    # ── Total ─────────────────────────────────────────────────────
+    total_reward = (
+        progress_reward + completion_reward + recovery_reward + stability_bonus
+        - waiting_penalty - sla_penalty - fairness_penalty
+        - invalid_action_penalty - idle_capacity_penalty - oscillation_penalty
+    )
+    return RewardModel(
+        total_reward=round(total_reward, 4),
+        progress_reward=round(progress_reward, 4),
+        completion_reward=round(completion_reward, 4),
+        recovery_reward=round(recovery_reward, 4),
+        stability_bonus=round(stability_bonus, 4),
+        waiting_penalty=round(-waiting_penalty, 4),
+        sla_penalty=round(-sla_penalty, 4),
+        fairness_penalty=round(-fairness_penalty, 4),
+        invalid_action_penalty=round(-invalid_action_penalty, 4),
+        idle_capacity_penalty=round(-idle_capacity_penalty, 4),
+        oscillation_penalty=round(-oscillation_penalty, 4),
+    )

app/sector_profiles.py ADDED Viewed

	@@ -0,0 +1,183 @@

+"""
+sector_profiles.py — Phase 2 update: enrichment type, probability, delay range per service.
+"""
+from app.models import (
+    DocEnrichmentType, SectorProfile, ServiceType, UrgencyProfile
+)
+# One SectorProfile constant per ServiceType, followed by the registry that
+# maps each ServiceType to its profile.
+# Income certificate: 21-day SLA; enrichment type NONE with probability 0.0.
+INCOME_CERTIFICATE_PROFILE = SectorProfile(
+    service_type=ServiceType.INCOME_CERTIFICATE,
+    sector_name="Revenue Sector — Income Certificate",
+    missing_docs_probability=0.45,
+    doc_defect_rate_digital=0.30,
+    doc_defect_rate_paper=0.65,
+    field_verification_probability=0.30,
+    manual_scrutiny_intensity=0.60,
+    decision_backlog_sensitivity=0.70,
+    system_dependency_risk=0.20,
+    sla_days=21,
+    urgency_profile=UrgencyProfile.MODERATE,
+    base_processing_rate=8.0,
+    field_verification_days=3,
+    doc_enrichment_type=DocEnrichmentType.NONE,
+    doc_enrichment_probability=0.0,
+    doc_enrichment_delay_days_min=1,
+    doc_enrichment_delay_days_max=2,
+)
+# Land registration: 30-day SLA; PAST_LAND_RECORDS enrichment with probability 0.70.
+LAND_REGISTRATION_PROFILE = SectorProfile(
+    service_type=ServiceType.LAND_REGISTRATION,
+    sector_name="Land Sector — 7/12 Mutation",
+    missing_docs_probability=0.35,
+    doc_defect_rate_digital=0.25,
+    doc_defect_rate_paper=0.55,
+    field_verification_probability=0.65,
+    manual_scrutiny_intensity=0.75,
+    decision_backlog_sensitivity=0.85,
+    system_dependency_risk=0.55,
+    sla_days=30,
+    urgency_profile=UrgencyProfile.LOW_BUT_STICKY,
+    base_processing_rate=4.0,
+    field_verification_days=5,
+    doc_enrichment_type=DocEnrichmentType.PAST_LAND_RECORDS,
+    doc_enrichment_probability=0.70,
+    doc_enrichment_delay_days_min=2,
+    doc_enrichment_delay_days_max=5,   # REVENUE_DB_DELAY event adds 1-2 more
+)
+# Caste certificate: 21-day SLA; FAMILY_CASTE_HISTORY enrichment with probability 0.55.
+CASTE_CERTIFICATE_PROFILE = SectorProfile(
+    service_type=ServiceType.CASTE_CERTIFICATE,
+    sector_name="Revenue Sector — Caste Certificate",
+    missing_docs_probability=0.40,
+    doc_defect_rate_digital=0.25,
+    doc_defect_rate_paper=0.60,
+    field_verification_probability=0.35,
+    manual_scrutiny_intensity=0.65,
+    decision_backlog_sensitivity=0.65,
+    system_dependency_risk=0.25,
+    sla_days=21,
+    urgency_profile=UrgencyProfile.MODERATE,
+    base_processing_rate=7.0,
+    field_verification_days=3,
+    doc_enrichment_type=DocEnrichmentType.FAMILY_CASTE_HISTORY,
+    doc_enrichment_probability=0.55,
+    doc_enrichment_delay_days_min=2,
+    doc_enrichment_delay_days_max=4,
+)
+# Birth certificate: 7-day SLA; enrichment type NONE with probability 0.0.
+BIRTH_CERTIFICATE_PROFILE = SectorProfile(
+    service_type=ServiceType.BIRTH_CERTIFICATE,
+    sector_name="Municipal Sector — Birth Certificate",
+    missing_docs_probability=0.20,
+    doc_defect_rate_digital=0.15,
+    doc_defect_rate_paper=0.35,
+    field_verification_probability=0.05,
+    manual_scrutiny_intensity=0.30,
+    decision_backlog_sensitivity=0.40,
+    system_dependency_risk=0.30,
+    sla_days=7,
+    urgency_profile=UrgencyProfile.HIGH,
+    base_processing_rate=15.0,
+    field_verification_days=1,
+    doc_enrichment_type=DocEnrichmentType.NONE,
+    doc_enrichment_probability=0.0,
+    doc_enrichment_delay_days_min=1,
+    doc_enrichment_delay_days_max=1,
+)
+# Passport: 30-day SLA; POLICE_VERIFICATION enrichment with probability 0.85.
+PASSPORT_PROFILE = SectorProfile(
+    service_type=ServiceType.PASSPORT,
+    sector_name="National Sector — Passport",
+    missing_docs_probability=0.25,
+    doc_defect_rate_digital=0.20,
+    doc_defect_rate_paper=0.50,
+    field_verification_probability=0.90,
+    manual_scrutiny_intensity=0.80,
+    decision_backlog_sensitivity=0.75,
+    system_dependency_risk=0.35,
+    sla_days=30,
+    urgency_profile=UrgencyProfile.HIGH,
+    base_processing_rate=5.0,
+    field_verification_days=14,
+    doc_enrichment_type=DocEnrichmentType.POLICE_VERIFICATION,
+    doc_enrichment_probability=0.85,
+    doc_enrichment_delay_days_min=7,
+    doc_enrichment_delay_days_max=14,
+)
+# GST registration: 7-day SLA; TAX_RECORD_CROSS_CHECK enrichment with probability 0.50.
+GST_REGISTRATION_PROFILE = SectorProfile(
+    service_type=ServiceType.GST_REGISTRATION,
+    sector_name="Tax Sector — GST Registration",
+    missing_docs_probability=0.30,
+    doc_defect_rate_digital=0.20,
+    doc_defect_rate_paper=0.50,
+    field_verification_probability=0.20,
+    manual_scrutiny_intensity=0.55,
+    decision_backlog_sensitivity=0.60,
+    system_dependency_risk=0.45,
+    sla_days=7,
+    urgency_profile=UrgencyProfile.HIGH,
+    base_processing_rate=10.0,
+    field_verification_days=2,
+    doc_enrichment_type=DocEnrichmentType.TAX_RECORD_CROSS_CHECK,
+    doc_enrichment_probability=0.50,
+    doc_enrichment_delay_days_min=1,
+    doc_enrichment_delay_days_max=3,
+)
+# Driving license: 14-day SLA; enrichment type NONE with probability 0.0.
+DRIVING_LICENSE_PROFILE = SectorProfile(
+    service_type=ServiceType.DRIVING_LICENSE,
+    sector_name="Transport Sector — Driving License",
+    missing_docs_probability=0.28,
+    doc_defect_rate_digital=0.18,
+    doc_defect_rate_paper=0.45,
+    field_verification_probability=0.40,
+    manual_scrutiny_intensity=0.50,
+    decision_backlog_sensitivity=0.55,
+    system_dependency_risk=0.30,
+    sla_days=14,
+    urgency_profile=UrgencyProfile.MODERATE,
+    base_processing_rate=12.0,
+    field_verification_days=2,
+    doc_enrichment_type=DocEnrichmentType.NONE,
+    doc_enrichment_probability=0.0,
+    doc_enrichment_delay_days_min=1,
+    doc_enrichment_delay_days_max=1,
+)
+# Aadhaar card: 10-day SLA; enrichment type NONE with probability 0.0.
+AADHAAR_CARD_PROFILE = SectorProfile(
+    service_type=ServiceType.AADHAAR_CARD,
+    sector_name="National Identity Sector - Aadhaar Card",
+    missing_docs_probability=0.22,
+    doc_defect_rate_digital=0.12,
+    doc_defect_rate_paper=0.30,
+    field_verification_probability=0.18,
+    manual_scrutiny_intensity=0.42,
+    decision_backlog_sensitivity=0.50,
+    system_dependency_risk=0.38,
+    sla_days=10,
+    urgency_profile=UrgencyProfile.HIGH,
+    base_processing_rate=13.0,
+    field_verification_days=2,
+    doc_enrichment_type=DocEnrichmentType.NONE,
+    doc_enrichment_probability=0.0,
+    doc_enrichment_delay_days_min=1,
+    doc_enrichment_delay_days_max=2,
+)
+# Lookup table used by get_sector_profile(): ServiceType -> SectorProfile.
+SECTOR_REGISTRY: dict = {
+    ServiceType.INCOME_CERTIFICATE: INCOME_CERTIFICATE_PROFILE,
+    ServiceType.LAND_REGISTRATION:  LAND_REGISTRATION_PROFILE,
+    ServiceType.CASTE_CERTIFICATE:  CASTE_CERTIFICATE_PROFILE,
+    ServiceType.BIRTH_CERTIFICATE:  BIRTH_CERTIFICATE_PROFILE,
+    ServiceType.PASSPORT:           PASSPORT_PROFILE,
+    ServiceType.GST_REGISTRATION:   GST_REGISTRATION_PROFILE,
+    ServiceType.DRIVING_LICENSE:    DRIVING_LICENSE_PROFILE,
+    ServiceType.AADHAAR_CARD:       AADHAAR_CARD_PROFILE,
+}
+def get_sector_profile(service_type: ServiceType) -> SectorProfile:
+    if service_type not in SECTOR_REGISTRY:
+        raise KeyError(f"No SectorProfile for {service_type}")
+    return SECTOR_REGISTRY[service_type]

app/signal_computer.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""
+signal_computer.py — Gov Workflow OpenEnv v2.0
+Computes normalized compressed state signals for observations.
+All signals are deterministic. Float signals are normalized to [0.0, 1.0]; the blocked-case signals are raw integer counts.
+"""
+from typing import Dict
+from app.models import QueueSnapshot, OfficerPool
+class ComputedSignals:
+    def __init__(self):
+        self.backlog_pressure: float = 0.0
+        self.sla_risk_score: float = 0.0
+        self.fairness_index: float = 1.0
+        self.resource_utilization: float = 0.0
+        self.digital_intake_ratio: float = 0.5
+        self.blocked_cases_missing_docs: int = 0
+        self.blocked_cases_enrichment: int = 0
+        self.field_verification_load: float = 0.0
+class SignalComputer:
+    def compute(
+        self,
+        queue_snapshots: Dict[str, QueueSnapshot],
+        officer_pool: OfficerPool,
+        todays_arrivals: int = 0,
+        digital_arrivals: int = 0,
+        capacity_per_day: float = 1.0,
+    ) -> ComputedSignals:
+        signals = ComputedSignals()
+        snapshots = list(queue_snapshots.values())
+        if not snapshots:
+            return signals
+        total_pending = sum(s.total_pending for s in snapshots)
+        # Backlog pressure
+        capacity_ceiling = max(1.0, capacity_per_day * 5.0)
+        signals.backlog_pressure = min(1.0, total_pending / capacity_ceiling)
+        # SLA risk score (weighted average)
+        total_nonzero = max(1, total_pending)
+        signals.sla_risk_score = min(1.0, max(0.0,
+            sum(s.current_sla_risk * s.total_pending for s in snapshots) / total_nonzero
+        ))
+        # Fairness index (1 - coefficient of variation of completion rates)
+        if len(snapshots) < 2:
+            signals.fairness_index = 1.0
+        else:
+            rates = []
+            for s in snapshots:
+                total = s.total_pending + s.total_completed_today
+                rates.append(s.total_completed_today / max(1, total) if total > 0 else 0.0)
+            mean = sum(rates) / len(rates)
+            if mean > 0:
+                variance = sum((r - mean) ** 2 for r in rates) / len(rates)
+                cv = (variance ** 0.5) / mean
+                signals.fairness_index = max(0.0, 1.0 - min(1.0, cv))
+            else:
+                signals.fairness_index = 1.0
+        # Resource utilization
+        allocated = sum(officer_pool.allocated.values())
+        signals.resource_utilization = min(1.0, allocated / max(1, officer_pool.available_officers))
+        # Digital intake ratio
+        signals.digital_intake_ratio = (
+            min(1.0, digital_arrivals / todays_arrivals) if todays_arrivals > 0 else 0.5
+        )
+        # Blocked cases
+        signals.blocked_cases_missing_docs = sum(s.blocked_missing_docs for s in snapshots)
+        signals.blocked_cases_enrichment   = sum(s.blocked_enrichment for s in snapshots)
+        # Field verification load
+        total_in_field = sum(s.field_verification_pending for s in snapshots)
+        signals.field_verification_load = total_in_field / total_nonzero if total_nonzero > 0 else 0.0
+        return signals

app/simulator.py ADDED Viewed

	@@ -0,0 +1,1106 @@

+from __future__ import annotations
+import json
+import os
+import random
+import re
+from dataclasses import dataclass
+from typing import Any, Literal
+from openai import OpenAI
+from app.baselines import POLICIES, backlog_clearance_policy
+from app.env import GovWorkflowEnv
+from app.graders import grade_episode
+from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
+from app.engine import DayResult, DaySimulator
+from enum import Enum
+SimulationAgentMode = Literal["baseline_policy", "llm_inference", "trained_rl"]
+class SimulationAgentModeEnum(str, Enum):
+    baseline_policy = "baseline_policy"
+    llm_inference = "llm_inference"
+    trained_rl = "trained_rl"
+SimulationAgentMode = SimulationAgentModeEnum
+# Ordered list of model identifiers in "vendor/model" form.
+# NOTE(review): presumably a fallback pool for NVIDIA-hosted LLM inference —
+# confirm against the code that reads this constant.
+LEGACY_NVIDIA_MODEL_POOL = [
+    "meta/llama-3.3-70b-instruct",
+    "qwen/qwen3-next-80b-a3b-instruct",
+    "moonshotai/kimi-k2-instruct-0905",
+    "meta/llama-3.1-405b-instruct",
+    "deepseek-ai/deepseek-v3.2",
+    "qwen/qwq-32b",
+    "mistralai/mixtral-8x22b-instruct-v0.1",
+    "google/gemma-3-27b-it",
+    "microsoft/phi-4-mini-instruct",
+    "meta/llama-3.1-8b-instruct",
+]
+@dataclass
+class SimulationRun:
+    task_id: str
+    agent_mode: SimulationAgentMode
+    seed: int
+    total_reward: float
+    score: float
+    grader_name: str
+    summary: dict[str, Any]
+    trace: list[dict[str, Any]]
+def _dedupe(values: list[str | None]) -> list[str]:
+    out: list[str] = []
+    for value in values:
+        if value is None:
+            continue
+        v = value.strip()
+        if v and v not in out:
+            out.append(v)
+    return out
+def _env_csv_list(name: str) -> list[str]:
+    raw = os.getenv(name, "").strip()
+    if not raw:
+        return []
+    return [x.strip() for x in raw.split(",") if x.strip()]
+def _extract_json_object(text: str) -> dict[str, Any] | None:
+    text = (text or "").strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except json.JSONDecodeError:
+        pass
+    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
+    if not match:
+        return None
+    try:
+        parsed = json.loads(match.group(0))
+    except json.JSONDecodeError:
+        return None
+    return parsed if isinstance(parsed, dict) else None
+def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
+    if not payload:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME)
+    try:
+        # Remap legacy Phase 1 field names to Phase 2
+        remapped = dict(payload)
+        if "service" in remapped and "service_target" not in remapped:
+            remapped["service_target"] = remapped.pop("service")
+        if "target_service" in remapped:
+            src = remapped.pop("service_target", None)
+            tgt = remapped.pop("target_service", None)
+            delta = remapped.pop("officer_delta", 1)
+            remapped["reallocation_delta"] = {
+                (src.value if hasattr(src, 'value') else str(src)): -int(delta),
+                (tgt.value if hasattr(tgt, 'value') else str(tgt)): int(delta),
+            } if src and tgt else None
+        if "officer_delta" in remapped and "capacity_assignment" not in remapped:
+            svc = remapped.get("service_target")
+            if svc:
+                svc_key = svc.value if hasattr(svc, 'value') else str(svc)
+                remapped["capacity_assignment"] = {svc_key: int(remapped.pop("officer_delta"))}
+            else:
+                remapped.pop("officer_delta", None)
+        if "case_id" in remapped:
+            remapped.pop("case_id", None)
+        return ActionModel(**remapped)
+    except Exception:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME)
+def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
+    return [
+        {
+            "service": q.service_type.value,
+            "active_cases": q.total_pending,
+            "missing_docs_cases": q.blocked_missing_docs,
+            "urgent_cases": q.urgent_pending,
+            "breached_cases": q.total_sla_breached,
+            "avg_age_days": q.avg_waiting_days,
+        }
+        for q in obs.queue_snapshots.values()
+    ]
+def _recommended_min_steps(task_id: str) -> int:
+    if task_id == "cross_department_hard":
+        return 70
+    if task_id == "mixed_urgency_medium":
+        return 60
+    return 40
+def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
+    pool = obs.officer_pool
+    # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
+    alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
+    raw = alloc_dict.get(service)
+    if raw is None:
+        raw = alloc_dict.get(service.value if hasattr(service, 'value') else str(service), 0)
+    return int(raw or 0)
+def _top_backlog_service(
+    obs: ObservationModel,
+    *,
+    exclude: ServiceType | None = None,
+) -> ServiceType | None:
+    qs = obs.queue_snapshots
+    snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
+    ranked = [q for q in snapshots if getattr(q, 'service_type', getattr(q, 'service', None)) != exclude]
+    if not ranked:
+        return None
+    ranked.sort(
+        key=lambda q: (
+            getattr(q, 'total_pending', getattr(q, 'active_cases', 0))
+            + 2 * getattr(q, 'total_sla_breached', getattr(q, 'breached_cases', 0))
+            + getattr(q, 'urgent_pending', getattr(q, 'urgent_cases', 0)),
+            getattr(q, 'avg_waiting_days', getattr(q, 'avg_age_days', 0)),
+        ),
+        reverse=True,
+    )
+    return getattr(ranked[0], 'service_type', getattr(ranked[0], 'service', None))
+def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
+    qs = obs.queue_snapshots
+    snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
+    candidates = [
+        q for q in snapshots
+        if getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)) > 0
+    ]
+    if not candidates:
+        return None
+    candidates.sort(
+        key=lambda q: (
+            getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)),
+            getattr(q, 'total_pending', getattr(q, 'active_cases', 0)),
+        ),
+        reverse=True,
+    )
+    return getattr(candidates[0], 'service_type', getattr(candidates[0], 'service', None))
+def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
+    qs = obs.queue_snapshots
+    snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
+    services = [
+        getattr(q, 'service_type', getattr(q, 'service', None))
+        for q in snapshots
+    ]
+    services.sort(key=lambda s: _alloc_for(obs, s), reverse=True)
+    for service in services:
+        if service and _alloc_for(obs, service) > 0:
+            return service
+    return None
+def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
+    pool = obs.officer_pool
+    has_reserve = int(getattr(pool, 'idle_officers', getattr(pool, 'reserve_officers', 0))) > 0
+    qs = obs.queue_snapshots
+    snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
+    has_missing = any(
+        getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)) > 0
+        for q in snapshots
+    )
+    has_backlog = any(
+        getattr(q, 'total_pending', getattr(q, 'active_cases', 0)) > 0
+        for q in snapshots
+    )
+    has_budget = int(obs.escalation_budget_remaining) > 0
+    staffed_services = [
+        getattr(q, 'service_type', getattr(q, 'service', None))
+        for q in snapshots
+        if _alloc_for(obs, getattr(q, 'service_type', getattr(q, 'service', None))) > 0
+    ]
+    can_reallocate = len(staffed_services) >= 1 and len(snapshots) >= 2
+    return {
+        ActionType.SET_PRIORITY_MODE: True,
+        ActionType.ADVANCE_TIME: True,
+        ActionType.ASSIGN_CAPACITY: has_reserve and has_backlog,
+        ActionType.REQUEST_MISSING_DOCUMENTS: has_missing,
+        ActionType.ESCALATE_SERVICE: has_budget and has_backlog,
+        ActionType.REALLOCATE_OFFICERS: can_reallocate,
+    }
+def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
+    mask = _compute_action_mask(obs)
+    allowed = [k.value for k, ok in mask.items() if ok]
+    blocked = [k.value for k, ok in mask.items() if not ok]
+    return allowed, blocked
+def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
+    top_backlog = _top_backlog_service(obs)
+    top_missing = _service_with_missing_docs(obs)
+    if int(obs.officer_pool.idle_officers) > 0 and top_backlog is not None:
+        return (
+            ActionModel(action_type=ActionType.ASSIGN_CAPACITY, service=top_backlog, officer_delta=1),
+            "high-impact: assign reserve capacity to top backlog service",
+        )
+    if top_missing is not None:
+        return (
+            ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service=top_missing),
+            "high-impact: clear missing-document bottleneck",
+        )
+    if int(obs.escalation_budget_remaining) > 0:
+        qs = obs.queue_snapshots
+        snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
+        hot = sorted(
+            snapshots,
+            key=lambda q: (
+                getattr(q, 'total_sla_breached', getattr(q, 'breached_cases', 0)),
+                getattr(q, 'total_pending', getattr(q, 'active_cases', 0)),
+                getattr(q, 'urgent_pending', getattr(q, 'urgent_cases', 0)),
+            ),
+            reverse=True,
+        )
+        if hot and (
+            getattr(hot[0], 'total_sla_breached', getattr(hot[0], 'breached_cases', 0)) > 0
+            or getattr(hot[0], 'total_pending', getattr(hot[0], 'active_cases', 0)) > 0
+        ):
+            svc = getattr(hot[0], 'service_type', getattr(hot[0], 'service', None))
+            return (
+                ActionModel(action_type=ActionType.ESCALATE_SERVICE, escalation_target=svc),
+                "high-impact: escalate highest SLA-risk service",
+            )
+    source = _service_with_officers(obs)
+    if source is not None and _alloc_for(obs, source) > 0:
+        target = _top_backlog_service(obs, exclude=source)
+        if target is not None and target != source:
+            return (
+                ActionModel(
+                    action_type=ActionType.REALLOCATE_OFFICERS,
+                    service_target=source,
+                    reallocation_delta={source.value: -1, target.value: 1},
+                ),
+                "high-impact: reallocate one officer toward highest backlog",
+            )
+    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
+def _repair_action_for_observation(
+    action: ActionModel,
+    obs: ObservationModel,
+) -> tuple[ActionModel, str | None]:
+    """Make ``action`` executable against ``obs``, rebuilding it if needed.
+    Returns ``(action_to_run, note)``. ``note`` is ``None`` when the action is
+    returned as given or the rebuilt action dumps identically to it; otherwise
+    it is a short string saying what was repaired or why a fallback from
+    ``_best_high_impact_action`` was substituted.
+    """
+    mask = _compute_action_mask(obs)
+    at = action.action_type
+    # Action types missing from the mask are treated as allowed.
+    if not bool(mask.get(at, True)):
+        fallback, why = _best_high_impact_action(obs)
+        return fallback, f"masked {at.value}; {why}"
+    if at == ActionType.ADVANCE_TIME:
+        return action, None
+    if at == ActionType.SET_PRIORITY_MODE:
+        if action.priority_mode is None:
+            return (
+                ActionModel(action_type=ActionType.SET_PRIORITY_MODE, priority_mode=PriorityMode.BACKLOG_CLEARANCE),
+                "missing priority_mode, defaulted to backlog_clearance",
+            )
+        return action, None
+    if at == ActionType.ASSIGN_CAPACITY:
+        pool = obs.officer_pool
+        reserve = int(getattr(pool, 'idle_officers', getattr(pool, 'reserve_officers', 0)))
+        if reserve <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"reserve officers exhausted; {why}"
+        # Explicit target first, then the legacy field, then the top-backlog service.
+        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _top_backlog_service(obs)
+        if service is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no service available for assign_capacity; {why}"
+        cap = action.capacity_assignment or {}
+        delta = cap.get(service.value, cap.get(str(service), 1))
+        # Clamp the request to between 1 and the number of idle officers.
+        delta = max(1, min(int(delta), reserve))
+        repaired = ActionModel(
+            action_type=ActionType.ASSIGN_CAPACITY,
+            service_target=service,
+            capacity_assignment={service.value: delta},
+        )
+        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired assign_capacity payload"
+        return repaired, note
+    if at == ActionType.REQUEST_MISSING_DOCUMENTS:
+        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _service_with_missing_docs(obs)
+        if service is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no missing-doc queue available; {why}"
+        repaired = ActionModel(
+            action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
+            service_target=service,
+        )
+        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired request_missing_documents payload"
+        return repaired, note
+    if at == ActionType.ESCALATE_SERVICE:
+        if int(obs.escalation_budget_remaining) <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"escalation budget exhausted; {why}"
+        service = (
+            getattr(action, 'escalation_target', None)
+            or getattr(action, 'service_target', None)
+            or getattr(action, 'service', None)
+            or _top_backlog_service(obs)
+        )
+        if service is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no escalation target available; {why}"
+        repaired = ActionModel(
+            action_type=ActionType.ESCALATE_SERVICE,
+            escalation_target=service,
+        )
+        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired escalate_service payload"
+        return repaired, note
+    if at == ActionType.REALLOCATE_OFFICERS:
+        source = (
+            getattr(action, 'service_target', None)
+            or getattr(action, 'service', None)
+            or _service_with_officers(obs)
+        )
+        if source is None:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"no staffed source service; {why}"
+        source_alloc = _alloc_for(obs, source)
+        if source_alloc <= 0:
+            # The requested source has no officers; switch to the best-staffed service.
+            source = _service_with_officers(obs)
+            source_alloc = _alloc_for(obs, source) if source is not None else 0
+        if source is None or source_alloc <= 0:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"insufficient source officers; {why}"
+        # Phase 2: target comes from reallocation_delta; Phase 1 from target_service
+        rdelta = action.reallocation_delta or {}
+        target = None
+        # Only the first positive entry is examined; an unknown key leaves target unset.
+        for k, v in rdelta.items():
+            if v > 0:
+                try:
+                    target = ServiceType(k)
+                except Exception:
+                    pass
+                break
+        if target is None:
+            target = getattr(action, 'target_service', None)
+        if target is None or target == source:
+            target = _top_backlog_service(obs, exclude=source)
+        if target is None or target == source:
+            fallback, why = _best_high_impact_action(obs)
+            return fallback, f"missing distinct target_service; {why}"
+        # Move at least one officer and never more than the source has.
+        delta = max(1, min(abs(rdelta.get(source.value, 1)), source_alloc))
+        repaired = ActionModel(
+            action_type=ActionType.REALLOCATE_OFFICERS,
+            service_target=source,
+            reallocation_delta={source.value: -delta, target.value: delta},
+        )
+        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired reallocate_officers payload"
+        return repaired, note
+    # Any other action type is passed through untouched.
+    return action, None
+"""
+The high-level simulation orchestration now lives in app.engine.
+This module re-exports the public runtime API so existing imports
+from app.simulator continue to work unchanged.
+"""
+def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
+    if agent_mode == "baseline_policy":
+        return "baseline_policy"
+    if agent_mode == "trained_rl":
+        return "trained_rl"
+    return os.getenv("MODEL_NAME", "llm_inference")
+def _log_step_line(step_row: dict[str, Any]) -> str:
+    done = "true" if bool(step_row.get("done")) else "false"
+    error = step_row.get("last_action_error") or "null"
+    action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
+    source = step_row.get("decision_source") or "unknown"
+    model = step_row.get("model_used") or "null"
+    repair = step_row.get("repair_note") or "null"
+    switch_note = step_row.get("switch_note") or "null"
+    return (
+        f"[STEP] step={step_row.get('step', 0)} action={action} "
+        f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
+        f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
+    )
+class LiveSimulationSession:
+    def __init__(
+        self,
+        *,
+        task_id: str,
+        agent_mode: SimulationAgentMode,
+        max_steps: int,
+        seed: int | None,
+        policy_name: str | None = None,
+        model_path: str | None = None,
+        model_type: Literal["maskable", "recurrent"] = "maskable",
+    ) -> None:
+        self.task_id = task_id
+        self.agent_mode = agent_mode
+        recommended = _recommended_min_steps(task_id)
+        if agent_mode == "llm_inference":
+            self.max_steps = max(int(max_steps), int(recommended))
+        else:
+            self.max_steps = int(max_steps)
+        self.seed = int(seed if seed is not None else random.randint(1, 999999))
+        self.policy_name = policy_name or "backlog_clearance"
+        self.model_path = model_path
+        self.model_type = model_type
+        self.trace: list[dict[str, Any]] = []
+        self.total_reward = 0.0
+        self.step_idx = 0
+        self.done = False
+        self.summary: dict[str, Any] | None = None
+        self.score: float | None = None
+        self.grader_name: str | None = None
+        self.env: GovWorkflowEnv | None = None
+        self.obs: ObservationModel | Any = None
+        self.policy = None
+        self.rl_env: Any = None
+        self.rl_model: Any = None
+        self.rl_lstm_state: Any = None
+        self.rl_episode_start: Any = None
+        self.llm_runtimes: list[dict[str, Any]] = []
+        self.llm_route: list[str] = []
+        self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
+        self.consecutive_failure_steps = 0
+        self.recovery_steps_remaining = 0
+        self.auto_switch_count = 0
+        self.last_switch_reason: str | None = None
+        if self.agent_mode == "trained_rl":
+            self._init_trained()
+        else:
+            self._init_core()
+    def start_line(self) -> str:
+        return (
+            f"[START] task={self.task_id} env=gov-workflow-openenv "
+            f"model={_model_label_for_mode(self.agent_mode)}"
+        )
+    def _init_core(self) -> None:
+        self.env = GovWorkflowEnv(task_id=self.task_id)
+        self.obs, _ = self.env.reset(seed=self.seed)
+        if self.agent_mode == "baseline_policy":
+            self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
+        else:
+            self.policy = self._llm_action_with_meta
+            self._init_llm_runtimes()
+    def _init_llm_runtimes(self) -> None:
+        """Build the LLM runtimes from environment variables.
+        Up to two runtimes are created, in this order: ``openai-compatible``
+        and ``nvidia``. Each holds its base URL, one client per distinct API
+        key and a de-duplicated model list. The method then resets
+        ``self.llm_model_stats`` to zeroed counters for every (provider, model)
+        pair and fills ``self.llm_route`` with human-readable route lines.
+        """
+        openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
+        nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")
+        # Candidate keys in priority order; _dedupe drops unset and repeated values.
+        openai_keys = _dedupe(
+            [
+                os.getenv("HF_TOKEN"),
+                os.getenv("OPENAI_API_KEY"),
+                os.getenv("API_KEY"),
+            ]
+        )
+        nvidia_keys = _dedupe(
+            [
+                os.getenv("NVIDIA_API_KEY"),
+                os.getenv("NVIDIA_API_KEY_2"),
+            ]
+        )
+        # Primary model first, then the comma-separated fallbacks.
+        openai_models = _dedupe(
+            [
+                os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct"),
+                *_env_csv_list("MODEL_FALLBACKS"),
+            ]
+        )
+        nvidia_models = _dedupe(
+            [
+                os.getenv("NVIDIA_MODEL"),
+                *_env_csv_list("NVIDIA_MODEL_FALLBACKS"),
+                *LEGACY_NVIDIA_MODEL_POOL,
+            ]
+        )
+        runtimes: list[dict[str, Any]] = []
+        if openai_keys and openai_models:
+            clients: list[tuple[OpenAI, str]] = []
+            for idx, key in enumerate(openai_keys, start=1):
+                try:
+                    clients.append((OpenAI(base_url=openai_base, api_key=key, timeout=8.0, max_retries=0), f"openai_key_{idx}"))
+                except Exception:
+                    # A key whose client cannot be constructed is skipped silently.
+                    continue
+            if clients:
+                runtimes.append(
+                    {
+                        "provider": "openai-compatible",
+                        "base_url": openai_base,
+                        "clients": clients,
+                        "models": openai_models,
+                    }
+                )
+        if nvidia_keys and nvidia_models:
+            clients = []
+            for idx, key in enumerate(nvidia_keys, start=1):
+                try:
+                    clients.append((OpenAI(base_url=nvidia_base, api_key=key, timeout=8.0, max_retries=0), f"nvidia_key_{idx}"))
+                except Exception:
+                    # A key whose client cannot be constructed is skipped silently.
+                    continue
+            if clients:
+                runtimes.append(
+                    {
+                        "provider": "nvidia",
+                        "base_url": nvidia_base,
+                        "clients": clients,
+                        "models": nvidia_models,
+                    }
+                )
+        self.llm_runtimes = runtimes
+        # Start every (provider, model) pair with zeroed counters.
+        self.llm_model_stats = {}
+        for runtime in runtimes:
+            provider = str(runtime.get("provider"))
+            for model in runtime.get("models", []):
+                self.llm_model_stats[(provider, str(model))] = {
+                    "calls": 0,
+                    "invalid": 0,
+                    "repaired": 0,
+                    "failures": 0,
+                    "cooldown_until_step": 0,
+                }
+        # Build the route descriptions stored in self.llm_route.
+        openai_runtime = next((rt for rt in runtimes if rt.get("provider") == "openai-compatible"), None)
+        nvidia_runtime = next((rt for rt in runtimes if rt.get("provider") == "nvidia"), None)
+        if openai_runtime is not None:
+            openai_route = (
+                f"openai-compatible ({len(openai_runtime['clients'])} keys, "
+                f"{len(openai_runtime['models'])} models)"
+            )
+        else:
+            openai_route = "openai-compatible (unavailable: missing API key/model)"
+        if nvidia_runtime is not None:
+            nvidia_route = (
+                f"nvidia ({len(nvidia_runtime['clients'])} keys, "
+                f"{len(nvidia_runtime['models'])} models)"
+            )
+        else:
+            nvidia_route = "nvidia (unavailable: missing API key/model)"
+        self.llm_route = [
+            openai_route,
+            nvidia_route,
+            "adaptive ranking: prefer models with lower invalid/repaired rates",
+            "heuristic fallback (backlog_clearance_policy)",
+        ]
+    def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
+        def _score(model_name: str) -> tuple[float, int]:
+            stat = self.llm_model_stats.get((provider, model_name), {})
+            calls = max(1, int(stat.get("calls", 0)))
+            invalid_rate = float(stat.get("invalid", 0)) / calls
+            repaired_rate = float(stat.get("repaired", 0)) / calls
+            fail_rate = float(stat.get("failures", 0)) / calls
+            cooldown = int(stat.get("cooldown_until_step", 0))
+            cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
+            return (invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty, -calls)
+        return sorted([str(m) for m in models], key=_score)
+    def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
+        """Choose the next action through the configured LLM runtimes.
+        Returns ``(action, meta)`` where ``meta`` records the decision source,
+        provider, model, number of LLM attempts, last error and key label.
+        While ``self.recovery_steps_remaining`` is positive the LLM is bypassed
+        and the heuristic from ``_best_high_impact_action`` is used. Otherwise
+        every runtime, client and ranked model is tried in turn and the first
+        call that completes is returned. If every call raises, or no runtime
+        is configured, the heuristic is used as a fallback.
+        """
+        if self.recovery_steps_remaining > 0:
+            # Recovery window: consume one step and skip the LLM entirely.
+            self.recovery_steps_remaining -= 1
+            action, why = _best_high_impact_action(obs)
+            return action, {
+                "decision_source": "auto_recovery_policy",
+                "provider": "heuristic",
+                "model_used": "backlog_clearance_policy",
+                "llm_attempts": 0,
+                "llm_error": None,
+                "llm_key_label": None,
+                "repair_note": why,
+            }
+        attempts = 0
+        last_error = ""
+        allowed_actions, blocked_actions = _masked_action_type_hints(obs)
+        # The hints use the legacy field names, which _coerce_action remaps.
+        schema_hint = {
+            "required_fields": {
+                "set_priority_mode": ["action_type", "priority_mode"],
+                "assign_capacity": ["action_type", "service", "officer_delta"],
+                "request_missing_documents": ["action_type", "service"],
+                "escalate_service": ["action_type", "service"],
+                "advance_time": ["action_type"],
+                "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
+            },
+            "allowed_priority_mode": [m.value for m in PriorityMode],
+            "allowed_services": [s.value for s in ServiceType],
+        }
+        system_prompt = (
+            "You are controlling a government workflow simulator. "
+            "Return exactly one JSON object only. No markdown. No explanation. "
+            "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents, "
+            "escalate_service, advance_time, reallocate_officers. "
+            "Rules: "
+            "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target. "
+            "2) assign_capacity requires service + officer_delta>0. "
+            "3) request_missing_documents requires service with missing_docs_cases>0. "
+            "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance]. "
+            "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops. "
+            "Use lowercase enum values."
+        )
+        user_prompt = (
+            "Observation:\n"
+            f"{obs.model_dump_json()}\n"
+            f"Allowed action types now: {allowed_actions}\n"
+            f"Blocked action types now: {blocked_actions}\n"
+            f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}\n"
+            f"Last action validity: {obs.last_action_valid}\n"
+            f"Last action message: {obs.last_action_message}\n"
+            "Return action JSON."
+        )
+        # Try runtimes in configured order; within each, every key against
+        # every model, best-ranked model first.
+        for runtime in self.llm_runtimes:
+            provider = str(runtime["provider"])
+            ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
+            for client, key_label in runtime["clients"]:
+                for model in ranked_models:
+                    attempts += 1
+                    stat_key = (provider, model)
+                    try:
+                        out = client.chat.completions.create(
+                            model=model,
+                            messages=[
+                                {"role": "system", "content": system_prompt},
+                                {"role": "user", "content": user_prompt},
+                            ],
+                            temperature=0.0,
+                            max_tokens=200,
+                            stream=False,
+                        )
+                        content = (out.choices[0].message.content or "").strip()
+                        # NOTE(review): a reply with no parseable JSON becomes
+                        # ADVANCE_TIME inside _coerce_action and is still
+                        # reported with decision_source "llm".
+                        action = _coerce_action(_extract_json_object(content))
+                        if stat_key in self.llm_model_stats:
+                            self.llm_model_stats[stat_key]["calls"] += 1
+                        return action, {
+                            "decision_source": "llm",
+                            "provider": provider,
+                            "model_used": model,
+                            "llm_attempts": attempts,
+                            "llm_error": None,
+                            "llm_key_label": key_label,
+                        }
+                    except Exception as exc:
+                        # Record the failure and move on to the next model/key.
+                        last_error = str(exc)
+                        stat = self.llm_model_stats.get(stat_key)
+                        if stat is not None:
+                            stat["calls"] += 1
+                            stat["failures"] += 1
+                            # Two or more failures start a cooldown ending 5 steps from now.
+                            if stat["failures"] >= 2:
+                                stat["cooldown_until_step"] = self.step_idx + 5
+                        continue
+        # Nothing succeeded (or nothing is configured): use the heuristic.
+        action, why = _best_high_impact_action(obs)
+        if not self.llm_runtimes:
+            last_error = "No LLM credentials configured."
+        return action, {
+            "decision_source": "heuristic_fallback",
+            "provider": "heuristic",
+            "model_used": "backlog_clearance_policy",
+            "llm_attempts": attempts,
+            "llm_error": last_error or None,
+            "llm_key_label": None,
+            "repair_note": why,
+        }
+    def _init_trained(self) -> None:
+        import numpy as np
+        from app.main import _load_model_cached_or_503, _resolve_model_path_or_422
+        from rl.gym_wrapper import GovWorkflowGymEnv
+        if not self.model_path:
+            raise ValueError("model_path is required for trained_rl simulation.")
+        model_abs = _resolve_model_path_or_422(self.model_path)
+        self.rl_model = _load_model_cached_or_503(model_abs, self.model_type)
+        self.rl_env = GovWorkflowGymEnv(task_id=self.task_id, seed=self.seed, hard_action_mask=True)
+        self.obs, _ = self.rl_env.reset(seed=self.seed)
+        self.rl_lstm_state = None
+        self.rl_episode_start = np.array([True], dtype=bool)
+    def step_once(self) -> tuple[dict[str, Any], str, bool]:
+        if self.done:
+            raise RuntimeError("Simulation already finished.")
+        self.step_idx += 1
+        if self.agent_mode == "trained_rl":
+            row = self._step_trained()
+        else:
+            row = self._step_core()
+        self.trace.append(row)
+        self.total_reward += float(row["reward"])
+        step_log = _log_step_line(row)
+        if row["done"] or self.step_idx >= self.max_steps:
+            self._finalize()
+            row["done"] = True
+            return row, step_log, True
+        return row, step_log, False
+    def end_line(self) -> str:
+        if self.score is None:
+            return "[END] success=false steps=0 score=0.00 rewards="
+        rewards = ",".join(f"{float(x.get('reward', 0.0)):.2f}" for x in self.trace)
+        success = "true" if self.score >= 0.5 else "false"
+        return (
+            f"[END] success={success} steps={len(self.trace)} "
+            f"score={self.score:.2f} rewards={rewards}"
+        )
+    def snapshot(self) -> dict[str, Any]:
+        return {
+            "task_id": self.task_id,
+            "agent_mode": self.agent_mode,
+            "seed": self.seed,
+            "max_steps": self.max_steps,
+            "step_idx": self.step_idx,
+            "done": self.done,
+            "total_reward": float(self.total_reward),
+            "score": self.score,
+            "grader_name": self.grader_name,
+            "summary": self.summary,
+            "trace_len": len(self.trace),
+            "llm_route": list(self.llm_route),
+        }
+    def close(self) -> None:
+        try:
+            if self.env is not None and hasattr(self.env, "close"):
+                self.env.close()
+        except Exception:
+            pass
+        try:
+            if self.rl_env is not None and hasattr(self.rl_env, "close"):
+                self.rl_env.close()
+        except Exception:
+            pass
+    def _step_core(self) -> dict[str, Any]:
+        if self.env is None:
+            raise RuntimeError("Core simulation env not initialized.")
+        if self.agent_mode == "baseline_policy":
+            action = self.policy(self.obs)
+            meta = {
+                "decision_source": "baseline_policy",
+                "provider": "local_policy",
+                "model_used": self.policy_name,
+                "llm_attempts": 0,
+                "llm_error": None,
+                "llm_key_label": None,
+            }
+        else:
+            raw_decision = self.policy(self.obs)
+            if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
+                action, meta = raw_decision
+            else:
+                action, meta = raw_decision, {}
+            if not isinstance(meta, dict):
+                meta = {}
+            if not isinstance(action, ActionModel):
+                if isinstance(action, dict):
+                    action = _coerce_action(action)
+                else:
+                    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+                    meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"
+            allowed_mask = _compute_action_mask(self.obs)
+            if not bool(allowed_mask.get(action.action_type, True)):
+                masked_fallback, why = _best_high_impact_action(self.obs)
+                action = masked_fallback
+                if meta.get("decision_source") == "llm":
+                    meta["decision_source"] = "llm_repaired"
+                meta["repair_note"] = f"action masked at runtime; {why}"
+            repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
+            if repair_note:
+                action = repaired_action
+                if meta.get("decision_source") == "llm":
+                    meta["decision_source"] = "llm_repaired"
+                meta["repair_note"] = repair_note
+        self.obs, reward, terminated, truncated, info = self.env.step(action)
+        done = bool(terminated or truncated)
+        # Read observation fields safely for both Phase 1 and Phase 2 model shapes
+        fairness_gap = float(
+            getattr(self.obs, 'fairness_gap',
+                    1.0 - getattr(self.obs, 'fairness_index', 1.0))
+        )
+        row = {
+            "step": self.step_idx,
+            "day": self.obs.day,
+            "action_type": action.action_type.value,
+            "action_payload": action.model_dump(exclude_none=True, mode="json"),
+            "reward": float(reward),
+            "done": done,
+            "backlog": self.obs.total_backlog,
+            "completed": self.obs.total_completed,
+            "sla_breaches": self.obs.total_sla_breaches,
+            "fairness_gap": fairness_gap,
+            "escalation_budget_remaining": self.obs.escalation_budget_remaining,
+            "invalid_action": bool(getattr(info, 'invalid_action', False)),
+            "last_action_error": getattr(info, 'last_action_error', None),
+            "queue_rows": _queue_rows(self.obs),
+        }
+        row.update(meta)
+        if self.agent_mode == "llm_inference":
+            is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
+            is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
+            model_used = str(row.get("model_used") or "")
+            provider = str(row.get("provider") or "")
+            stat_key = (provider, model_used)
+            stat = self.llm_model_stats.get(stat_key)
+            if stat is not None:
+                if is_repaired:
+                    stat["repaired"] += 1
+                if is_invalid:
+                    stat["invalid"] += 1
+                    stat["failures"] += 1
+                else:
+                    stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)
+            is_failure_pattern = is_invalid or is_repaired
+            if is_failure_pattern:
+                self.consecutive_failure_steps += 1
+            else:
+                self.consecutive_failure_steps = 0
+            if self.consecutive_failure_steps >= 4:
+                if stat is not None:
+                    stat["cooldown_until_step"] = self.step_idx + 6
+                self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
+                self.auto_switch_count += 1
+                self.last_switch_reason = "repeated invalid/repaired pattern detected"
+                row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
+                self.consecutive_failure_steps = 0
+        return row
+    def _step_trained(self) -> dict[str, Any]:
+        """Advance the trained-RL environment by one step and return a trace row.
+
+        Chooses an action index with ``self.rl_model`` (recurrent or maskable,
+        selected by ``self.model_type``), steps ``self.rl_env`` with it, and
+        builds the row from the core environment's observation.
+
+        Returns:
+            A dict with the step counters, the decoded action, the reward, the
+            invalid-action flags from ``info`` and fixed ``trained_rl`` metadata.
+        """
+        import numpy as np
+        masks = self.rl_env.action_masks()
+        if self.model_type == "recurrent":
+            # The recurrent predict call is not given the mask, so the chosen
+            # index is validated against `masks` after prediction.
+            action, self.rl_lstm_state = self.rl_model.predict(
+                self.obs,
+                state=self.rl_lstm_state,
+                episode_start=self.rl_episode_start,
+                deterministic=True,
+            )
+            action_idx = int(action.item() if hasattr(action, "item") else action)
+            if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
+                # Out-of-range or masked choice: fall back to the first allowed index.
+                valid = np.flatnonzero(masks)
+                # NOTE(review): 18 is a hard-coded last-resort index used when no
+                # action is allowed; presumably the "advance time" slot - confirm
+                # against the action decode table.
+                action_idx = int(valid[0]) if valid.size > 0 else 18
+        else:
+            # Maskable path: the mask is handed to predict(); no post-check is done here.
+            from sb3_contrib.common.maskable.utils import get_action_masks
+            action, _ = self.rl_model.predict(
+                self.obs,
+                action_masks=get_action_masks(self.rl_env),
+                deterministic=True,
+            )
+            action_idx = int(action.item() if hasattr(action, "item") else action)
+        self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
+        done = bool(terminated or truncated)
+        if self.model_type == "recurrent":
+            # Stored for the next predict() call's `episode_start` argument.
+            self.rl_episode_start = np.array([done], dtype=bool)
+        # Row fields are read from the core env's observation rather than self.obs
+        # (presumably self.obs is the encoded feature vector - TODO confirm).
+        core_obs = self.rl_env._core_env._build_observation()
+        action_model, action_label = _decode_action_idx(action_idx)
+        return {
+            "step": self.step_idx,
+            "day": core_obs.day,
+            "action_type": action_label,
+            "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
+            "action_index": action_idx,
+            "reward": float(reward),
+            "done": done,
+            "backlog": core_obs.total_backlog,
+            "completed": core_obs.total_completed,
+            "sla_breaches": core_obs.total_sla_breaches,
+            "fairness_gap": float(core_obs.fairness_gap),
+            "escalation_budget_remaining": core_obs.escalation_budget_remaining,
+            "invalid_action": bool(info.get("invalid_action", False)),
+            "last_action_error": info.get("last_action_error"),
+            "queue_rows": _queue_rows(core_obs),
+            # Fixed metadata: no LLM is involved in this agent mode.
+            "decision_source": "trained_rl",
+            "provider": "rl",
+            "model_used": self.model_path or "trained_rl",
+            "llm_attempts": 0,
+            "llm_error": None,
+            "llm_key_label": None,
+        }
+    def _finalize(self) -> None:
+        if self.done:
+            return
+        self.done = True
+        if self.agent_mode == "trained_rl":
+            final_state = self.rl_env._core_env.state()
+        else:
+            final_state = self.env.state()
+        gr = grade_episode(final_state)
+        self.score = float(gr.score)
+        self.grader_name = gr.grader_name
+        llm_steps = sum(
+            1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"}
+        )
+        fallback_steps = sum(
+            1
+            for row in self.trace
+            if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
+        )
+        repaired_steps = sum(
+            1
+            for row in self.trace
+            if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
+        )
+        total_steps = max(1, len(self.trace))
+        invalid_actions = int(final_state.metrics.total_invalid_actions)
+        invalid_rate = float(invalid_actions) / float(total_steps)
+        repaired_rate = float(repaired_steps) / float(total_steps)
+        ranked_models: list[dict[str, Any]] = []
+        if self.llm_model_stats:
+            for (provider, model), stat in self.llm_model_stats.items():
+                calls = int(stat.get("calls", 0))
+                if calls <= 0:
+                    continue
+                ranked_models.append(
+                    {
+                        "provider": provider,
+                        "model": model,
+                        "calls": calls,
+                        "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
+                        "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
+                    }
+                )
+            ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))
+        self.summary = {
+            "total_steps": final_state.total_steps,
+            "total_completed": final_state.total_completed,
+            "total_backlog": final_state.total_backlog,
+            "total_sla_breaches": final_state.total_sla_breaches,
+            "fairness_gap": float(final_state.fairness_gap),
+            "total_invalid_actions": final_state.metrics.total_invalid_actions,
+            "invalid_action_rate": invalid_rate,
+            "llm_steps": llm_steps,
+            "heuristic_fallback_steps": fallback_steps,
+            "llm_repaired_steps": repaired_steps,
+            "repaired_action_rate": repaired_rate,
+            "auto_switch_count": self.auto_switch_count,
+            "last_switch_reason": self.last_switch_reason,
+            "effective_max_steps": self.max_steps,
+            "recommended_min_steps": _recommended_min_steps(self.task_id),
+        }
+        if self.agent_mode == "llm_inference":
+            self.summary["llm_route"] = list(self.llm_route)
+            self.summary["llm_model_performance"] = ranked_models
+        if self.agent_mode == "trained_rl":
+            self.summary["model_path"] = self.model_path
+            self.summary["model_type"] = self.model_type
+def run_simulation(
+    *,
+    task_id: str,
+    agent_mode: SimulationAgentMode,
+    max_steps: int,
+    seed: int | None,
+    policy_name: str | None = None,
+    model_path: str | None = None,
+    model_type: Literal["maskable", "recurrent"] = "maskable",
+) -> SimulationRun:
+    session = LiveSimulationSession(
+        task_id=task_id,
+        agent_mode=agent_mode,
+        max_steps=max_steps,
+        seed=seed,
+        policy_name=policy_name,
+        model_path=model_path,
+        model_type=model_type,
+    )
+    try:
+        while not session.done:
+            session.step_once()
+        return SimulationRun(
+            task_id=session.task_id,
+            agent_mode=session.agent_mode,
+            seed=session.seed,
+            total_reward=float(session.total_reward),
+            score=float(session.score or 0.0),
+            grader_name=str(session.grader_name or "unknown"),
+            summary=dict(session.summary or {}),
+            trace=list(session.trace),
+        )
+    finally:
+        session.close()
+def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
+    try:
+        from rl.feature_builder import ACTION_DECODE_TABLE
+        from app.models import PriorityMode, ServiceType
+    except Exception:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
+    row = ACTION_DECODE_TABLE.get(int(action_idx))
+    if row is None:
+        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
+from app.engine import (
+    DayResult,
+    DaySimulator,
+    LiveSimulationSession,
+    SimulationAgentMode,
+    SimulationRun,
+    run_simulation,
+)
+__all__ = [
+    "DayResult",
+    "DaySimulator",
+    "SimulationAgentMode",
+    "SimulationRun",
+    "LiveSimulationSession",
+    "run_simulation",
+]

app/state_machine.py ADDED Viewed

	@@ -0,0 +1,107 @@

+"""
+state_machine.py — Gov Workflow OpenEnv
+Deterministic workflow transition engine aligned with Phase 1 schemas.
+"""
+from __future__ import annotations
+from app.models import ApplicationCase, InternalSubstate, StageType
+# Maps each internal workflow substate to the public stage stored on the case.
+# Several substates share one public stage: DOC_VALIDATION,
+# SERVICE_SPECIFIC_VALIDATION and BLOCKED_MISSING_DOCS all map to
+# DOCUMENT_VERIFICATION. The terminal substates reuse existing stages:
+# COMPLETED maps to ISSUANCE and REJECTED maps to APPROVAL.
+INTERNAL_TO_PUBLIC_STAGE: dict[InternalSubstate, StageType] = {
+    InternalSubstate.PRE_SCRUTINY: StageType.SUBMISSION,
+    InternalSubstate.DOC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
+    InternalSubstate.SERVICE_SPECIFIC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
+    InternalSubstate.FIELD_VERIFICATION_PENDING: StageType.FIELD_VERIFICATION,
+    InternalSubstate.DECISION_PENDING: StageType.APPROVAL,
+    InternalSubstate.ISSUANCE_READY: StageType.ISSUANCE,
+    InternalSubstate.BLOCKED_MISSING_DOCS: StageType.DOCUMENT_VERIFICATION,
+    InternalSubstate.COMPLETED: StageType.ISSUANCE,
+    InternalSubstate.REJECTED: StageType.APPROVAL,
+}
+def build_public_stage(substate: InternalSubstate) -> StageType:
+    return INTERNAL_TO_PUBLIC_STAGE.get(substate, StageType.SUBMISSION)
+def transition_case(case: ApplicationCase, new_substate: InternalSubstate) -> None:
+    case.internal_substate = new_substate
+    case.public_stage = build_public_stage(new_substate)
+    case.days_in_current_stage = 0
+def can_advance(case: ApplicationCase) -> bool:
+    if case.completed or case.rejected:
+        return False
+    if case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS:
+        return False
+    return True
+def advance_case(case: ApplicationCase, rng: object = None) -> tuple[bool, bool]:
+    """
+    Returns (progressed, completed).
+    """
+    if not can_advance(case):
+        return False, False
+    early_stages = {
+        InternalSubstate.PRE_SCRUTINY,
+        InternalSubstate.DOC_VALIDATION,
+    }
+    if case.has_missing_docs and case.internal_substate in early_stages:
+        transition_case(case, InternalSubstate.BLOCKED_MISSING_DOCS)
+        return True, False
+    current = case.internal_substate
+    if current == InternalSubstate.PRE_SCRUTINY:
+        transition_case(case, InternalSubstate.DOC_VALIDATION)
+        return True, False
+    if current == InternalSubstate.DOC_VALIDATION:
+        if case.field_verification_required:
+            transition_case(case, InternalSubstate.FIELD_VERIFICATION_PENDING)
+        else:
+            transition_case(case, InternalSubstate.DECISION_PENDING)
+        return True, False
+    if current == InternalSubstate.SERVICE_SPECIFIC_VALIDATION:
+        if case.field_verification_required:
+            transition_case(case, InternalSubstate.FIELD_VERIFICATION_PENDING)
+        else:
+            transition_case(case, InternalSubstate.DECISION_PENDING)
+        return True, False
+    if current == InternalSubstate.FIELD_VERIFICATION_PENDING:
+        return False, False
+    if current == InternalSubstate.DECISION_PENDING:
+        transition_case(case, InternalSubstate.ISSUANCE_READY)
+        return True, False
+    if current == InternalSubstate.ISSUANCE_READY:
+        transition_case(case, InternalSubstate.COMPLETED)
+        case.completed = True
+        return True, True
+    return False, False
+def unblock_missing_docs(case: ApplicationCase) -> bool:
+    if case.internal_substate != InternalSubstate.BLOCKED_MISSING_DOCS:
+        return False
+    case.has_missing_docs = False
+    case.doc_resolution_day = None
+    transition_case(case, InternalSubstate.DOC_VALIDATION)
+    return True
+def complete_field_verification(case: ApplicationCase) -> bool:
+    if case.internal_substate != InternalSubstate.FIELD_VERIFICATION_PENDING:
+        return False
+    case.field_verification_completion_day = None
+    transition_case(case, InternalSubstate.DECISION_PENDING)
+    return True

app/story_router.py ADDED Viewed

	@@ -0,0 +1,407 @@

+"""
+app/story_router.py
+FastAPI router that serves LLM training story data.
+All 7 endpoints are READ-ONLY - they serve pre-saved JSON files.
+No frontend elements are invoked from backend.
+No training runs happen here - only data serving.
+Mount in main.py with:
+  from app.story_router import router as story_router
+  app.include_router(story_router)
+"""
+from __future__ import annotations
+import asyncio
+import json
+from pathlib import Path
+from typing import Optional
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+router = APIRouter(prefix="/training", tags=["Training Story"])
+# --- Data directory --------------------------------------------------
+DATA_DIR = Path("data/training_logs")
+# Hard-coded "before" figures per task id, used by the comparison endpoint as
+# the heuristic baseline. Tasks without an entry fall back to zeros there.
+# NOTE(review): presumably captured from a heuristic baseline run; the values
+# are static and will not track changes to the environment - confirm source.
+HEURISTIC_BASELINES: dict[str, dict] = {
+    "district_backlog_easy": {
+        "score": 0.527, "completed": 41,
+        "breaches": 184, "reward": -79.86, "avg_wait": 6.9,
+    },
+    "mixed_urgency_medium": {
+        "score": 0.454, "completed": 58,
+        "breaches": 34,  "reward": -684.22, "avg_wait": 12.4,
+    },
+    "cross_department_hard": {
+        "score": 0.606, "completed": 83,
+        "breaches": 723, "reward": -2318.78, "avg_wait": 15.6,
+    },
+}
+# --- Internal helpers ------------------------------------------------
+def _load_log(task_id: str) -> dict:
+    """Load JSON training log for given task. Raises 404 if missing."""
+    path = DATA_DIR / f"{task_id}_training_log.json"
+    if not path.exists():
+        raise HTTPException(
+            status_code=404,
+            detail=(
+                f"Training log not found for task '{task_id}'. "
+                f"Run: python scripts/convert_grpo_csv.py "
+                f"--csv <your_csv> --task {task_id}"
+            ),
+        )
+    with open(path, encoding="utf-8") as f:
+        return json.load(f)
+def _dominant_action(episodes: list[dict]) -> str:
+    """Returns the action name with the highest total weight across episodes."""
+    totals: dict[str, float] = {}
+    for ep in episodes:
+        for action, val in ep.get("actions", {}).items():
+            totals[action] = totals.get(action, 0.0) + float(val)
+    return max(totals, key=totals.get) if totals else "advance_time"
+def _phase_message(ep: dict) -> str:
+    """Returns a human-readable learning message for one episode."""
+    phase = ep.get("phase", "random")
+    reward = ep.get("total_reward", 0)
+    score = ep.get("score", 0)
+    fn1 = ep.get("fn1_valid", 1.0)
+    fn2 = ep.get("fn2_no_halluc", 1.0)
+    episode = ep.get("episode", 0)
+    validity_note = "" if fn1 >= 1.0 else f" WARNING: Invalid action at step {episode}."
+    halluc_note = "" if fn2 >= 1.0 else " WARNING: Hallucination detected."
+    messages = {
+        "random": (
+            f"Step {episode}: LLM is exploring. "
+            f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
+        ),
+        "exploring": (
+            f"Step {episode}: LLM finding patterns. "
+            f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
+        ),
+        "learning": (
+            f"Step {episode}: LLM reinforcing good actions. "
+            f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
+        ),
+        "converged": (
+            f"Step {episode}: LLM converged. "
+            f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
+        ),
+    }
+    return messages.get(phase, f"Step {episode}: reward={reward:.3f}")
+# ================================================================
+# ENDPOINT 1 - GET /training/tasks
+# ================================================================
+@router.get("/tasks")
+async def list_trained_tasks() -> dict:
+    """
+    Returns all tasks that have a saved training log JSON file.
+    Frontend calls this first to populate task selector.
+    """
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    available = []
+    for path in sorted(DATA_DIR.glob("*_training_log.json")):
+        task_id = path.stem.replace("_training_log", "")
+        try:
+            log = _load_log(task_id)
+            available.append({
+                "task_id":            task_id,
+                "total_episodes":     log["total_episodes"],
+                "final_score":        log["summary"]["last_episode_score"],
+                "reward_improvement": log["summary"]["reward_improvement_pct"],
+                "base_model":         log.get("base_model", ""),
+                "training_method":    log.get("training_method", "GRPO"),
+            })
+        except HTTPException:
+            pass
+    return {"tasks": available}
+# ================================================================
+# ENDPOINT 2 - GET /training/summary/{task_id}
+# ================================================================
+@router.get("/summary/{task_id}")
+async def training_summary(task_id: str) -> dict:
+    """Returns overview stats + narrative for the ACT 2 header card."""
+    log = _load_log(task_id)
+    eps = log["episodes"]
+    n = len(eps)
+    q1, q2, q3 = n // 4, n // 2, 3 * n // 4
+    p1_dom = _dominant_action(eps[:q1])
+    p2_dom = _dominant_action(eps[q1:q2])
+    p3_dom = _dominant_action(eps[q2:q3])
+    p4_dom = _dominant_action(eps[q3:])
+    avg_p1_r = sum(e["total_reward"] for e in eps[:q1]) / max(q1, 1)
+    avg_p4_r = sum(e["total_reward"] for e in eps[q3:]) / max(n - q3, 1)
+    return {
+        "task_id":          log["task_id"],
+        "base_model":       log.get("base_model", ""),
+        "training_method":  log.get("training_method", "GRPO"),
+        "lora_rank":        log.get("lora_rank", 16),
+        "total_episodes":   n,
+        "reward_functions": log.get("reward_functions", {}),
+        "summary":          log["summary"],
+        "narrative": {
+            "phase_1": (
+                f"Steps 1-{q1}: LLM chose '{p1_dom}' most often. "
+                f"Avg reward {avg_p1_r:.2f}. Still exploring randomly."
+            ),
+            "phase_2": (
+                f"Steps {q1}-{q2}: LLM discovered '{p2_dom}'. "
+                "Reward started improving as valid patterns emerged."
+            ),
+            "phase_3": (
+                f"Steps {q2}-{q3}: LLM reinforced '{p3_dom}'. "
+                "Action validity reaching near-perfect levels."
+            ),
+            "phase_4": (
+                f"Steps {q3}-{n}: LLM converged on '{p4_dom}'. "
+                f"Avg reward {avg_p4_r:.2f}. "
+                f"Final score {log['summary']['last_episode_score']:.1%}."
+            ),
+        },
+    }
+# ================================================================
+# ENDPOINT 3 - GET /training/curve/{task_id}
+# ================================================================
+@router.get("/curve/{task_id}")
+async def training_curve(
+    task_id: str,
+    downsample: int = 1,
+) -> dict:
+    """
+    Returns episode-by-episode reward + score for chart rendering.
+    downsample=5 -> returns every 5th step.
+    """
+    log = _load_log(task_id)
+    eps = log["episodes"]
+    sampled = eps[::max(1, downsample)]
+    return {
+        "task_id":      task_id,
+        "total_points": len(sampled),
+        "curve": [
+            {
+                "episode":       e["episode"],
+                "reward":        e["total_reward"],
+                "score":         e["score"],
+                "fn1_valid":     e.get("fn1_valid", 1.0),
+                "fn2_no_halluc": e.get("fn2_no_halluc", 1.0),
+                "fn3_env_score": e.get("fn3_env_score", 0.0),
+                "phase":         e["phase"],
+            }
+            for e in sampled
+        ],
+    }
+# ================================================================
+# ENDPOINT 4 - GET /training/actions/{task_id}
+# ================================================================
+@router.get("/actions/{task_id}")
+async def action_evolution(task_id: str) -> dict:
+    """Returns action distribution at 5 checkpoints across training."""
+    log = _load_log(task_id)
+    eps = log["episodes"]
+    n = len(eps)
+    idxs = [0, n // 4, n // 2, 3 * n // 4, n - 1]
+    result = []
+    for idx in idxs:
+        ep = eps[idx]
+        result.append({
+            "episode": ep["episode"],
+            "phase":   ep["phase"],
+            "actions": ep.get("actions", {}),
+            "reward":  ep["total_reward"],
+            "score":   ep["score"],
+        })
+    avg_fn1_start = sum(e.get("fn1_valid", 1.0) for e in eps[:n // 4]) / max(n // 4, 1)
+    avg_fn1_end = sum(e.get("fn1_valid", 1.0) for e in eps[3 * n // 4:]) / max(n - 3 * n // 4, 1)
+    insight = (
+        f"Action validity improved from {avg_fn1_start:.1%} (early) "
+        f"to {avg_fn1_end:.1%} (final). "
+        "LLM learned to output valid government workflow JSON consistently."
+    )
+    return {
+        "task_id":     task_id,
+        "checkpoints": result,
+        "insight":     insight,
+    }
+# ================================================================
+# ENDPOINT 5 - GET /training/episode/{task_id}/{episode_num}
+# ================================================================
+@router.get("/episode/{task_id}/{episode_num}")
+async def episode_detail(task_id: str, episode_num: int) -> dict:
+    """Returns detail for one specific training step."""
+    log = _load_log(task_id)
+    eps = log["episodes"]
+    if episode_num < 1 or episode_num > len(eps):
+        raise HTTPException(
+            status_code=400,
+            detail=f"episode_num must be 1-{len(eps)}. Got {episode_num}.",
+        )
+    ep = eps[episode_num - 1]
+    rewards_so_far = [e["total_reward"] for e in eps[:episode_num]]
+    scores_so_far = [e["score"] for e in eps[:episode_num]]
+    return {
+        "task_id":             task_id,
+        "episode":             ep["episode"],
+        "total_episodes":      len(eps),
+        "reward":              ep["total_reward"],
+        "score":               ep["score"],
+        "fn1_valid":           ep.get("fn1_valid", 1.0),
+        "fn2_no_halluc":       ep.get("fn2_no_halluc", 1.0),
+        "fn3_env_score":       ep.get("fn3_env_score", 0.0),
+        "phase":               ep["phase"],
+        "actions":             ep.get("actions", {}),
+        "running_best_reward": max(rewards_so_far),
+        "running_avg_score":   round(sum(scores_so_far) / len(scores_so_far), 4),
+        "message":             _phase_message(ep),
+    }
+# ================================================================
+# ENDPOINT 6 - GET /training/stream/{task_id} [SSE]
+# ================================================================
+@router.get("/stream/{task_id}")
+async def stream_training_replay(
+    task_id: str,
+    delay_ms: int = 100,
+    start_episode: int = 1,
+    end_episode: Optional[int] = None,
+) -> StreamingResponse:
+    """Server-Sent Events endpoint for animated chart replay."""
+    log = _load_log(task_id)
+    eps = log["episodes"]
+    end = min(end_episode or len(eps), len(eps))
+    subset = eps[start_episode - 1: end]
+    async def generate():
+        meta_event = json.dumps({
+            "type":             "meta",
+            "task_id":          task_id,
+            "total_episodes":   len(eps),
+            "summary":          log["summary"],
+            "reward_functions": log.get("reward_functions", {}),
+        })
+        yield f"data: {meta_event}\n\n"
+        rewards_so_far: list[float] = []
+        scores_so_far: list[float] = []
+        for ep in subset:
+            rewards_so_far.append(ep["total_reward"])
+            scores_so_far.append(ep["score"])
+            event = json.dumps({
+                "type":              "episode",
+                "episode":           ep["episode"],
+                "total_episodes":    len(eps),
+                "reward":            ep["total_reward"],
+                "score":             ep["score"],
+                "fn1_valid":         ep.get("fn1_valid",     1.0),
+                "fn2_no_halluc":     ep.get("fn2_no_halluc", 1.0),
+                "fn3_env_score":     ep.get("fn3_env_score", 0.0),
+                "phase":             ep["phase"],
+                "actions":           ep.get("actions", {}),
+                "running_best":      max(rewards_so_far),
+                "running_avg_score": round(
+                    sum(scores_so_far) / len(scores_so_far), 4
+                ),
+                "message":           _phase_message(ep),
+            })
+            yield f"data: {event}\n\n"
+            await asyncio.sleep(delay_ms / 1000.0)
+        done_event = json.dumps({
+            "type":        "done",
+            "final_score": scores_so_far[-1] if scores_so_far else 0.0,
+            "best_reward": max(rewards_so_far) if rewards_so_far else 0.0,
+            "total_steps": len(subset),
+        })
+        yield f"data: {done_event}\n\n"
+    return StreamingResponse(
+        generate(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",
+            "Connection": "keep-alive",
+        },
+    )
+# ================================================================
+# ENDPOINT 7 - GET /training/comparison/{task_id}
+# ================================================================
+@router.get("/comparison/{task_id}")
+async def before_after_comparison(task_id: str) -> dict:
+    """Returns before (heuristic) vs after (trained LLM)."""
+    log = _load_log(task_id)
+    baseline = HEURISTIC_BASELINES.get(task_id, {})
+    summary = log["summary"]
+    bef_score = baseline.get("score", 0.0)
+    after_score = summary["last_episode_score"]
+    delta = round(after_score - bef_score, 4)
+    pct = round((delta / bef_score) * 100, 1) if bef_score else 0.0
+    return {
+        "task_id": task_id,
+        "before": {
+            "label":     "Heuristic Baseline (no AI)",
+            "score":     bef_score,
+            "reward":    baseline.get("reward",    0.0),
+            "completed": baseline.get("completed", 0),
+            "breaches":  baseline.get("breaches",  0),
+            "avg_wait":  baseline.get("avg_wait",  0.0),
+        },
+        "after": {
+            "label":               f"GRPO Trained LLM ({log.get('base_model','')})",
+            "score":               after_score,
+            "reward":              summary["last_episode_reward"],
+            "avg_fn1_valid":       summary.get("avg_fn1_valid",     0.0),
+            "avg_fn2_no_halluc":   summary.get("avg_fn2_no_halluc", 0.0),
+            "invalid_steps":       summary.get("invalid_action_steps", 0),
+            "hallucination_steps": summary.get("hallucination_steps",  0),
+        },
+        "improvement": {
+            "score_delta": delta,
+            "score_pct":   pct,
+            "verdict": (
+                "LLM significantly outperforms baseline"
+                if delta > 0.10 else
+                "LLM moderately outperforms baseline"
+                if delta > 0.0 else
+                "LLM needs more training"
+            ),
+        },
+    }

app/tasks.py ADDED Viewed

	@@ -0,0 +1,144 @@

+"""
+tasks.py — Gov Workflow OpenEnv v2.0
+Three deterministic benchmark tasks: easy, medium, hard.
+"""
+from app.models import (
+    TaskConfig, ServiceType, ScenarioMode, EventType, OfficerPool
+)
+# Easy benchmark: a single service (income certificate) over 30 days with all
+# 8 officers allocated to it. The only allowed event is NO_EVENT, so the 0.05
+# event probability cannot introduce a disruptive event type.
+TASK_EASY = TaskConfig(
+    task_id="district_backlog_easy",
+    display_name="District Backlog Clearance — Revenue Office",
+    difficulty="easy",
+    scenario_mode=ScenarioMode.NORMAL,
+    seed=42,
+    max_days=30,
+    enabled_services=[ServiceType.INCOME_CERTIFICATE],
+    arrival_rate_per_day={ServiceType.INCOME_CERTIFICATE: 12.0},
+    digital_intake_ratio=0.65,
+    initial_officer_pool=OfficerPool(
+        total_officers=8, available_officers=8,
+        allocated={ServiceType.INCOME_CERTIFICATE: 8},
+    ),
+    # Per-service overrides; the other tasks pass None here instead.
+    missing_docs_probability_override={ServiceType.INCOME_CERTIFICATE: 0.20},
+    field_verification_probability_override={ServiceType.INCOME_CERTIFICATE: 0.15},
+    escalation_budget=5,
+    fairness_threshold=None,
+    event_probability=0.05,
+    allowed_events=[EventType.NO_EVENT],
+)
+# Medium benchmark: five services over 45 days with 14 officers, all of them
+# allocated up front (4 + 2 + 2 + 3 + 3). The only allowed event is a
+# document-rejection spike, and no fairness threshold is set.
+TASK_MEDIUM = TaskConfig(
+    task_id="mixed_urgency_medium",
+    display_name="Mixed Urgency Backlog — Taluka Office",
+    difficulty="medium",
+    scenario_mode=ScenarioMode.NORMAL,
+    seed=123,
+    max_days=45,
+    enabled_services=[
+        ServiceType.INCOME_CERTIFICATE,
+        ServiceType.LAND_REGISTRATION,
+        ServiceType.PASSPORT,
+        ServiceType.DRIVING_LICENSE,
+        ServiceType.AADHAAR_CARD,
+    ],
+    arrival_rate_per_day={
+        ServiceType.INCOME_CERTIFICATE: 8.0,
+        ServiceType.LAND_REGISTRATION:  4.0,
+        ServiceType.PASSPORT:           4.0,
+        ServiceType.DRIVING_LICENSE:    5.0,
+        ServiceType.AADHAAR_CARD:       6.0,
+    },
+    digital_intake_ratio=0.72,
+    initial_officer_pool=OfficerPool(
+        total_officers=14, available_officers=14,
+        allocated={
+            ServiceType.INCOME_CERTIFICATE: 4,
+            ServiceType.LAND_REGISTRATION:  2,
+            ServiceType.PASSPORT:           2,
+            ServiceType.DRIVING_LICENSE:    3,
+            ServiceType.AADHAAR_CARD:       3,
+        },
+    ),
+    # None here means no per-task override is supplied
+    # (presumably the service defaults apply - TODO confirm in TaskConfig).
+    missing_docs_probability_override=None,
+    field_verification_probability_override=None,
+    escalation_budget=8,
+    fairness_threshold=None,
+    event_probability=0.15,
+    allowed_events=[EventType.DOCUMENT_REJECTION_SPIKE],
+)
+# Hard benchmark: the same five services in CRISIS mode over 60 days with 18
+# officers, all allocated up front (5 + 3 + 3 + 3 + 4). It is the only task
+# here with a fairness threshold (0.70), and it allows five disruptive event
+# types at a 0.30 event probability.
+TASK_HARD = TaskConfig(
+    task_id="cross_department_hard",
+    display_name="Cross-Department Crisis — District Collectorate",
+    difficulty="hard",
+    scenario_mode=ScenarioMode.CRISIS,
+    seed=999,
+    max_days=60,
+    enabled_services=[
+        ServiceType.INCOME_CERTIFICATE,
+        ServiceType.LAND_REGISTRATION,
+        ServiceType.PASSPORT,
+        ServiceType.DRIVING_LICENSE,
+        ServiceType.AADHAAR_CARD,
+    ],
+    arrival_rate_per_day={
+        ServiceType.INCOME_CERTIFICATE: 11.0,
+        ServiceType.LAND_REGISTRATION:  6.0,
+        ServiceType.PASSPORT:           6.0,
+        ServiceType.DRIVING_LICENSE:    7.0,
+        ServiceType.AADHAAR_CARD:       8.0,
+    },
+    digital_intake_ratio=0.80,
+    initial_officer_pool=OfficerPool(
+        total_officers=18, available_officers=18,
+        allocated={
+            ServiceType.INCOME_CERTIFICATE: 5,
+            ServiceType.LAND_REGISTRATION:  3,
+            ServiceType.PASSPORT:           3,
+            ServiceType.DRIVING_LICENSE:    3,
+            ServiceType.AADHAAR_CARD:       4,
+        },
+    ),
+    # None here means no per-task override is supplied
+    # (presumably the service defaults apply - TODO confirm in TaskConfig).
+    missing_docs_probability_override=None,
+    field_verification_probability_override=None,
+    escalation_budget=10,
+    fairness_threshold=0.70,
+    event_probability=0.30,
+    allowed_events=[
+        EventType.SURGE_APPLICATIONS,
+        EventType.OFFICER_UNAVAILABLE,
+        EventType.DOCUMENT_REJECTION_SPIKE,
+        EventType.REVENUE_DB_DELAY,
+        EventType.SLA_ESCALATION_ORDER,
+    ],
+)
+def make_extreme_variant(base_task: TaskConfig) -> TaskConfig:
+    variant = base_task.model_copy(deep=True)
+    variant.task_id = base_task.task_id + "_extreme"
+    variant.display_name = base_task.display_name + " [EXTREME]"
+    variant.scenario_mode = ScenarioMode.EXTREME_OVERLOAD
+    variant.event_probability = min(1.0, base_task.event_probability * 3.0)
+    variant.allowed_events = [e for e in EventType if e != EventType.NO_EVENT]
+    return variant
+# Lookup table from task_id to its TaskConfig. Holds the three base tasks plus
+# an extreme variant of the easy task, which is built once when this module
+# is imported.
+TASK_REGISTRY: dict = {
+    "district_backlog_easy":         TASK_EASY,
+    "mixed_urgency_medium":          TASK_MEDIUM,
+    "cross_department_hard":         TASK_HARD,
+    "district_backlog_easy_extreme": make_extreme_variant(TASK_EASY),
+}
+def get_task(task_id: str) -> TaskConfig:
+    if task_id not in TASK_REGISTRY:
+        raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASK_REGISTRY)}")
+    return TASK_REGISTRY[task_id]
+def list_tasks() -> list:
+    return list(TASK_REGISTRY.keys())
+def list_benchmark_tasks() -> list:
+    return ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]
+# Second name for the very same dict object as TASK_REGISTRY (not a copy).
+# NOTE(review): presumably kept for callers that import TASKS — confirm before removing.
+TASKS = TASK_REGISTRY

app/training_jobs.py ADDED Viewed

	@@ -0,0 +1,634 @@

+from __future__ import annotations
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import time
+import math
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Literal
+from uuid import uuid4
+from app.persistence import PersistenceStore
+# Lifecycle states a training job can be in.
+Status = Literal["queued", "running", "completed", "failed", "stopped"]
+# Any "<done>/<total>" counter (digits, optional thousands commas) found in a log line.
+_PROGRESS_RE = re.compile(r"(\d[\d,]*)/(\d[\d,]*)")
+# One "| <name> | <number> |" cell pair from a pipe-delimited metrics table.
+_METRIC_ROW_RE = re.compile(r"\|\s*([a-zA-Z0-9_ ]+?)\s*\|\s*(-?\d+(?:\.\d+)?)\s*\|")
+# Periodic evaluation line "Eval num_timesteps=<n>, episode_reward=<r>" (case-insensitive).
+_EVAL_PROGRESS_RE = re.compile(
+    r"Eval\s+num_timesteps=(\d+),\s*episode_reward=([-]?\d+(?:\.\d+)?)",
+    re.IGNORECASE,
+)
+# Whole-line per-task result:
+# "[Eval] <task_id> score=<x> reward=<x> completed=<n> sla_breaches=<n>".
+_EVAL_ROW_RE = re.compile(
+    r"^\[Eval\]\s+([a-z_]+)\s+score=([0-9.]+)\s+reward=([-0-9.]+)\s+completed=(\d+)\s+sla_breaches=(\d+)$"
+)
+# Whole-line summary "[Eval] Average grader score: <x>".
+_AVG_RE = re.compile(r"^\[Eval\]\s+Average grader score:\s+([0-9.]+)$")
+# "[Eval] New best grader score: <x>", optionally with "recurrent" after "best"
+# (case-insensitive, may appear anywhere in the line).
+_BEST_GRADER_RE = re.compile(
+    r"\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)",
+    re.IGNORECASE,
+)
+def _now() -> float:
+    return time.time()
+def _tail_append(lines: list[str], line: str, max_size: int = 500) -> None:
+    lines.append(line.rstrip("\n"))
+    if len(lines) > max_size:
+        del lines[: len(lines) - max_size]
+def _normalize_metric_key(raw: str) -> str:
+    return raw.strip().lower().replace(" ", "_")
+def _parse_eval(stdout: str) -> tuple[list[dict[str, Any]], float | None]:
+    rows: list[dict[str, Any]] = []
+    avg: float | None = None
+    for line in stdout.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        row = _EVAL_ROW_RE.match(line)
+        if row:
+            rows.append(
+                {
+                    "task_id": row.group(1),
+                    "grader_score": float(row.group(2)),
+                    "total_reward": float(row.group(3)),
+                    "total_completed": int(row.group(4)),
+                    "total_sla_breaches": int(row.group(5)),
+                }
+            )
+            continue
+        m = _AVG_RE.match(line)
+        if m:
+            avg = float(m.group(1))
+    return rows, avg
+@dataclass
+class TrainingJob:
+    job_id: str
+    phase: int
+    timesteps: int
+    n_envs: int
+    seed: int
+    config_path: str
+    created_at: float = field(default_factory=_now)
+    started_at: float | None = None
+    updated_at: float = field(default_factory=_now)
+    ended_at: float | None = None
+    status: Status = "queued"
+    progress: float = 0.0
+    process_id: int | None = None
+    command: list[str] = field(default_factory=list)
+    output_model_path: str | None = None
+    output_model_name: str | None = None
+    latest_metrics: dict[str, float] = field(default_factory=dict)
+    metric_history: list[dict[str, Any]] = field(default_factory=list)
+    evaluation_rows: list[dict[str, Any]] = field(default_factory=list)
+    evaluation_avg_score: float | None = None
+    logs_tail: list[str] = field(default_factory=list)
+    error_message: str | None = None
+    return_code: int | None = None
+    process: subprocess.Popen[str] | None = field(default=None, repr=False)
+    lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+    last_persist_at: float = field(default_factory=lambda: 0.0, repr=False)
+    def snapshot(self) -> dict[str, Any]:
+        with self.lock:
+            return {
+                "job_id": self.job_id,
+                "phase": self.phase,
+                "timesteps": self.timesteps,
+                "n_envs": self.n_envs,
+                "seed": self.seed,
+                "config_path": self.config_path,
+                "created_at": self.created_at,
+                "started_at": self.started_at,
+                "updated_at": self.updated_at,
+                "ended_at": self.ended_at,
+                "status": self.status,
+                "progress": self.progress,
+                "process_id": self.process_id,
+                "command": self.command,
+                "output_model_path": self.output_model_path,
+                "output_model_name": self.output_model_name,
+                "latest_metrics": dict(self.latest_metrics),
+                "metric_history": list(self.metric_history),
+                "evaluation_rows": list(self.evaluation_rows),
+                "evaluation_avg_score": self.evaluation_avg_score,
+                "logs_tail": list(self.logs_tail),
+                "error_message": self.error_message,
+                "return_code": self.return_code,
+            }
+class TrainingJobManager:
+    def __init__(self, repo_root: Path, persistence: PersistenceStore | None = None) -> None:
+        self._repo_root = repo_root
+        self._persistence = persistence
+        self._jobs: dict[str, TrainingJob] = {}
+        self._lock = threading.Lock()
+        self._training_runs_root = (
+            self._persistence.training_runs_dir
+            if self._persistence is not None and self._persistence.enabled
+            else self._repo_root / "results" / "training_runs"
+        )
+        self._load_persisted_jobs()
+    def _load_persisted_jobs(self) -> None:
+        if self._persistence is None or not self._persistence.enabled:
+            return
+        persisted = self._persistence.list_training_jobs(limit=500)
+        with self._lock:
+            for snap in persisted:
+                try:
+                    job = TrainingJob(
+                        job_id=str(snap["job_id"]),
+                        phase=int(snap["phase"]),
+                        timesteps=int(snap["timesteps"]),
+                        n_envs=int(snap["n_envs"]),
+                        seed=int(snap["seed"]),
+                        config_path=str(snap.get("config_path") or ""),
+                        created_at=float(snap.get("created_at") or _now()),
+                        started_at=float(snap["started_at"]) if snap.get("started_at") is not None else None,
+                        updated_at=float(snap.get("updated_at") or _now()),
+                        ended_at=float(snap["ended_at"]) if snap.get("ended_at") is not None else None,
+                        status=str(snap.get("status") or "failed"),
+                        progress=float(snap.get("progress") or 0.0),
+                        process_id=int(snap["process_id"]) if snap.get("process_id") is not None else None,
+                        command=list(snap.get("command") or []),
+                        output_model_path=snap.get("output_model_path"),
+                        output_model_name=snap.get("output_model_name"),
+                        latest_metrics=dict(snap.get("latest_metrics") or {}),
+                        metric_history=list(snap.get("metric_history") or []),
+                        evaluation_rows=list(snap.get("evaluation_rows") or []),
+                        evaluation_avg_score=(
+                            float(snap["evaluation_avg_score"])
+                            if snap.get("evaluation_avg_score") is not None
+                            else None
+                        ),
+                        logs_tail=list(snap.get("logs_tail") or []),
+                        error_message=snap.get("error_message"),
+                        return_code=int(snap["return_code"]) if snap.get("return_code") is not None else None,
+                    )
+                except Exception:
+                    continue
+                # Process handles cannot survive a server restart. Recover to terminal state.
+                if job.status in ("queued", "running"):
+                    job.status = "failed"
+                    msg = "Recovered after restart: previous process state unavailable."
+                    job.error_message = f"{job.error_message} {msg}".strip() if job.error_message else msg
+                    if job.ended_at is None:
+                        job.ended_at = _now()
+                job.process = None
+                self._jobs[job.job_id] = job
+    def clear_jobs(self, *, clear_artifacts: bool = False) -> int:
+        to_stop: list[subprocess.Popen[str]] = []
+        with self._lock:
+            removed = len(self._jobs)
+            for job in self._jobs.values():
+                with job.lock:
+                    proc = job.process
+                    if proc is not None and job.status in ("queued", "running"):
+                        to_stop.append(proc)
+            self._jobs.clear()
+        for proc in to_stop:
+            try:
+                proc.terminate()
+            except Exception:
+                pass
+        if self._persistence is not None and self._persistence.enabled:
+            self._persistence.clear_training_jobs()
+        if clear_artifacts:
+            try:
+                if self._training_runs_root.exists():
+                    shutil.rmtree(self._training_runs_root, ignore_errors=True)
+                self._training_runs_root.mkdir(parents=True, exist_ok=True)
+            except Exception:
+                pass
+        return removed
+    def _persist_job(self, job: TrainingJob) -> None:
+        if self._persistence is None or not self._persistence.enabled:
+            return
+        snapshot = job.snapshot()
+        self._persistence.upsert_training_job(snapshot)
+        with job.lock:
+            job.last_persist_at = _now()
+    def list_jobs(self) -> list[dict[str, Any]]:
+        with self._lock:
+            jobs = list(self._jobs.values())
+        jobs.sort(key=lambda x: x.created_at, reverse=True)
+        return [job.snapshot() for job in jobs]
+    def get_job(self, job_id: str) -> dict[str, Any] | None:
+        with self._lock:
+            job = self._jobs.get(job_id)
+        return None if job is None else job.snapshot()
+    def start_job(
+        self,
+        *,
+        phase: int,
+        timesteps: int,
+        n_envs: int,
+        seed: int | None,
+        config_path: str | None,
+    ) -> dict[str, Any]:
+        job_id = str(uuid4())
+        job_seed = int(seed if seed is not None else int(time.time()) % 1_000_000)
+        cfg = config_path or (
+            "rl/configs/ppo_easy.yaml" if phase == 1 else "rl/configs/curriculum.yaml"
+        )
+        job = TrainingJob(
+            job_id=job_id,
+            phase=phase,
+            timesteps=timesteps,
+            n_envs=n_envs,
+            seed=job_seed,
+            config_path=cfg,
+        )
+        with self._lock:
+            self._jobs[job_id] = job
+        cmd = [
+            sys.executable,
+            "-u",
+            "-m",
+            "rl.train_ppo",
+            "--phase",
+            str(phase),
+            "--timesteps",
+            str(timesteps),
+            "--n-envs",
+            str(n_envs),
+            "--seed",
+            str(job_seed),
+        ]
+        if phase == 1:
+            # Keep Phase 1 UI responsive by emitting multiple eval checkpoints
+            # across the requested run length instead of only near the end.
+            phase1_eval_freq = max(128, int((timesteps / max(n_envs, 1)) / 15))
+            cmd.extend(
+                [
+                    "--phase1-config",
+                    cfg,
+                    "--phase1-eval-freq",
+                    str(phase1_eval_freq),
+                ]
+            )
+        else:
+            cmd.extend(["--phase2-config", cfg])
+        env = os.environ.copy()
+        env["PYTHONUNBUFFERED"] = "1"
+        proc = subprocess.Popen(
+            cmd,
+            cwd=str(self._repo_root),
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+        )
+        with job.lock:
+            job.command = cmd
+            job.status = "running"
+            job.started_at = _now()
+            job.updated_at = _now()
+            job.process_id = proc.pid
+            job.process = proc
+            _tail_append(job.logs_tail, f"[training_jobs] started pid={proc.pid}")
+            _tail_append(job.logs_tail, f"[training_jobs] command: {' '.join(cmd)}")
+        self._persist_job(job)
+        t = threading.Thread(target=self._watch_job, args=(job,), daemon=True)
+        t.start()
+        return job.snapshot()
+    @staticmethod
+    def _append_metric_point_locked(
+        job: TrainingJob,
+        *,
+        timesteps: float | None,
+        reward: float | None = None,
+        score: float | None = None,
+        source: str | None = None,
+        max_points: int = 5000,
+    ) -> None:
+        """
+        Append (or merge) a structured metric point while holding job.lock.
+        """
+        if timesteps is None or not math.isfinite(float(timesteps)):
+            return
+        payload: dict[str, Any] = {"t": float(timesteps)}
+        if reward is not None and math.isfinite(float(reward)):
+            payload["ep_rew_mean"] = float(reward)
+        if score is not None and math.isfinite(float(score)):
+            payload["grader_score"] = float(score)
+        if source:
+            payload["source"] = str(source)
+        if "ep_rew_mean" not in payload and "grader_score" not in payload:
+            return
+        if job.metric_history and float(job.metric_history[-1].get("t", -1.0)) == float(payload["t"]):
+            job.metric_history[-1].update(payload)
+        else:
+            job.metric_history.append(payload)
+        if len(job.metric_history) > max_points:
+            del job.metric_history[: len(job.metric_history) - max_points]
+    def stop_job(self, job_id: str) -> dict[str, Any] | None:
+        with self._lock:
+            job = self._jobs.get(job_id)
+        if job is None:
+            return None
+        with job.lock:
+            proc = job.process
+            if proc is None or job.status not in ("running", "queued"):
+                return job.snapshot()
+            job.status = "stopped"
+            job.updated_at = _now()
+        self._persist_job(job)
+        try:
+            proc.terminate()
+        except Exception:
+            pass
+        return job.snapshot()
+    def delete_job(self, job_id: str, *, clear_artifacts: bool = False) -> bool:
+        with self._lock:
+            job = self._jobs.pop(job_id, None)
+        if job is None:
+            return False
+        with job.lock:
+            proc = job.process
+            status = job.status
+            output_model_path = job.output_model_path
+        if proc is not None and status in ("queued", "running"):
+            try:
+                proc.terminate()
+            except Exception:
+                pass
+        if self._persistence is not None and self._persistence.enabled:
+            self._persistence.delete_training_job(job_id)
+        if clear_artifacts and output_model_path:
+            try:
+                out = Path(output_model_path)
+                if out.exists() and out.is_file():
+                    out.unlink(missing_ok=True)
+                parent = out.parent
+                if parent.exists() and parent.is_dir() and not any(parent.iterdir()):
+                    parent.rmdir()
+            except Exception:
+                pass
+        return True
+    def _watch_job(self, job: TrainingJob) -> None:
+        proc = job.process
+        if proc is None or proc.stdout is None:
+            with job.lock:
+                job.status = "failed"
+                job.error_message = "Training process failed to start."
+                job.updated_at = _now()
+                job.ended_at = _now()
+            self._persist_job(job)
+            return
+        for line in proc.stdout:
+            self._update_from_line(job, line)
+        return_code = proc.wait()
+        with job.lock:
+            job.return_code = int(return_code)
+            if job.status == "stopped":
+                job.ended_at = _now()
+                job.updated_at = _now()
+                job.process = None
+                return
+            if return_code == 0:
+                job.status = "completed"
+                job.progress = 1.0
+            else:
+                job.status = "failed"
+                base_error = f"Training exited with code {return_code}."
+                if not job.logs_tail:
+                    _tail_append(
+                        job.logs_tail,
+                        "[training_jobs] Process ended before producing logs. "
+                        "Check RL dependencies/environment and training command arguments.",
+                    )
+                job.error_message = base_error
+            job.ended_at = _now()
+            job.updated_at = _now()
+            job.process = None
+        self._persist_job(job)
+        if return_code == 0:
+            self._finalize_artifacts(job)
+    def _update_from_line(self, job: TrainingJob, line: str) -> None:
+        line = line.rstrip("\n")
+        should_persist = False
+        with job.lock:
+            _tail_append(job.logs_tail, line)
+            job.updated_at = _now()
+            p = _PROGRESS_RE.search(line)
+            if p:
+                num = int(p.group(1).replace(",", ""))
+                den = int(p.group(2).replace(",", ""))
+                if den > 0:
+                    job.progress = max(0.0, min(1.0, num / den))
+            ep = _EVAL_PROGRESS_RE.search(line)
+            if ep:
+                ts = int(ep.group(1))
+                rew = float(ep.group(2))
+                job.latest_metrics["total_timesteps"] = float(ts)
+                job.latest_metrics["ep_rew_mean"] = rew
+                self._append_metric_point_locked(
+                    job,
+                    timesteps=float(ts),
+                    reward=rew,
+                    source="eval_progress",
+                )
+                if job.timesteps > 0:
+                    job.progress = max(0.0, min(1.0, ts / float(job.timesteps)))
+            m = _METRIC_ROW_RE.search(line)
+            if m:
+                key = _normalize_metric_key(m.group(1))
+                val = float(m.group(2))
+                interesting = {
+                    "total_timesteps",
+                    "ep_rew_mean",
+                    "ep_len_mean",
+                    "grader_score",
+                    "mean_reward",
+                    "mean_ep_length",
+                    "episode_mean_sla_penalty",
+                    "episode_mean_fairness_penalty",
+                    "explained_variance",
+                    "approx_kl",
+                }
+                if key in interesting:
+                    job.latest_metrics[key] = val
+                    current_ts = job.latest_metrics.get("total_timesteps")
+                    if key == "total_timesteps":
+                        self._append_metric_point_locked(
+                            job,
+                            timesteps=val,
+                            reward=job.latest_metrics.get("ep_rew_mean"),
+                            score=job.latest_metrics.get("grader_score") or job.latest_metrics.get("avg_grader_score"),
+                            source="metrics_row_ts",
+                        )
+                    elif key in {"ep_rew_mean", "mean_reward"}:
+                        self._append_metric_point_locked(
+                            job,
+                            timesteps=float(current_ts) if current_ts is not None else None,
+                            reward=val,
+                            source="metrics_row_reward",
+                        )
+                    elif key in {"grader_score", "avg_grader_score"}:
+                        self._append_metric_point_locked(
+                            job,
+                            timesteps=float(current_ts) if current_ts is not None else None,
+                            score=val,
+                            source="metrics_row_score",
+                        )
+            best = _BEST_GRADER_RE.search(line)
+            if best:
+                score = float(best.group(1))
+                job.latest_metrics["grader_score"] = score
+                fallback_ts = (
+                    float(job.latest_metrics.get("total_timesteps"))
+                    if "total_timesteps" in job.latest_metrics
+                    else float(job.metric_history[-1]["t"]) if job.metric_history else 0.0
+                )
+                self._append_metric_point_locked(
+                    job,
+                    timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
+                    score=score,
+                    source="best_grader",
+                )
+            avg_line = _AVG_RE.match(line.strip())
+            if avg_line:
+                avg_score = float(avg_line.group(1))
+                job.latest_metrics["avg_grader_score"] = avg_score
+                fallback_ts = (
+                    float(job.latest_metrics.get("total_timesteps"))
+                    if "total_timesteps" in job.latest_metrics
+                    else float(job.metric_history[-1]["t"]) if job.metric_history else 0.0
+                )
+                self._append_metric_point_locked(
+                    job,
+                    timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
+                    score=avg_score,
+                    source="avg_grader",
+                )
+            if job.updated_at - job.last_persist_at >= 1.5:
+                should_persist = True
+        if should_persist:
+            self._persist_job(job)
+    def _finalize_artifacts(self, job: TrainingJob) -> None:
+        src_name = "phase1_final.zip" if job.phase == 1 else "phase2_final.zip"
+        src = self._repo_root / "results" / "best_model" / src_name
+        run_dir = self._training_runs_root / job.job_id
+        run_dir.mkdir(parents=True, exist_ok=True)
+        # Keep a mirror under repo/results for local developer convenience.
+        mirror_dir = self._repo_root / "results" / "training_runs" / job.job_id
+        if mirror_dir != run_dir:
+            mirror_dir.mkdir(parents=True, exist_ok=True)
+        if src.exists():
+            ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+            unique_name = f"phase{job.phase}_seed{job.seed}_{ts}_{job.job_id[:8]}.zip"
+            out = run_dir / unique_name
+            shutil.copy2(src, out)
+            if mirror_dir != run_dir:
+                try:
+                    shutil.copy2(src, mirror_dir / unique_name)
+                except Exception:
+                    pass
+            with job.lock:
+                job.output_model_path = str(out.resolve())
+                job.output_model_name = unique_name
+                job.updated_at = _now()
+            model_type = "maskable"
+            eval_cmd = [
+                sys.executable,
+                "-m",
+                "rl.evaluate",
+                "--model",
+                str(out),
+                "--episodes",
+                "3",
+                "--model-type",
+                model_type,
+            ]
+            proc = subprocess.run(
+                eval_cmd,
+                cwd=str(self._repo_root),
+                env=os.environ.copy(),
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            rows, avg = _parse_eval(proc.stdout or "")
+            with job.lock:
+                job.evaluation_rows = rows
+                job.evaluation_avg_score = avg
+                if avg is not None:
+                    job.latest_metrics["avg_grader_score"] = float(avg)
+                    fallback_ts = (
+                        float(job.latest_metrics.get("total_timesteps"))
+                        if "total_timesteps" in job.latest_metrics
+                        else float(job.timesteps)
+                    )
+                    self._append_metric_point_locked(
+                        job,
+                        timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
+                        score=float(avg),
+                        source="final_eval_avg",
+                    )
+                _tail_append(job.logs_tail, "----- EVALUATION -----")
+                for ln in (proc.stdout or "").splitlines():
+                    _tail_append(job.logs_tail, ln)
+                if proc.returncode != 0 and not job.error_message:
+                    job.error_message = f"Evaluation exited with code {proc.returncode}."
+                job.updated_at = _now()
+            self._persist_job(job)
+        else:
+            self._persist_job(job)

app/utils.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""
+utils.py — Shared pure-function helpers.
+No imports from env.py or simulator.py (prevents circular imports).
+"""
+from __future__ import annotations
+from app.models import ServiceType
+def completion_fairness_gap(
+    arrived_by_service: dict,
+    completed_by_service: dict,
+) -> float:
+    """
+    Fairness gap = max completion rate difference across services.
+    Returns 0.0 if only one service, 1.0 if perfectly unfair.
+    """
+    rates = []
+    for svc in arrived_by_service:
+        arrived   = arrived_by_service.get(svc, 0)
+        completed = completed_by_service.get(svc, 0)
+        if arrived > 0:
+            rates.append(completed / arrived)
+    if len(rates) < 2:
+        return 0.0
+    return round(max(rates) - min(rates), 4)

app/web/app.js ADDED Viewed

	@@ -0,0 +1,380 @@

+// Mutable UI state shared by the functions in this script.
+const state = {
+  sessionId: null,
+  taskId: "district_backlog_easy",
+  agentPolicy: "backlog_clearance",
+  availableAgents: [],
+  // One entry per executed step; rows carry the reward and backlog that the
+  // KPI tiles and the run chart read.
+  trace: [],
+  running: false,
+};
+// NOTE(review): presumably the agent list used when the server provides none — confirm at the call site.
+const AGENTS_FALLBACK = ["urgent_first", "oldest_first", "backlog_clearance"];
+// DOM handles looked up once when the script loads; any element missing from
+// the page shows up here as null.
+const els = {
+  taskSelect: document.getElementById("taskSelect"),
+  agentSelect: document.getElementById("agentSelect"),
+  stepsInput: document.getElementById("stepsInput"),
+  startRunBtn: document.getElementById("startRunBtn"),
+  resetSessionBtn: document.getElementById("resetSessionBtn"),
+  statusLine: document.getElementById("statusLine"),
+  stepTableBody: document.querySelector("#stepTable tbody"),
+  runChart: document.getElementById("runChart"),
+  benchTaskSelect: document.getElementById("benchTaskSelect"),
+  benchRunsInput: document.getElementById("benchRunsInput"),
+  benchStepsInput: document.getElementById("benchStepsInput"),
+  runBenchmarkBtn: document.getElementById("runBenchmarkBtn"),
+  benchChart: document.getElementById("benchChart"),
+  benchTableBody: document.querySelector("#benchTable tbody"),
+  kpiReward: document.getElementById("kpiReward"),
+  kpiBacklog: document.getElementById("kpiBacklog"),
+  kpiCompleted: document.getElementById("kpiCompleted"),
+  kpiSla: document.getElementById("kpiSla"),
+  kpiFairness: document.getElementById("kpiFairness"),
+  kpiScore: document.getElementById("kpiScore"),
+};
+function setStatus(msg) {
+  els.statusLine.textContent = msg;
+}
+async function api(path, options = {}) {
+  const response = await fetch(`/api${path}`, {
+    headers: { "Content-Type": "application/json" },
+    ...options,
+  });
+  let payload = null;
+  try {
+    payload = await response.json();
+  } catch (e) {
+    payload = null;
+  }
+  if (!response.ok) {
+    const detail = payload && payload.detail ? payload.detail : `${response.status}`;
+    throw new Error(`API ${path} failed: ${detail}`);
+  }
+  return payload;
+}
+function setLoading(isLoading) {
+  state.running = isLoading;
+  els.startRunBtn.disabled = isLoading;
+  els.resetSessionBtn.disabled = isLoading;
+  els.runBenchmarkBtn.disabled = isLoading;
+}
+function formatFloat(v) {
+  return Number(v).toFixed(2);
+}
+function updateKpis(step) {
+  if (!step) return;
+  const totalReward = state.trace.reduce((sum, row) => sum + row.reward, 0);
+  els.kpiReward.textContent = formatFloat(totalReward);
+  els.kpiBacklog.textContent = `${step.backlog}`;
+  els.kpiCompleted.textContent = `${step.completed}`;
+  els.kpiSla.textContent = `${step.slaBreaches}`;
+  els.kpiFairness.textContent = formatFloat(step.fairnessGap);
+}
+function renderAction(actionObj) {
+  if (!actionObj || typeof actionObj !== "object") {
+    return "unknown";
+  }
+  const actionType = actionObj.action_type || "unknown";
+  const extras = [];
+  if (actionObj.service) extras.push(`service=${actionObj.service}`);
+  if (actionObj.target_service) extras.push(`target=${actionObj.target_service}`);
+  if (typeof actionObj.officer_delta === "number") extras.push(`delta=${actionObj.officer_delta}`);
+  if (actionObj.priority_mode) extras.push(`mode=${actionObj.priority_mode}`);
+  return extras.length ? `${actionType} (${extras.join(", ")})` : actionType;
+}
+function appendStepRow(row) {
+  const tr = document.createElement("tr");
+  const status = row.done ? "done" : "running";
+  tr.innerHTML = `
+    <td>${row.step}</td>
+    <td>${row.day}</td>
+    <td>${row.action}</td>
+    <td>${formatFloat(row.reward)}</td>
+    <td>${row.backlog}</td>
+    <td>${row.completed}</td>
+    <td>${row.slaBreaches}</td>
+    <td>${status}</td>
+  `;
+  els.stepTableBody.appendChild(tr);
+}
+function clearRunView() {
+  state.trace = [];
+  els.stepTableBody.innerHTML = "";
+  els.kpiReward.textContent = "0.00";
+  els.kpiBacklog.textContent = "0";
+  els.kpiCompleted.textContent = "0";
+  els.kpiSla.textContent = "0";
+  els.kpiFairness.textContent = "0.00";
+  els.kpiScore.textContent = "-";
+  drawRunChart([]);
+}
+/**
+ * Stroke the left and bottom chart axes as a single L-shaped path.
+ *
+ * @param {CanvasRenderingContext2D} ctx - Target 2D context.
+ * @param {number} w - Canvas width.
+ * @param {number} h - Canvas height.
+ * @param {number} pad - Inset of the plot area from every canvas edge.
+ */
+function drawAxes(ctx, w, h, pad) {
+  const bottom = h - pad;
+  const right = w - pad;
+  Object.assign(ctx, { strokeStyle: "#2f2f2f", lineWidth: 1 });
+  ctx.beginPath();
+  ctx.moveTo(pad, pad);
+  ctx.lineTo(pad, bottom);
+  ctx.lineTo(right, bottom);
+  ctx.stroke();
+}
+/**
+ * Draw one polyline for a numeric series inside the padded plot area.
+ *
+ * Points are spread evenly across the plot width and scaled vertically
+ * against `yMax` (floored at 1e-6 to avoid dividing by zero).
+ *
+ * @param {CanvasRenderingContext2D} ctx - Target 2D context.
+ * @param {number[]} points - Series values; nothing is drawn when empty.
+ * @param {string} color - Stroke colour.
+ * @param {number} pad - Plot inset from the canvas edges.
+ * @param {number} w - Canvas width.
+ * @param {number} h - Canvas height.
+ * @param {number} yMax - Value mapped to the top of the plot area.
+ */
+function drawSeries(ctx, points, color, pad, w, h, yMax) {
+  if (!points.length) {
+    return;
+  }
+  const xStep = (w - pad * 2) / Math.max(points.length - 1, 1);
+  ctx.strokeStyle = color;
+  ctx.lineWidth = 2;
+  ctx.beginPath();
+  points.forEach((value, index) => {
+    const x = pad + index * xStep;
+    const y = h - pad - (value / Math.max(yMax, 1e-6)) * (h - pad * 2);
+    if (index > 0) {
+      ctx.lineTo(x, y);
+      return;
+    }
+    // First point only positions the pen.
+    ctx.moveTo(x, y);
+  });
+  ctx.stroke();
+}
+/**
+ * Redraw the run chart: axes plus reward and backlog series on a shared scale.
+ *
+ * Negative rewards are clamped to zero for plotting. With an empty trace only
+ * the cleared canvas and axes are drawn.
+ *
+ * @param {Array<{reward: number, backlog: number}>} trace - Recorded step rows.
+ */
+function drawRunChart(trace) {
+  const canvas = els.runChart;
+  const ctx = canvas.getContext("2d");
+  const { width: w, height: h } = canvas;
+  const pad = 34;
+  ctx.clearRect(0, 0, w, h);
+  drawAxes(ctx, w, h, pad);
+  if (!trace.length) {
+    return;
+  }
+  const rewards = trace.map((row) => Math.max(0, row.reward));
+  const backlogs = trace.map((row) => row.backlog);
+  // Both series share one vertical scale, never smaller than 1.
+  const yMax = Math.max(...rewards, ...backlogs, 1);
+  const series = [
+    [rewards, "#ffffff"],
+    [backlogs, "#7a7a7a"],
+  ];
+  for (const [values, color] of series) {
+    drawSeries(ctx, values, color, pad, w, h, yMax);
+  }
+  ctx.fillStyle = "#d2d2d2";
+  ctx.font = "12px Segoe UI";
+  const legendY = pad + 8;
+  ctx.fillText("reward", pad + 6, legendY);
+  ctx.fillText("backlog", pad + 70, legendY);
+}
+/**
+ * Redraw the benchmark chart: one bar per agent for its average score, a dot
+ * per individual run, and the agent name under the bar.
+ *
+ * Scores are clamped to [0, 1] before being mapped to the plot height.
+ *
+ * @param {Array<object>} agentResults - Entries read for `agent_policy`,
+ *   `average_score` and `runs` (each run read for `score`).
+ */
+function drawBenchmarkChart(agentResults) {
+  const canvas = els.benchChart;
+  const ctx = canvas.getContext("2d");
+  const { width: w, height: h } = canvas;
+  const pad = 34;
+  ctx.clearRect(0, 0, w, h);
+  drawAxes(ctx, w, h, pad);
+  if (!agentResults.length) {
+    return;
+  }
+  const plotHeight = h - pad * 2;
+  const baseline = h - pad;
+  const slotW = (w - pad * 2) / agentResults.length;
+  const clamp01 = (score) => Math.min(1, Math.max(0, score));
+  for (const [idx, agent] of agentResults.entries()) {
+    const cx = pad + idx * slotW + slotW / 2;
+    const barW = Math.max(24, slotW * 0.48);
+    const barH = plotHeight * clamp01(agent.average_score);
+    ctx.fillStyle = "#ffffff";
+    ctx.fillRect(cx - barW / 2, baseline - barH, barW, barH);
+    ctx.fillStyle = "#9a9a9a";
+    for (const [runIdx, run] of agent.runs.entries()) {
+      // Spread dots horizontally in a repeating 7-step pattern so they do not stack.
+      const jitter = ((runIdx % 7) - 3) * 2.5;
+      const dotY = baseline - plotHeight * clamp01(run.score);
+      ctx.beginPath();
+      ctx.arc(cx + jitter, dotY, 3, 0, Math.PI * 2);
+      ctx.fill();
+    }
+    ctx.fillStyle = "#d0d0d0";
+    ctx.font = "11px Segoe UI";
+    ctx.textAlign = "center";
+    ctx.fillText(agent.agent_policy, cx, h - 10);
+  }
+  ctx.textAlign = "start";
+}
+/**
+ * Replace the current backend session with a fresh one for the selected task.
+ *
+ * Any existing session is deleted first on a best-effort basis, then `/reset`
+ * is called, the new session id is stored on `state`, the run view is cleared
+ * and the status line is updated. Errors from `/reset` propagate to the caller.
+ */
+async function resetSession() {
+  const previousSessionId = state.sessionId;
+  if (previousSessionId) {
+    try {
+      await api(`/sessions/${previousSessionId}`, { method: "DELETE" });
+    } catch (cleanupErr) {
+      // Deliberately ignored: a stale session must not block creating a new one.
+    }
+  }
+  const selectedTask = els.taskSelect.value;
+  state.taskId = selectedTask;
+  const requestBody = JSON.stringify({ task_id: selectedTask });
+  const payload = await api("/reset", { method: "POST", body: requestBody });
+  state.sessionId = payload.session_id;
+  clearRunView();
+  const shortId = state.sessionId.slice(0, 8);
+  setStatus(`Session ready: ${shortId}... (${state.taskId})`);
+}
+/**
+ * Run up to the requested number of `/autostep` calls with the selected agent
+ * policy, updating the table, KPIs and chart after every step, then grade the
+ * session and show the score.
+ *
+ * A new session is created first when none exists or the task selection
+ * changed. The loop stops early once the backend reports `done`. Any error is
+ * shown on the status line; the loading state is always cleared.
+ */
+async function runSimulation() {
+  const requestedSteps = Number(els.stepsInput.value || 0);
+  const hasValidStepCount = Boolean(requestedSteps) && requestedSteps >= 1;
+  if (!hasValidStepCount) {
+    setStatus("Enter a valid step count.");
+    return;
+  }
+  // Shape one /autostep response into the row used by the table, KPIs and chart.
+  const toTraceRow = (stepRes) => {
+    const obs = stepRes.observation;
+    return {
+      step: state.trace.length + 1,
+      day: obs.day,
+      action: renderAction(stepRes.action),
+      reward: Number(stepRes.reward || 0),
+      backlog: obs.total_backlog,
+      completed: obs.total_completed,
+      slaBreaches: obs.total_sla_breaches,
+      fairnessGap: Number(obs.fairness_gap || 0),
+      done: !!stepRes.done,
+    };
+  };
+  setLoading(true);
+  try {
+    const needsNewSession = !state.sessionId || state.taskId !== els.taskSelect.value;
+    if (needsNewSession) {
+      await resetSession();
+    }
+    state.agentPolicy = els.agentSelect.value;
+    setStatus(`Running ${requestedSteps} steps with ${state.agentPolicy}...`);
+    for (let stepIndex = 0; stepIndex < requestedSteps; stepIndex += 1) {
+      const requestBody = JSON.stringify({
+        session_id: state.sessionId,
+        agent_policy: state.agentPolicy,
+      });
+      const stepRes = await api("/autostep", { method: "POST", body: requestBody });
+      const row = toTraceRow(stepRes);
+      state.trace.push(row);
+      appendStepRow(row);
+      updateKpis(row);
+      drawRunChart(state.trace);
+      if (stepRes.done) {
+        break;
+      }
+    }
+    const gradeRes = await api("/grade", {
+      method: "POST",
+      body: JSON.stringify({ session_id: state.sessionId }),
+    });
+    const scoreText = formatFloat(gradeRes.score);
+    els.kpiScore.textContent = scoreText;
+    setStatus(`Run finished. Score: ${formatFloat(gradeRes.score)} (${gradeRes.grader_name})`);
+  } catch (err) {
+    setStatus(err.message);
+  } finally {
+    setLoading(false);
+  }
+}
+/**
+ * Run the multi-agent benchmark for the selected task and render the summary
+ * table and chart.
+ *
+ * Uses the agents discovered at startup, falling back to `AGENTS_FALLBACK`.
+ * Table cells are written through `textContent` so agent names returned by the
+ * API are never parsed as HTML. Errors are shown on the status line and the
+ * loading state is always cleared.
+ */
+async function runBenchmark() {
+  setLoading(true);
+  try {
+    const taskId = els.benchTaskSelect.value;
+    const runs = Number(els.benchRunsInput.value || 0);
+    const maxSteps = Number(els.benchStepsInput.value || 0);
+    if (!runs || !maxSteps) {
+      setStatus("Benchmark inputs are invalid.");
+      return;
+    }
+    const benchmarkAgents = state.availableAgents.length ? state.availableAgents : AGENTS_FALLBACK;
+    setStatus(`Running benchmark on ${taskId} with ${benchmarkAgents.length} agents...`);
+    const res = await api("/benchmark", {
+      method: "POST",
+      body: JSON.stringify({
+        task_id: taskId,
+        runs,
+        max_steps: maxSteps,
+        agent_policies: benchmarkAgents,
+      }),
+    });
+    els.benchTableBody.innerHTML = "";
+    res.agent_results.forEach((agent) => {
+      const tr = document.createElement("tr");
+      // Column order must match the benchmark table header.
+      const cells = [
+        agent.agent_policy,
+        formatFloat(agent.average_score),
+        formatFloat(agent.min_score),
+        formatFloat(agent.max_score),
+      ];
+      cells.forEach((value) => {
+        const td = document.createElement("td");
+        td.textContent = `${value}`;
+        tr.appendChild(td);
+      });
+      els.benchTableBody.appendChild(tr);
+    });
+    drawBenchmarkChart(res.agent_results);
+    setStatus("Benchmark completed.");
+  } catch (err) {
+    setStatus(err.message);
+  } finally {
+    setLoading(false);
+  }
+}
+/**
+ * Bootstrap the console: load health, task and agent lists, populate the
+ * selects, pick defaults and create the first session.
+ *
+ * The three startup requests are independent, so they are issued together.
+ * A failing `/agents` call, or a payload that is not a non-empty array, falls
+ * back to `AGENTS_FALLBACK`. Any other failure is reported on the status line;
+ * the loading state is always cleared.
+ */
+async function init() {
+  setLoading(true);
+  try {
+    const [health, tasksRes, agents] = await Promise.all([
+      api("/health"),
+      api("/tasks"),
+      api("/agents").catch(() => AGENTS_FALLBACK),
+    ]);
+    tasksRes.tasks.forEach((task) => {
+      // An Option can belong to one select only, so create one per select.
+      els.taskSelect.add(new Option(task, task));
+      els.benchTaskSelect.add(new Option(task, task));
+    });
+    // Guard against null or otherwise non-array payloads before reading .length.
+    state.availableAgents = Array.isArray(agents) && agents.length ? agents : AGENTS_FALLBACK;
+    state.availableAgents.forEach((agent) => {
+      els.agentSelect.add(new Option(agent, agent));
+    });
+    els.taskSelect.value = health.available_tasks.includes("district_backlog_easy")
+      ? "district_backlog_easy"
+      : tasksRes.tasks[0];
+    els.benchTaskSelect.value = els.taskSelect.value;
+    els.agentSelect.value = state.availableAgents.includes("backlog_clearance")
+      ? "backlog_clearance"
+      : state.availableAgents[0];
+    await resetSession();
+  } catch (err) {
+    setStatus(`Initialization failed: ${err.message}`);
+  } finally {
+    setLoading(false);
+  }
+}
+// Click handler for the reset button: shows the loading state while a fresh
+// session is created and reports any failure on the status line.
+async function handleResetSessionClick() {
+  setLoading(true);
+  try {
+    await resetSession();
+  } catch (err) {
+    setStatus(err.message);
+  } finally {
+    setLoading(false);
+  }
+}
+// Wire the three action buttons, then start the initial load.
+els.startRunBtn.addEventListener("click", runSimulation);
+els.resetSessionBtn.addEventListener("click", handleResetSessionClick);
+els.runBenchmarkBtn.addEventListener("click", runBenchmark);
+init();

app/web/index.html ADDED Viewed

	@@ -0,0 +1,27 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Gov Workflow OpenEnv Console</title>
+  <link rel="stylesheet" href="/ui/assets/styles.css" />
+</head>
+<body>
+  <!-- Mount point; the placeholder stays visible until it is replaced. -->
+  <div id="app-root" class="app-root">
+    <div class="boot">Loading frontend...</div>
+  </div>
+  <!-- Boot watchdog: if window.__APP_MOUNTED__ is still false after 6 seconds, show an error panel. -->
+  <script>
+    window.__APP_MOUNTED__ = false;
+    setTimeout(function showBootFailure() {
+      if (window.__APP_MOUNTED__) {
+        return;
+      }
+      var root = document.getElementById("app-root");
+      if (!root) {
+        return;
+      }
+      root.innerHTML = "<div class='boot boot-error'><h2>Frontend bootstrap failed</h2><p>The React runtime could not load in this environment. Backend API is still active at <code>/docs</code>.</p></div>";
+    }, 6000);
+  </script>
+  <script type="module" src="/ui/assets/react_app.js"></script>
+</body>
+</html>

app/web/react_app.js ADDED Viewed

	@@ -0,0 +1,933 @@

+import React, { useEffect, useMemo, useRef, useState } from "https://esm.sh/react@18.3.1";
+import { createRoot } from "https://esm.sh/react-dom@18.3.1/client";
+/**
+ * Call a backend endpoint under `/api` and return its parsed JSON body.
+ *
+ * The JSON Content-Type header is always sent; headers supplied by the caller
+ * are merged on top of it instead of replacing it. A body that is not valid
+ * JSON yields `null`.
+ *
+ * @param {string} path - Endpoint path beginning with "/", appended to "/api".
+ * @param {object} [options] - Extra `fetch` options (method, body, headers, ...).
+ * @returns {Promise<*>} Parsed JSON payload, or null when the body is not JSON.
+ * @throws {Error} On a non-OK response. The message carries the payload's
+ *   `detail` (serialised to JSON when it is not a string) or the HTTP status.
+ */
+async function api(path, options = {}) {
+  const { headers, ...rest } = options;
+  const res = await fetch(`/api${path}`, {
+    ...rest,
+    headers: { "Content-Type": "application/json", ...headers },
+  });
+  let payload = null;
+  try {
+    payload = await res.json();
+  } catch (err) {
+    // Empty or non-JSON body: keep payload null and let the status check decide.
+    payload = null;
+  }
+  if (!res.ok) {
+    const rawDetail = payload && payload.detail ? payload.detail : `${res.status}`;
+    // Structured details (e.g. validation error lists) would otherwise print as "[object Object]".
+    const detail = typeof rawDetail === "string" ? rawDetail : JSON.stringify(rawDetail);
+    throw new Error(`API ${path} failed: ${detail}`);
+  }
+  return payload;
+}
+/**
+ * Clear the canvas and stroke the left and bottom axes of the plot area.
+ *
+ * @param {CanvasRenderingContext2D} ctx - Target 2D context.
+ * @param {number} w - Canvas width.
+ * @param {number} h - Canvas height.
+ * @param {number} pad - Inset of the plot area from every canvas edge.
+ */
+function drawAxes(ctx, w, h, pad) {
+  ctx.clearRect(0, 0, w, h);
+  ctx.strokeStyle = "#2f2f2f";
+  ctx.lineWidth = 1;
+  // Corners of the L-shaped axis path: top-left, bottom-left, bottom-right.
+  const [start, ...rest] = [
+    [pad, pad],
+    [pad, h - pad],
+    [w - pad, h - pad],
+  ];
+  ctx.beginPath();
+  ctx.moveTo(start[0], start[1]);
+  for (const [x, y] of rest) {
+    ctx.lineTo(x, y);
+  }
+  ctx.stroke();
+}
+/**
+ * Canvas component plotting two numeric series as polylines on a shared
+ * vertical scale, with a text label for each series.
+ *
+ * The drawing effect re-runs whenever either series or label changes. With no
+ * points at all only the axes are drawn.
+ *
+ * @param {object} props
+ * @param {number[]} props.pointsA - First series (drawn in white).
+ * @param {number[]} props.pointsB - Second series (drawn in grey).
+ * @param {string} props.labelA - Label for the first series.
+ * @param {string} props.labelB - Label for the second series.
+ */
+function LineCanvas({ pointsA, pointsB, labelA, labelB }) {
+  const ref = useRef(null);
+  useEffect(() => {
+    const canvas = ref.current;
+    if (!canvas) {
+      return;
+    }
+    const ctx = canvas.getContext("2d");
+    const { width: w, height: h } = canvas;
+    const pad = 34;
+    drawAxes(ctx, w, h, pad);
+    const combined = [...pointsA, ...pointsB];
+    if (!combined.length) {
+      return;
+    }
+    // Shared scale for both series, never smaller than 1.
+    const yMax = Math.max(...combined, 1);
+    const plotSeries = (values, color) => {
+      if (!values.length) {
+        return;
+      }
+      const stepX = (w - pad * 2) / Math.max(values.length - 1, 1);
+      ctx.strokeStyle = color;
+      ctx.lineWidth = 2;
+      ctx.beginPath();
+      values.forEach((value, index) => {
+        const x = pad + index * stepX;
+        const y = h - pad - (value / yMax) * (h - pad * 2);
+        if (index > 0) {
+          ctx.lineTo(x, y);
+        } else {
+          ctx.moveTo(x, y);
+        }
+      });
+      ctx.stroke();
+    };
+    plotSeries(pointsA, "#ffffff");
+    plotSeries(pointsB, "#808080");
+    ctx.fillStyle = "#d5d5d5";
+    ctx.font = "12px Segoe UI";
+    const legendY = pad + 8;
+    ctx.fillText(labelA, pad + 6, legendY);
+    ctx.fillText(labelB, pad + 92, legendY);
+  }, [pointsA, pointsB, labelA, labelB]);
+  return React.createElement("canvas", { ref, width: 1200, height: 320 });
+}
+/**
+ * Canvas component drawing two bars that compare a baseline score with an RL
+ * model score. Bar heights use the score clamped to [0, 1]; the caption shows
+ * the unclamped score to three decimals.
+ *
+ * Only the axes are drawn while either score is null or undefined.
+ *
+ * @param {object} props
+ * @param {?number} props.baselineScore - Score shown on the "baseline" bar.
+ * @param {?number} props.rlScore - Score shown on the "phase2" bar.
+ */
+function CompareCanvas({ baselineScore, rlScore }) {
+  const ref = useRef(null);
+  useEffect(() => {
+    const canvas = ref.current;
+    if (!canvas) {
+      return;
+    }
+    const ctx = canvas.getContext("2d");
+    const { width: w, height: h } = canvas;
+    const pad = 36;
+    drawAxes(ctx, w, h, pad);
+    if (baselineScore == null || rlScore == null) {
+      return;
+    }
+    const barW = 120;
+    const plotHeight = h - pad * 2;
+    const bars = [
+      { name: "baseline", score: baselineScore, color: "#9a9a9a", x: w * 0.35 },
+      { name: "phase2", score: rlScore, color: "#ffffff", x: w * 0.65 },
+    ];
+    for (const { name, score, color, x } of bars) {
+      const barH = plotHeight * Math.max(0, Math.min(1, score));
+      ctx.fillStyle = color;
+      ctx.fillRect(x - barW / 2, h - pad - barH, barW, barH);
+      ctx.fillStyle = "#dddddd";
+      ctx.font = "13px Segoe UI";
+      ctx.textAlign = "center";
+      ctx.fillText(`${name}: ${score.toFixed(3)}`, x, h - 10);
+    }
+    ctx.textAlign = "start";
+  }, [baselineScore, rlScore]);
+  return React.createElement("canvas", { ref, width: 1200, height: 300 });
+}
+/**
+ * Format a value with a fixed number of decimals for display.
+ *
+ * @param {*} value - Value to format; coerced with `Number`.
+ * @param {number} [digits=2] - Number of decimal places.
+ * @returns {string} Fixed-decimal string, or "-" for null, undefined or
+ *   anything that does not convert to a number.
+ */
+function formatNumber(value, digits = 2) {
+  if (value == null) {
+    return "-";
+  }
+  const numeric = Number(value);
+  return Number.isNaN(numeric) ? "-" : numeric.toFixed(digits);
+}
+function App() {
+  const [loading, setLoading] = useState(false);
+  const [status, setStatus] = useState("Initializing...");
+  const [tasks, setTasks] = useState([]);
+  const [agents, setAgents] = useState([]);
+  const [components, setComponents] = useState([]);
+  const [models, setModels] = useState([]);
+  const [taskId, setTaskId] = useState("district_backlog_easy");
+  const [agentPolicy, setAgentPolicy] = useState("backlog_clearance");
+  const [steps, setSteps] = useState(40);
+  const [sessionId, setSessionId] = useState("");
+  const [manualSeed, setManualSeed] = useState("");
+  const [manualActionJson, setManualActionJson] = useState('{\n  "action_type": "advance_time"\n}');
+  const [manualOutput, setManualOutput] = useState("{}");
+  const [baselineTrace, setBaselineTrace] = useState([]);
+  const [graderScore, setGraderScore] = useState(null);
+  const [benchmarkRows, setBenchmarkRows] = useState([]);
+  const [modelPath, setModelPath] = useState("results/best_model/phase2_final.zip");
+  const [modelType, setModelType] = useState("maskable");
+  const [rlMaxSteps, setRlMaxSteps] = useState(80);
+  const [rlRun, setRlRun] = useState(null);
+  const [rlEval, setRlEval] = useState([]);
+  const [compareData, setCompareData] = useState({ baseline: null, rl: null });
+  const [workflowOutput, setWorkflowOutput] = useState("");
+  const [workflowMeta, setWorkflowMeta] = useState(null);
+  useEffect(() => {
+    const init = async () => {
+      setLoading(true);
+      try {
+        const [health, tasksRes, agentsRes, componentsRes, modelsRes] = await Promise.all([
+          api("/health"),
+          api("/tasks"),
+          api("/agents"),
+          api("/workflows/components"),
+          api("/rl/models"),
+        ]);
+        const taskList = tasksRes.tasks || [];
+        const agentList = agentsRes || [];
+        const modelList = (modelsRes.models || []).filter((m) => m.exists);
+        setTasks(taskList);
+        setAgents(agentList);
+        setComponents(componentsRes.components || []);
+        setModels(modelsRes.models || []);
+        const defaultTask = taskList.includes("district_backlog_easy") ? "district_backlog_easy" : taskList[0];
+        setTaskId(defaultTask || "district_backlog_easy");
+        const defaultAgent = agentList.includes("backlog_clearance") ? "backlog_clearance" : (agentList[0] || "backlog_clearance");
+        setAgentPolicy(defaultAgent);
+        const phase2 = modelList.find((m) => m.path.toLowerCase().includes("phase2_final")) || modelList[0];
+        if (phase2) {
+          setModelPath(phase2.path);
+          setModelType(phase2.model_type);
+        }
+        setStatus(`API ready (v${health.version}).`);
+      } catch (err) {
+        setStatus(err.message);
+      } finally {
+        setLoading(false);
+      }
+    };
+    init();
+  }, []);
+  const baselineRewards = useMemo(() => baselineTrace.map((x) => Math.max(0, Number(x.reward || 0))), [baselineTrace]);
+  const baselineBacklog = useMemo(() => baselineTrace.map((x) => Number(x.backlog || 0)), [baselineTrace]);
+  const baselineKpi = useMemo(() => {
+    const totalReward = baselineTrace.reduce((sum, row) => sum + Number(row.reward || 0), 0);
+    const last = baselineTrace.length ? baselineTrace[baselineTrace.length - 1] : null;
+    return {
+      reward: totalReward,
+      backlog: last ? last.backlog : 0,
+      completed: last ? last.completed : 0,
+      sla: last ? last.sla_breaches : 0,
+      fairness: last ? last.fairness_gap : 0,
+    };
+  }, [baselineTrace]);
+  const activeModel = useMemo(() => models.find((m) => m.path === modelPath), [models, modelPath]);
+  const manualReset = async () => {
+    setLoading(true);
+    try {
+      const payload = {
+        task_id: taskId,
+      };
+      if (manualSeed.trim()) {
+        payload.seed = Number(manualSeed.trim());
+      }
+      const res = await api("/reset", {
+        method: "POST",
+        body: JSON.stringify(payload),
+      });
+      setSessionId(res.session_id);
+      setManualOutput(JSON.stringify(res, null, 2));
+      setStatus(`Session created: ${res.session_id}`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const manualStep = async () => {
+    if (!sessionId) {
+      setStatus("Create a session first with Reset.");
+      return;
+    }
+    setLoading(true);
+    try {
+      const action = JSON.parse(manualActionJson);
+      const res = await api("/step", {
+        method: "POST",
+        body: JSON.stringify({ session_id: sessionId, action }),
+      });
+      setManualOutput(JSON.stringify(res, null, 2));
+      setStatus(`Manual step done. reward=${formatNumber(res.reward)}`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const manualState = async () => {
+    if (!sessionId) {
+      setStatus("Create a session first with Reset.");
+      return;
+    }
+    setLoading(true);
+    try {
+      const res = await api("/state", {
+        method: "POST",
+        body: JSON.stringify({ session_id: sessionId, include_action_history: true }),
+      });
+      setManualOutput(JSON.stringify(res, null, 2));
+      setStatus("State fetched.");
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const manualGrade = async () => {
+    if (!sessionId) {
+      setStatus("Create a session first with Reset.");
+      return;
+    }
+    setLoading(true);
+    try {
+      const res = await api("/grade", {
+        method: "POST",
+        body: JSON.stringify({ session_id: sessionId }),
+      });
+      setManualOutput(JSON.stringify(res, null, 2));
+      setStatus(`Grade score=${formatNumber(res.score, 3)} (${res.grader_name})`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const resetBaselineSession = async () => {
+    const res = await api("/reset", {
+      method: "POST",
+      body: JSON.stringify({ task_id: taskId }),
+    });
+    setSessionId(res.session_id);
+    setBaselineTrace([]);
+    setGraderScore(null);
+    return res.session_id;
+  };
+  const runBaseline = async () => {
+    setLoading(true);
+    try {
+      let sid = sessionId;
+      if (!sid) {
+        sid = await resetBaselineSession();
+      }
+      const rows = [];
+      for (let i = 0; i < Number(steps); i += 1) {
+        const stepRes = await api("/autostep", {
+          method: "POST",
+          body: JSON.stringify({ session_id: sid, agent_policy: agentPolicy }),
+        });
+        rows.push({
+          step: rows.length + 1,
+          day: stepRes.observation.day,
+          action: stepRes.action.action_type,
+          reward: Number(stepRes.reward || 0),
+          backlog: stepRes.observation.total_backlog,
+          completed: stepRes.observation.total_completed,
+          sla_breaches: stepRes.observation.total_sla_breaches,
+          fairness_gap: Number(stepRes.observation.fairness_gap || 0),
+          done: stepRes.done,
+        });
+        if (stepRes.done) break;
+      }
+      setBaselineTrace(rows);
+      const gradeRes = await api("/grade", {
+        method: "POST",
+        body: JSON.stringify({ session_id: sid }),
+      });
+      setGraderScore(Number(gradeRes.score));
+      setStatus(`Baseline run done. score=${formatNumber(gradeRes.score, 3)}`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const runBenchmark = async () => {
+    if (!agents.length) {
+      setStatus("No baseline agents available.");
+      return;
+    }
+    setLoading(true);
+    try {
+      const res = await api("/benchmark", {
+        method: "POST",
+        body: JSON.stringify({
+          task_id: taskId,
+          runs: 3,
+          max_steps: Number(steps),
+          agent_policies: agents,
+        }),
+      });
+      setBenchmarkRows(res.agent_results || []);
+      setStatus("Baseline benchmark done.");
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const runTrainedEpisode = async () => {
+    setLoading(true);
+    try {
+      const res = await api("/rl/run", {
+        method: "POST",
+        body: JSON.stringify({
+          task_id: taskId,
+          model_path: modelPath,
+          model_type: modelType,
+          max_steps: Number(rlMaxSteps),
+        }),
+      });
+      setRlRun(res);
+      setStatus(`Trained run done. score=${formatNumber(res.grader_score, 3)} (${res.grader_name})`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const evaluateTrainedModel = async () => {
+    setLoading(true);
+    try {
+      const res = await api("/rl/evaluate", {
+        method: "POST",
+        body: JSON.stringify({
+          model_path: modelPath,
+          model_type: modelType,
+          episodes: 3,
+          task_ids: tasks,
+        }),
+      });
+      setRlEval(res.results || []);
+      setStatus(`Trained evaluation done. avg=${formatNumber(res.average_grader_score, 3)}`);
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const compareBaselineVsPhase2 = async () => {
+    setLoading(true);
+    try {
+      const [base, rl] = await Promise.all([
+        api("/benchmark", {
+          method: "POST",
+          body: JSON.stringify({
+            task_id: taskId,
+            runs: 3,
+            max_steps: Number(steps),
+            agent_policies: [agentPolicy],
+          }),
+        }),
+        api("/rl/evaluate", {
+          method: "POST",
+          body: JSON.stringify({
+            model_path: modelPath,
+            model_type: modelType,
+            episodes: 3,
+            task_ids: [taskId],
+          }),
+        }),
+      ]);
+      const baselineScore = base.agent_results && base.agent_results.length
+        ? Number(base.agent_results[0].average_score)
+        : null;
+      const rlScore = rl.results && rl.results.length
+        ? Number(rl.results[0].grader_score)
+        : null;
+      setCompareData({ baseline: baselineScore, rl: rlScore });
+      setStatus("Comparison done.");
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  const workflowIdForComponent = (componentName) => {
+    if (componentName === "baseline_openai.py") return "baseline_openai";
+    if (componentName === "inference.py") return "inference";
+    if (componentName === "phase2_final.zip") return "phase2_eval";
+    return null;
+  };
+  const runWorkflowFromUi = async (workflowId) => {
+    setLoading(true);
+    try {
+      const payload = {
+        workflow_id: workflowId,
+        max_steps: Number(steps),
+        episodes: 3,
+        model_path: modelPath,
+        model_type: modelType,
+        timeout_seconds: 240,
+      };
+      const res = await api("/workflows/run", {
+        method: "POST",
+        body: JSON.stringify(payload),
+      });
+      setWorkflowMeta({
+        workflow_id: res.workflow_id,
+        exit_code: res.exit_code,
+        duration_seconds: res.duration_seconds,
+        timed_out: res.timed_out,
+        command: res.command,
+      });
+      const out = [
+        "$ " + (res.command || []).join(" "),
+        "",
+        "STDOUT:",
+        res.stdout || "",
+        "",
+        "STDERR:",
+        res.stderr || "",
+      ].join("\n");
+      setWorkflowOutput(out);
+      setStatus(
+        `Workflow ${res.workflow_id} finished. exit_code=${res.exit_code}, duration=${formatNumber(res.duration_seconds, 2)}s`
+      );
+    } catch (err) {
+      setStatus(err.message);
+    } finally {
+      setLoading(false);
+    }
+  };
+  return React.createElement(
+    "div",
+    { className: "shell" },
+    React.createElement(
+      "header",
+      { className: "hero" },
+      React.createElement("h1", null, "Gov Workflow OpenEnv - React Console"),
+      React.createElement(
+        "p",
+        null,
+        "Shows OpenEnv API execution, baseline/inference workflow visibility, and trained Phase 2 RL model behavior from one screen."
+      )
+    ),
+    React.createElement("div", { className: "status" }, status),
+    React.createElement(
+      "section",
+      { className: "panel" },
+      React.createElement("h2", null, "Workflow Components Visibility"),
+      React.createElement(
+        "div",
+        { className: "grid cols-2" },
+        ...components.map((c) =>
+          React.createElement(
+            "article",
+            { key: c.component, className: "panel" },
+            React.createElement("h3", null, c.component),
+            React.createElement("div", { className: `badge ${c.available ? "ok" : ""}` }, c.available ? "available" : "missing"),
+            React.createElement("p", { className: "small" }, c.description),
+            c.command ? React.createElement("pre", null, c.command) : null,
+            workflowIdForComponent(c.component)
+              ? React.createElement(
+                  "div",
+                  { className: "btn-row", style: { marginTop: "8px" } },
+                  React.createElement(
+                    "button",
+                    {
+                      className: "secondary",
+                      onClick: () => runWorkflowFromUi(workflowIdForComponent(c.component)),
+                      disabled: loading,
+                    },
+                    "Run In Frontend"
+                  )
+                )
+              : null,
+            c.notes ? React.createElement("p", { className: "small" }, c.notes) : null
+          )
+        )
+      ),
+      workflowMeta
+        ? React.createElement(
+            "div",
+            { className: "small", style: { marginTop: "10px" } },
+            `Last run: ${workflowMeta.workflow_id} | exit=${workflowMeta.exit_code} | timeout=${workflowMeta.timed_out ? "true" : "false"} | duration=${formatNumber(workflowMeta.duration_seconds, 2)}s`
+          )
+        : null,
+      workflowOutput ? React.createElement("pre", { style: { marginTop: "10px" } }, workflowOutput) : null
+    ),
+    React.createElement(
+      "section",
+      { className: "panel" },
+      React.createElement("h2", null, "OpenEnv API Runner (step/reset/state/grade)"),
+      React.createElement(
+        "div",
+        { className: "form-row" },
+        React.createElement(
+          "label",
+          null,
+          "Task",
+          React.createElement(
+            "select",
+            { value: taskId, onChange: (e) => setTaskId(e.target.value) },
+            ...tasks.map((t) => React.createElement("option", { key: t, value: t }, t))
+          )
+        ),
+        React.createElement(
+          "label",
+          null,
+          "Seed (optional)",
+          React.createElement("input", {
+            value: manualSeed,
+            onChange: (e) => setManualSeed(e.target.value),
+            placeholder: "11",
+          })
+        ),
+        React.createElement(
+          "label",
+          null,
+          "Session ID",
+          React.createElement("input", {
+            value: sessionId,
+            onChange: (e) => setSessionId(e.target.value),
+            placeholder: "auto after reset",
+          })
+        )
+      ),
+      React.createElement(
+        "label",
+        { style: { marginTop: "10px" } },
+        "Action JSON for /step",
+        React.createElement("textarea", {
+          value: manualActionJson,
+          onChange: (e) => setManualActionJson(e.target.value),
+        })
+      ),
+      React.createElement(
+        "div",
+        { className: "btn-row", style: { marginTop: "10px" } },
+        React.createElement("button", { onClick: manualReset, disabled: loading }, "Reset"),
+        React.createElement("button", { onClick: manualStep, disabled: loading }, "Step"),
+        React.createElement("button", { onClick: manualState, disabled: loading }, "State"),
+        React.createElement("button", { onClick: manualGrade, disabled: loading }, "Grade"),
+      ),
+      React.createElement("pre", { style: { marginTop: "10px" } }, manualOutput)
+    ),
+    React.createElement(
+      "section",
+      { className: "panel" },
+      React.createElement("h2", null, "Baseline Agent Runner (backend policy)"),
+      React.createElement(
+        "div",
+        { className: "form-row" },
+        React.createElement(
+          "label",
+          null,
+          "Baseline Agent",
+          React.createElement(
+            "select",
+            { value: agentPolicy, onChange: (e) => setAgentPolicy(e.target.value) },
+            ...agents.map((a) => React.createElement("option", { key: a, value: a }, a))
+          )
+        ),
+        React.createElement(
+          "label",
+          null,
+          "Steps",
+          React.createElement("input", {
+            type: "number",
+            min: 1,
+            max: 500,
+            value: steps,
+            onChange: (e) => setSteps(e.target.value),
+          })
+        )
+      ),
+      React.createElement(
+        "div",
+        { className: "btn-row", style: { marginTop: "10px" } },
+        React.createElement("button", { onClick: runBaseline, disabled: loading }, "Run Baseline"),
+        React.createElement("button", { className: "secondary", onClick: resetBaselineSession, disabled: loading }, "Reset Session"),
+        React.createElement("button", { className: "secondary", onClick: runBenchmark, disabled: loading }, "Run Benchmark"),
+      ),
+      React.createElement(
+        "div",
+        { className: "kpis", style: { marginTop: "10px" } },
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Total Reward"), React.createElement("div", { className: "v" }, formatNumber(baselineKpi.reward))),
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Backlog"), React.createElement("div", { className: "v" }, baselineKpi.backlog)),
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Completed"), React.createElement("div", { className: "v" }, baselineKpi.completed)),
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "SLA Breaches"), React.createElement("div", { className: "v" }, baselineKpi.sla)),
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Fairness Gap"), React.createElement("div", { className: "v" }, formatNumber(baselineKpi.fairness))),
+        React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Grader Score"), React.createElement("div", { className: "v" }, graderScore == null ? "-" : formatNumber(graderScore, 3))),
+      ),
+      React.createElement("div", { style: { marginTop: "10px" } }, React.createElement(LineCanvas, {
+        pointsA: baselineRewards,
+        pointsB: baselineBacklog,
+        labelA: "reward",
+        labelB: "backlog",
+      })),
+      React.createElement(
+        "div",
+        { className: "table-wrap", style: { marginTop: "10px" } },
+        React.createElement(
+          "table",
+          null,
+          React.createElement(
+            "thead",
+            null,
+            React.createElement(
+              "tr",
+              null,
+              React.createElement("th", null, "Step"),
+              React.createElement("th", null, "Day"),
+              React.createElement("th", null, "Action"),
+              React.createElement("th", null, "Reward"),
+              React.createElement("th", null, "Backlog"),
+              React.createElement("th", null, "Completed"),
+              React.createElement("th", null, "SLA"),
+              React.createElement("th", null, "Done")
+            )
+          ),
+          React.createElement(
+            "tbody",
+            null,
+            ...baselineTrace.map((r) =>
+              React.createElement(
+                "tr",
+                { key: `b-${r.step}` },
+                React.createElement("td", null, r.step),
+                React.createElement("td", null, r.day),
+                React.createElement("td", null, r.action),
+                React.createElement("td", null, formatNumber(r.reward)),
+                React.createElement("td", null, r.backlog),
+                React.createElement("td", null, r.completed),
+                React.createElement("td", null, r.sla_breaches),
+                React.createElement("td", null, r.done ? "true" : "false")
+              )
+            )
+          )
+        )
+      ),
+      benchmarkRows.length
+        ? React.createElement(
+            "div",
+            { className: "table-wrap", style: { marginTop: "10px" } },
+            React.createElement(
+              "table",
+              null,
+              React.createElement(
+                "thead",
+                null,
+                React.createElement(
+                  "tr",
+                  null,
+                  React.createElement("th", null, "Agent"),
+                  React.createElement("th", null, "Avg Score"),
+                  React.createElement("th", null, "Min"),
+                  React.createElement("th", null, "Max")
+                )
+              ),
+              React.createElement(
+                "tbody",
+                null,
+                ...benchmarkRows.map((r) =>
+                  React.createElement(
+                    "tr",
+                    { key: `bench-${r.agent_policy}` },
+                    React.createElement("td", null, r.agent_policy),
+                    React.createElement("td", null, formatNumber(r.average_score, 3)),
+                    React.createElement("td", null, formatNumber(r.min_score, 3)),
+                    React.createElement("td", null, formatNumber(r.max_score, 3))
+                  )
+                )
+              )
+            )
+          )
+        : null
+    ),
+    React.createElement(
+      "section",
+      { className: "panel" },
+      React.createElement("h2", null, "Trained RL Model (Phase 2 / Phase 3)"),
+      React.createElement(
+        "div",
+        { className: "form-row" },
+        React.createElement(
+          "label",
+          null,
+          "Model",
+          React.createElement(
+            "select",
+            {
+              value: modelPath,
+              onChange: (e) => {
+                const p = e.target.value;
+                setModelPath(p);
+                const hit = models.find((m) => m.path === p);
+                if (hit) setModelType(hit.model_type);
+              },
+            },
+            ...models.filter((m) => m.exists).map((m) =>
+              React.createElement("option", { key: m.path, value: m.path }, `${m.label}`)
+            )
+          )
+        ),
+        React.createElement(
+          "label",
+          null,
+          "Model Type",
+          React.createElement(
+            "select",
+            { value: modelType, onChange: (e) => setModelType(e.target.value) },
+            React.createElement("option", { value: "maskable" }, "maskable"),
+            React.createElement("option", { value: "recurrent" }, "recurrent")
+          )
+        ),
+        React.createElement(
+          "label",
+          null,
+          "Max Steps",
+          React.createElement("input", {
+            type: "number",
+            min: 1,
+            max: 1000,
+            value: rlMaxSteps,
+            onChange: (e) => setRlMaxSteps(e.target.value),
+          })
+        )
+      ),
+      React.createElement(
+        "div",
+        { className: "btn-row", style: { marginTop: "10px" } },
+        React.createElement("button", { onClick: runTrainedEpisode, disabled: loading }, "Run Trained Episode"),
+        React.createElement("button", { className: "secondary", onClick: evaluateTrainedModel, disabled: loading }, "Evaluate Model"),
+        React.createElement("button", { className: "secondary", onClick: compareBaselineVsPhase2, disabled: loading }, "Compare vs Baseline"),
+      ),
+      activeModel
+        ? React.createElement("p", { className: "small", style: { marginTop: "10px" } }, `Using: ${activeModel.path}`)
+        : null,
+      rlRun
+        ? React.createElement(
+            "div",
+            { style: { marginTop: "10px" } },
+            React.createElement(
+              "div",
+              { className: "kpis" },
+              React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Task"), React.createElement("div", { className: "v" }, rlRun.task_id)),
+              React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Seed"), React.createElement("div", { className: "v" }, rlRun.seed)),
+              React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Total Reward"), React.createElement("div", { className: "v" }, formatNumber(rlRun.total_reward))),
+              React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Grader Score"), React.createElement("div", { className: "v" }, formatNumber(rlRun.grader_score, 3))),
+            ),
+            React.createElement("div", { style: { marginTop: "10px" } }, React.createElement(LineCanvas, {
+              pointsA: (rlRun.trace || []).map((x) => Math.max(0, Number(x.reward || 0))),
+              pointsB: (rlRun.trace || []).map((x) => Number(x.backlog || 0)),
+              labelA: "rl reward",
+              labelB: "rl backlog",
+            })),
+            React.createElement(
+              "div",
+              { className: "table-wrap", style: { marginTop: "10px" } },
+              React.createElement(
+                "table",
+                null,
+                React.createElement(
+                  "thead",
+                  null,
+                  React.createElement(
+                    "tr",
+                    null,
+                    React.createElement("th", null, "Step"),
+                    React.createElement("th", null, "Action Index"),
+                    React.createElement("th", null, "Action"),
+                    React.createElement("th", null, "Reward"),
+                    React.createElement("th", null, "Backlog"),
+                    React.createElement("th", null, "Completed"),
+                    React.createElement("th", null, "SLA")
+                  )
+                ),
+                React.createElement(
+                  "tbody",
+                  null,
+                  ...(rlRun.trace || []).map((r) =>
+                    React.createElement(
+                      "tr",
+                      { key: `rl-${r.step}` },
+                      React.createElement("td", null, r.step),
+                      React.createElement("td", null, r.action_index),
+                      React.createElement("td", null, r.action_label),
+                      React.createElement("td", null, formatNumber(r.reward)),
+                      React.createElement("td", null, r.backlog),
+                      React.createElement("td", null, r.completed),
+                      React.createElement("td", null, r.sla_breaches)
+                    )
+                  )
+                )
+              )
+            )
+          )
+        : null,
+      rlEval.length
+        ? React.createElement(
+            "div",
+            { className: "table-wrap", style: { marginTop: "10px" } },
+            React.createElement(
+              "table",
+              null,
+              React.createElement(
+                "thead",
+                null,
+                React.createElement(
+                  "tr",
+                  null,
+                  React.createElement("th", null, "Task"),
+                  React.createElement("th", null, "Score"),
+                  React.createElement("th", null, "Reward"),
+                  React.createElement("th", null, "Completed"),
+                  React.createElement("th", null, "SLA Breaches")
+                )
+              ),
+              React.createElement(
+                "tbody",
+                null,
+                ...rlEval.map((r) =>
+                  React.createElement(
+                    "tr",
+                    { key: `eval-${r.task_id}` },
+                    React.createElement("td", null, r.task_id),
+                    React.createElement("td", null, formatNumber(r.grader_score, 3)),
+                    React.createElement("td", null, formatNumber(r.total_reward, 2)),
+                    React.createElement("td", null, r.total_completed),
+                    React.createElement("td", null, r.total_sla_breaches)
+                  )
+                )
+              )
+            )
+          )
+        : null,
+      React.createElement("div", { style: { marginTop: "12px" } }, React.createElement(CompareCanvas, {
+        baselineScore: compareData.baseline,
+        rlScore: compareData.rl,
+      }))
+    )
+  );
+}
+const rootEl = document.getElementById("app-root"); // mount point; null when the page has no element with id "app-root"
+const root = createRoot(rootEl);
+root.render(React.createElement(App)); // render the top-level App component into the root
+window.__APP_MOUNTED__ = true; // only reached if the statements above did not throw. NOTE(review): presumably read by the host page to detect a failed boot — confirm

app/web/styles.css ADDED Viewed

	@@ -0,0 +1,256 @@

+:root {
+  --bg: #050505; /* darkest base tone; no rule in this stylesheet references it via var() (body uses a gradient) */
+  --panel: #0f0f0f; /* background of .boot and .panel containers */
+  --panel-2: #141414; /* background of input, select, textarea and button controls */
+  --line: #2b2b2b; /* border colour of .boot, .panel and .kpi */
+  --text: #f4f4f4; /* default foreground: body text, form controls, secondary buttons */
+  --muted: #b6b6b6; /* de-emphasised text: labels, .kpi .k, .status, .small */
+  --accent: #ffffff; /* fill of primary (non-secondary) buttons */
+}
+* {
+  box-sizing: border-box;
+}
+html,
+body {
+  margin: 0;
+  min-height: 100%;
+  background: radial-gradient(circle at 0% 0%, #1b1b1b 0%, #070707 40%, #000 100%);
+  color: var(--text);
+  font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
+}
+.app-root { /* NOTE(review): class selector, while react_app.js looks the mount node up by id "app-root" — confirm the element carries both */
+  min-height: 100vh; /* container fills at least the full viewport height */
+}
+.boot {
+  width: min(1000px, 92vw);
+  margin: 32px auto;
+  padding: 16px;
+  border: 1px solid var(--line);
+  border-radius: 12px;
+  background: var(--panel);
+}
+.boot-error h2 {
+  margin-top: 0;
+}
+.shell {
+  width: min(1300px, 94vw);
+  margin: 0 auto;
+  padding: 20px;
+  display: grid;
+  gap: 14px;
+}
+.hero {
+  padding: 18px;
+  border-radius: 14px;
+  color: #000;
+  background: linear-gradient(130deg, #fff 0%, #d0d0d0 40%, #7b7b7b 100%);
+}
+.hero h1 {
+  margin: 0;
+  font-size: 28px;
+}
+.hero p {
+  margin: 8px 0 0;
+  font-weight: 600;
+}
+.grid {
+  display: grid;
+  gap: 12px;
+}
+.cols-3 {
+  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+}
+.cols-2 {
+  grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+}
+.panel {
+  border: 1px solid var(--line);
+  border-radius: 12px;
+  background: var(--panel);
+  padding: 14px;
+}
+.panel h2 {
+  margin: 0 0 10px;
+  font-size: 18px;
+}
+.panel h3 {
+  margin: 0 0 8px;
+  font-size: 15px;
+}
+.form-row {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(170px, 1fr));
+  gap: 10px;
+}
+label {
+  display: grid;
+  gap: 6px;
+  font-size: 12px;
+  color: var(--muted);
+}
+input,
+select,
+textarea,
+button {
+  width: 100%;
+  border-radius: 8px;
+  border: 1px solid #3a3a3a;
+  background: var(--panel-2);
+  color: var(--text);
+  padding: 8px 10px;
+  font-size: 13px;
+}
+textarea {
+  min-height: 95px;
+  resize: vertical;
+}
+button {
+  cursor: pointer;
+  border: none;
+  background: var(--accent);
+  color: #000;
+  font-weight: 700;
+}
+button.secondary {
+  background: transparent;
+  border: 1px solid #505050;
+  color: var(--text);
+}
+button:disabled {
+  opacity: 0.55;
+  cursor: wait;
+}
+.btn-row {
+  display: flex;
+  gap: 8px;
+  flex-wrap: wrap;
+}
+.btn-row button {
+  width: auto;
+}
+.kpis {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+  gap: 10px;
+}
+.kpi {
+  border: 1px solid var(--line);
+  border-radius: 10px;
+  background: #0a0a0a;
+  padding: 10px;
+}
+.kpi .k {
+  color: var(--muted);
+  font-size: 12px;
+}
+.kpi .v {
+  margin-top: 5px;
+  font-size: 19px;
+  font-weight: 700;
+}
+.table-wrap {
+  overflow: auto;
+  border: 1px solid #222;
+  border-radius: 8px;
+}
+.table-wrap table {
+  border-collapse: collapse;
+  width: 100%;
+  font-size: 12px;
+}
+.table-wrap th,
+.table-wrap td {
+  border-bottom: 1px solid #202020;
+  text-align: left;
+  padding: 8px;
+  white-space: nowrap;
+}
+.table-wrap thead th {
+  background: #0c0c0c;
+  position: sticky;
+  top: 0;
+}
+canvas {
+  width: 100%;
+  border: 1px solid #292929;
+  border-radius: 8px;
+  background: #050505;
+}
+.status {
+  padding: 10px;
+  border-radius: 8px;
+  border: 1px solid #303030;
+  background: #0b0b0b;
+  color: var(--muted);
+  font-size: 12px;
+}
+.small {
+  font-size: 12px;
+  color: var(--muted);
+}
+pre {
+  margin: 0;
+  border: 1px solid #232323;
+  border-radius: 8px;
+  background: #080808;
+  padding: 10px;
+  max-height: 240px;
+  overflow: auto;
+  font-size: 12px;
+}
+.badge {
+  display: inline-block;
+  border: 1px solid #515151;
+  border-radius: 999px;
+  padding: 2px 8px;
+  font-size: 11px;
+  color: #dcdcdc;
+}
+.badge.ok {
+  border-color: #bdbdbd;
+  color: #ffffff;
+}
+@media (max-width: 680px) {
+  .hero h1 {
+    font-size: 23px;
+  }
+}

audit.py ADDED Viewed

	@@ -0,0 +1,367 @@

+import os
+import sys
+import json
+import inspect
+import requests
+import numpy as np
+import yaml
+import gymnasium as gym
+from stable_baselines3.common.env_checker import check_env
+from sb3_contrib import MaskablePPO
+def print_result(check_num, desc, status, detail=""):
+    print(f"[CHECK {check_num}] {desc}\nSTATUS: {status}\nDETAIL: {detail}\n")
+# B1
+try:
+    from app.models import (
+        ServiceType, StageType, PriorityMode, ActionType,
+        OfficerPool, QueueSnapshot, ObservationModel, ActionModel,
+        RewardModel, EpisodeStateModel, StepInfoModel,
+        SimulationConfig, TaskConfig, GraderResult,
+        BenchmarkResult, LiveRunResult, EpisodeMetrics
+    )
+    print_result("B1", "All 17 Schemas Present", "PASS", "All 17 names resolve")
+except Exception as e:
+    print_result("B1", "All 17 Schemas Present", "FAIL", str(e))
+# B2
+try:
+    fields = QueueSnapshot.model_fields
+    assert 'total_pending' in fields, "total_pending missing"
+    assert 'blocked_missing_docs' in fields, "blocked_missing_docs missing"
+    assert 'active_cases' not in fields, "legacy field active_cases found"
+    assert 'missing_docs_cases' not in fields, "legacy field found"
+    m_fields = EpisodeMetrics.model_fields
+    assert 'total_invalid_actions' in m_fields, "total_invalid_actions missing"
+    print_result("B2", "Canonical Field Name Verification", "PASS", "Fields verified")
+except Exception as e:
+    print_result("B2", "Canonical Field Name Verification", "FAIL", str(e))
+# B3
+try:
+    from app.simulator import SimulationAgentMode
+    assert hasattr(SimulationAgentMode, 'BASELINE_POLICY'), "BASELINE_POLICY missing"
+    assert hasattr(SimulationAgentMode, 'RANDOM'), "RANDOM missing"
+    assert hasattr(SimulationAgentMode, 'LLM_AGENT'), "LLM_AGENT missing"
+    assert hasattr(SimulationAgentMode, 'HEURISTIC'), "HEURISTIC missing"
+    try:
+        _ = SimulationAgentMode.baseline_policy
+        print_result("B3", "Enum Casing Check", "FAIL", "lowercase alias exists")
+    except AttributeError:
+        print_result("B3", "Enum Casing Check", "PASS", "No lowercase alias")
+except Exception as e:
+    print_result("B3", "Enum Casing Check", "FAIL", str(e))
+# C1
+try:
+    from app.env import GovWorkflowEnv
+    env = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs, info = env.reset(seed=42)
+    assert isinstance(obs, dict), f"obs is {type(obs)}, expected dict"
+    assert isinstance(info, dict), f"info is {type(info)}, expected dict"
+    assert len(obs) > 0, "empty observation"
+    print_result("C1", "reset() Returns (observation, info)", "PASS", "Valid dicts returned")
+except Exception as e:
+    print_result("C1", "reset() Returns (observation, info)", "FAIL", str(e))
+# C2
+try:
+    from app.models import ActionModel, ActionType
+    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+    result = env.step(action)
+    assert len(result) == 5, f"step() returned {len(result)} values, expected 5"
+    obs2, reward, terminated, truncated, info2 = result
+    assert isinstance(reward, float), f"reward type {type(reward)}"
+    assert isinstance(terminated, bool), "terminated not bool"
+    assert isinstance(truncated, bool), "truncated not bool"
+    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "PASS", "Valid step signature")
+except Exception as e:
+    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "FAIL", str(e))
+# C3: MaskablePPO trains against the Gymnasium wrapper in rl.gov_workflow_env, so the
+# dtype check below inspects the wrapper's observation rather than the raw env's dict.
+# (The wrapper's observation/action spaces themselves are checked in section J.)
+try:
+    from rl.gov_workflow_env import GovWorkflowGymEnv
+    genv = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
+    gobs, _ = genv.reset(seed=42)
+    def check_dtype(obs_dict, path="obs"):
+        for k, v in obs_dict.items():
+            if isinstance(v, np.ndarray):
+                assert v.dtype == np.float32 or v.dtype == np.int64, f"FAIL: {path}.{k} dtype={v.dtype}"
+            elif isinstance(v, dict):
+                check_dtype(v, f"{path}.{k}")
+    check_dtype(gobs)
+    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "PASS", "Wrapper dict is fine")
+except Exception as e:
+    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "FAIL", str(e))
+# C4
+try:
+    env1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    env2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs1, _ = env1.reset(seed=42)
+    obs2, _ = env2.reset(seed=42)
+    # Strip volatile message field before comparison (as in tests)
+    obs1.last_action_explanation = ""
+    obs2.last_action_explanation = ""
+    obs1.episode_id = ""
+    obs2.episode_id = ""
+    assert json.dumps(obs1.model_dump(), sort_keys=True, default=str) == json.dumps(obs2.model_dump(), sort_keys=True, default=str), "Different observations"
+    print_result("C4", "Determinism Check", "PASS", "Observations match")
+except Exception as e:
+    print_result("C4", "Determinism Check", "FAIL", str(e))
+# C5
+try:
+    env_c5 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs, _ = env_c5.reset(seed=42)
+    terminated = False
+    truncated = False
+    steps = 0
+    max_steps = 500
+    while not (terminated or truncated) and steps < max_steps:
+        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+        obs, reward, terminated, truncated, info = env_c5.step(action)
+        steps += 1
+    assert terminated or truncated, f"episode never ended after {max_steps} steps"
+    print_result("C5", "Episode Termination Check", "PASS", f"ended at step {steps}")
+except Exception as e:
+    print_result("C5", "Episode Termination Check", "FAIL", str(e))
+# D1
+try:
+    env_d1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs, _ = env_d1.reset(seed=42)
+    rewards = []
+    for _ in range(20):
+        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
+        obs, reward, term, trunc, info = env_d1.step(action)
+        rewards.append(reward)
+        if term or trunc: break
+    nonzero = sum(1 for r in rewards if abs(r) > 1e-6)
+    assert nonzero > len(rewards) * 0.5, f"Only {nonzero}/{len(rewards)} steps had nonzero reward"
+    print_result("D1", "Reward is Dense", "PASS", f"{nonzero}/{len(rewards)} steps nonzero")
+except Exception as e:
+    print_result("D1", "Reward is Dense", "FAIL", str(e))
+# D2
+try:
+    for r in rewards:
+        assert -100 <= r <= 100, f"reward {r} outside [-100, 100]"
+    print_result("D2", "Reward Range Sanity Check", "PASS", "Rewards in bounds")
+except Exception as e:
+    print_result("D2", "Reward Range Sanity Check", "FAIL", str(e))
+# D3
+try:
+    from app.models import ServiceType
+    env_d3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs, _ = env_d3.reset(seed=42)
+    # Using a valid enum but perhaps invalid context to cause penalty
+    # The framework doesn't allow 'nonexistent' string if it's an Enum, so let's use valid enum but no cases.
+    bad_action = ActionModel(action_type=ActionType.ESCALATE_SERVICE, service_target=ServiceType.PASSPORT)
+    obs, reward, term, trunc, info = env_d3.step(bad_action)
+    assert reward <= 0, f"invalid action produced positive reward {reward}"
+    print_result("D3", "Invalid Action Penalty Fires", "PASS", f"reward={reward:.3f}")
+except Exception as e:
+    print_result("D3", "Invalid Action Penalty Fires", "FAIL", str(e))
+# E1
+try:
+    from app.tasks import get_task
+    for task_id in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
+        cfg = get_task(task_id)
+        assert cfg.seed is not None, f"{task_id} has no seed"
+        assert cfg.max_days > 0, f"{task_id} max_days={cfg.max_days}"
+    print_result("E1", "All 3 Tasks Loadable", "PASS", "All config loaded")
+except Exception as e:
+    print_result("E1", "All 3 Tasks Loadable", "FAIL", str(e))
+# E2
+try:
+    from app.graders import grade_episode
+    for task_id in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
+        env_e2 = GovWorkflowEnv(task_id=task_id, seed=42)
+        obs, _ = env_e2.reset(seed=42)
+        terminated = truncated = False
+        while not (terminated or truncated):
+            obs, reward, terminated, truncated, info = env_e2.step(ActionModel(action_type=ActionType.ADVANCE_TIME))
+        episode_state = env_e2.state()
+        score_res = grade_episode(episode_state)
+        assert isinstance(score_res.score, float), f"grader returned {type(score_res.score)}"
+        assert 0.0 <= score_res.score <= 1.0, f"score={score_res.score} outside [0.0, 1.0]"
+    print_result("E2", "Graders Return [0.0, 1.0]", "PASS", "Valid scores returned")
+except Exception as e:
+    print_result("E2", "Graders Return [0.0, 1.0]", "FAIL", str(e))
+# E3
+try:
+    scores = []
+    for _ in range(2):
+        env_e3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+        obs, _ = env_e3.reset(seed=42)
+        terminated = truncated = False
+        while not (terminated or truncated):
+            obs, r, terminated, truncated, info = env_e3.step(ActionModel(action_type=ActionType.ADVANCE_TIME))
+        scores.append(grade_episode(env_e3.state()).score)
+    assert scores[0] == scores[1], f"grader is non-deterministic: {scores}"
+    print_result("E3", "Grader Scores Are Deterministic", "PASS", f"score={scores[0]:.4f} both runs")
+except Exception as e:
+    print_result("E3", "Grader Scores Are Deterministic", "FAIL", str(e))
+# F1
+try:
+    from app.state_machine import StateMachine, StageType, WorkflowAction
+    sm = StateMachine()
+    stages = [StageType.SUBMISSION, StageType.DOCUMENT_VERIFICATION, StageType.FIELD_VERIFICATION, StageType.APPROVAL, StageType.ISSUANCE]
+    for i in range(len(stages) - 1):
+        current = stages[i]
+        next_stage = stages[i + 1]
+        result = sm.transition(current, WorkflowAction.ADVANCE)
+        assert result == next_stage, f"{current} -> {result}, expected {next_stage}"
+    print_result("F1", "All Legal Transitions Work", "PASS", "Transitions validated")
+except Exception as e:
+    print_result("F1", "All Legal Transitions Work", "FAIL", str(e))
+# F2
+try:
+    assert sm.is_terminal(StageType.ISSUANCE) == True, "issuance not recognized as terminal"
+    assert sm.is_terminal(StageType.SUBMISSION) == False, "submission wrongly marked terminal"
+    print_result("F2", "Terminal State Recognized", "PASS", "Terminal states correct")
+except Exception as e:
+    print_result("F2", "Terminal State Recognized", "FAIL", str(e))
+# G1
+try:
+    import app.simulator as sim_module
+    source = inspect.getfile(sim_module.LiveSimulationSession)
+    assert 'engine' in source.lower(), f"LiveSimulationSession defined in {source}, not engine.py"
+    print_result("G1", "simulator.py Is a Pure Shim", "PASS", "Shim logic confirmed")
+except Exception as e:
+    print_result("G1", "simulator.py Is a Pure Shim", "FAIL", str(e))
+# G2
+try:
+    from app.simulator import LiveSimulationSession, SimulationAgentMode, run_simulation
+    assert callable(run_simulation), "run_simulation not callable"
+    assert callable(LiveSimulationSession), "LiveSimulationSession not callable"
+    print_result("G2", "All 3 Engine Exports Importable", "PASS", "Exports valid")
+except Exception as e:
+    print_result("G2", "All 3 Engine Exports Importable", "FAIL", str(e))
+# G3
+try:
+    session = LiveSimulationSession(
+        task_id="district_backlog_easy",
+        agent_mode=SimulationAgentMode.BASELINE_POLICY,
+        seed=42,
+        max_steps=10
+    )
+    start_info = session.start_line()
+    assert isinstance(start_info, str), "start_line() did not return str"
+    step_result, _, _ = session.step_once()
+    assert "observation" in step_result, "step_once missing 'observation'"
+    assert "reward" in step_result, "step_once missing 'reward'"
+    print_result("G3", "LiveSimulationSession Full Lifecycle", "PASS", "Lifecycle valid")
+    session.close()
+except Exception as e:
+    print_result("G3", "LiveSimulationSession Full Lifecycle", "FAIL", str(e))
+# H2 / H3
+# We will do H checks via curl/pytest in bash to test the live server.
+# I1
+try:
+    from app.baselines import (
+        random_policy,
+        backlog_clearance_policy as baseline_policy,
+        greedy_sla_policy,
+        fairness_aware_policy,
+    )
+    for name, fn in [
+        ("random_policy", random_policy),
+        ("baseline_policy", baseline_policy),
+        ("greedy_sla_policy", greedy_sla_policy),
+        ("fairness_aware_policy", fairness_aware_policy),
+    ]:
+        assert callable(fn), f"{name} is not callable"
+    print_result("I1", "All 4 Policies Are Callable", "PASS", "Policies callable")
+except Exception as e:
+    print_result("I1", "All 4 Policies Are Callable", "FAIL", str(e))
+# I2
+try:
+    from app.baselines import greedy_sla_policy
+    env_i2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
+    obs_i2, _ = env_i2.reset(seed=42)
+    action_i2 = greedy_sla_policy(obs_i2)
+    assert isinstance(action_i2, ActionModel), f"policy returned {type(action_i2)}"
+    print_result("I2", "Policy Returns Valid Action", "PASS", f"action_type={action_i2.action_type}")
+except Exception as e:
+    print_result("I2", "Policy Returns Valid Action", "FAIL", str(e))
+# J1
+try:
+    env_j1 = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
+    assert hasattr(env_j1, 'observation_space'), "no observation_space"
+    assert hasattr(env_j1, 'action_space'), "no action_space"
+    print_result("J1", "Gymnasium API Compliance", "PASS", "Spaces defined")
+except Exception as e:
+    print_result("J1", "Gymnasium API Compliance", "FAIL", str(e))
+# J2
+try:
+    obs, _ = env_j1.reset(seed=42)
+    assert hasattr(env_j1, 'action_masks'), "action_masks() method missing"
+    masks = env_j1.action_masks()
+    assert hasattr(masks, '__len__'), "action_masks() must return array-like"
+    assert len(masks) == env_j1.action_space.n, f"mask length {len(masks)} != action_space.n {env_j1.action_space.n}"
+    print_result("J2", "action_masks() Method Required by MaskablePPO", "PASS", f"n={len(masks)}")
+except Exception as e:
+    print_result("J2", "action_masks() Method Required by MaskablePPO", "FAIL", str(e))
+# J3
+try:
+    check_env(env_j1, warn=True)
+    print_result("J3", "SB3 VecEnv Compatibility", "PASS", "check_env passed")
+except Exception as e:
+    print_result("J3", "SB3 VecEnv Compatibility", "FAIL", str(e))
+# J4
+try:
+    model = MaskablePPO("MlpPolicy", env_j1, verbose=0, seed=42)
+    print_result("J4", "MaskablePPO Can Initialize", "PASS", "Model initialized")
+except Exception as e:
+    print_result("J4", "MaskablePPO Can Initialize", "FAIL", str(e))
+# J5
+try:
+    obs, _ = env_j1.reset(seed=42)
+    for step in range(10):
+        masks = env_j1.action_masks()
+        valid_actions = [i for i, m in enumerate(masks) if m]
+        action = valid_actions[0] if valid_actions else 0
+        obs, reward, terminated, truncated, info = env_j1.step(action)
+        if terminated or truncated:
+            obs, _ = env_j1.reset(seed=42)
+    print_result("J5", "10-Step Rollout Without Crash", "PASS", "Rollout passed")
+except Exception as e:
+    print_result("J5", "10-Step Rollout Without Crash", "FAIL", str(e))
+# M1
+try:
+    with open("openenv.yaml", "r") as f:
+        config = yaml.safe_load(f)
+    assert "tasks" in config, "openenv.yaml missing 'tasks' key"
+    task_ids = [t["id"] for t in config["tasks"]]
+    for required in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
+        assert required in task_ids, f"{required} missing from openenv.yaml"
+    print_result("M1", "YAML Loads and Contains All 3 Tasks", "PASS", f"{len(task_ids)} tasks registered")
+except Exception as e:
+    print_result("M1", "YAML Loads and Contains All 3 Tasks", "FAIL", str(e))

baseline_openai.py ADDED Viewed

	@@ -0,0 +1,983 @@

+from __future__ import annotations
+# ── Path bootstrap ──────────────────────────────────────────────────────────
+import sys
+from pathlib import Path
+_ROOT = Path(__file__).resolve().parent
+if str(_ROOT) not in sys.path:
+    sys.path.insert(0, str(_ROOT))
+# ── Load .env ────────────────────────────────────────────────────────────────
+from dotenv import load_dotenv
+load_dotenv(dotenv_path=_ROOT / ".env", override=False)
+import argparse
+import json
+import os
+import random as _random
+import re
+import time
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from typing import Any
+from app.env import GovWorkflowEnv
+from app.models import (
+    ActionModel,
+    ActionType,
+    ObservationModel,
+    PriorityMode,
+    ServiceType,
+    StepInfoModel,
+)
+from app.tasks import get_task, list_tasks
+from app.api_gateway import create_env_gateway, TransportMode
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 1 — Model Registry & Per-Task Pools
+# ══════════════════════════════════════════════════════════════════════════════
+NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"
+# ── Global 10-Model Sequential Pool (April 2026 — Verified on NVIDIA NIM) ────
+#
+# CHANGES FROM PREVIOUS VERSION:
+#   REMOVED (invalid/unavailable IDs):
+#     qwen/qwen3-next-80b-a3b-instruct     → invalid model ID
+#     moonshotai/kimi-k2-instruct-0905     → not on NVIDIA NIM
+#     deepseek-ai/deepseek-v3.2            → wrong ID (use deepseek-v3)
+#     google/gemma-3-27b-it               → outdated (gemma-4 released)
+#     mistralai/mixtral-8x22b-instruct-v0.1 → replaced by newer models
+#   ADDED (verified April 2026):
+#     deepseek-ai/deepseek-v4-flash        → FREE endpoint, 1M context
+#     deepseek-ai/deepseek-r1             → reasoning, 685B MoE
+#     nvidia/nemotron-3-super-120b-a12b   → hybrid Mamba-Transformer, 1M ctx
+#     minimaxai/minimax-m2.7             → FREE endpoint, 230B
+#     google/gemma-4-31b-it             → latest Gemma on NVIDIA NIM
+#     qwen/qwen3.5-122b-a10b            → latest Qwen on NVIDIA NIM
+# Ordered fallback sequence. ModelRotator copies this list and steps through it
+# front to back (wrapping around) every time rotate() is called.
+GLOBAL_MODEL_POOL: list[str] = [
+    "meta/llama-3.3-70b-instruct",          # 1. Primary
+    "deepseek-ai/deepseek-v4-flash",         # 2. FREE endpoint — 1M context
+    "deepseek-ai/deepseek-r1",              # 3. Reasoning — 685B MoE
+    "nvidia/nemotron-3-super-120b-a12b",    # 4. NVIDIA native — 1M ctx
+    "qwen/qwen3.5-122b-a10b",              # 5. Qwen3.5 — tool calling
+    "deepseek-ai/deepseek-v3",             # 6. DeepSeek V3 — hybrid mode
+    "minimaxai/minimax-m2.7",             # 7. FREE endpoint — 230B
+    "google/gemma-4-31b-it",             # 8. Dense 31B — agentic workflows
+    "microsoft/phi-4-mini-instruct",     # 9. Reliable small — last resort
+    "meta/llama-3.1-8b-instruct",       # 10. Fastest safety fallback
+]
+# ── Free endpoint pool (KEY 2 — NVIDIA_API_KEY_2 fallback) ───────────────────
+# ModelRotator.current_key_id returns 2 when the active model is listed here and
+# 1 otherwise; LLMAgent uses that to choose between its two API clients.
+FREE_POOL: list[str] = [
+    "deepseek-ai/deepseek-v4-flash",
+    "minimaxai/minimax-m2.7",
+    "microsoft/phi-4-mini-instruct",
+    "meta/llama-3.1-8b-instruct",
+]
+# ── Fixed seeds ────────────────────────────────────────────────────────────────
+# run_episode() looks the task up here first and only falls back to the task's
+# own seed (get_task(task_id).seed) when the task id is not listed.
+TASK_SEEDS: dict[str, int] = {
+    "district_backlog_easy": 11,
+    "mixed_urgency_medium":  22,
+    "cross_department_hard": 33,
+}
+# Sampling parameters forwarded to chat.completions.create() by LLMAgent.act().
+LLM_TEMPERATURE = 0.2
+LLM_TOP_P       = 0.7
+LLM_MAX_TOKENS  = 512
+# Default cap on environment steps per episode (run_episode's max_steps default
+# and the --max-steps CLI default).
+MAX_LLM_STEPS   = 80
+# Pause, in seconds, after each LLM step. Read from the LLM_CALL_DELAY environment
+# variable when set; run_episode's delay_override takes precedence over it.
+LLM_CALL_DELAY  = float(os.environ.get("LLM_CALL_DELAY", "12.0"))
+# run_episode() adds a uniform random offset in [-LLM_CALL_JITTER, +LLM_CALL_JITTER]
+# to the delay and never sleeps for less than one second.
+LLM_CALL_JITTER = 1.0
+# ── Enum fields that MUST be lowercase for Pydantic StrEnum ──────────────────
+# _extract_json_action() lowercases the string value of each of these keys.
+_ENUM_FIELDS = {"action_type", "priority_mode", "service", "target_service"}
+# ── Canonical field names (Phase 2 update — do NOT use legacy names) ─────────
+#   CORRECT                        WRONG (legacy)
+#   snap.blocked_missing_docs  ←   snap.missing_docs_cases
+#   snap.total_pending         ←   snap.active_cases
+#   obs.fairness_gap           ←   obs.fairness_index
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 2 — Model Rotator
+# ══════════════════════════════════════════════════════════════════════════════
+class ModelRotator:
+    def __init__(self, task_id: str) -> None:
+        self._sequence: list[str] = GLOBAL_MODEL_POOL.copy()
+        self._index = 0
+        self._task_id = task_id
+        self._rotation_log: list[dict[str, str]] = []
+    @property
+    def current(self) -> str:
+        return self._sequence[self._index]
+    @property
+    def current_key_id(self) -> int:
+        return 2 if self.current in FREE_POOL else 1
+    @property
+    def pool_exhausted(self) -> bool:
+        return len(self._rotation_log) >= 50
+    def rotate(self, reason: str = "error") -> str | None:
+        old = self.current
+        self._rotation_log.append({"from": old, "reason": reason})
+        self._index = (self._index + 1) % len(self._sequence)
+        new = self._sequence[self._index]
+        print(
+            f"\n  🔄 Model rotated: "
+            f"{old.split('/')[-1]}  →  {new.split('/')[-1]}  ({reason})"
+        )
+        return new
+    def summary(self) -> list[dict]:
+        return list(self._rotation_log)
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 3 — Result Dataclasses
+# ══════════════════════════════════════════════════════════════════════════════
+@dataclass
+class StepRecord:
+    """One row of the per-step log that run_episode() builds for an episode."""
+    # 1-based step counter maintained by run_episode().
+    step: int
+    # obs.day taken from the observation returned by that step.
+    day: int
+    # action.action_type.value of the action that was sent.
+    action_type: str
+    # Step reward, rounded to 4 decimals by run_episode().
+    reward: float
+    # Copy of info.invalid_action for the step.
+    invalid: bool
+    # obs.total_backlog / obs.total_completed after the step.
+    total_backlog: int
+    total_completed: int
+    # Model id (or "heuristic") recorded by run_episode() for the step.
+    model_used: str
+    # De-duplicated explanation / effect strings collected from the step info.
+    notes: list[str]
+@dataclass
+class EpisodeResult:
+    task_id: str
+    agent: str
+    primary_model: str
+    seed: int
+    score: float
+    grader_name: str
+    total_steps: int
+    total_reward: float
+    total_completed: int
+    total_sla_breaches: int
+    total_invalid_actions: int
+    final_day: int
+    terminated: bool
+    truncated: bool
+    grader_metrics: dict[str, float]
+    step_log: list[StepRecord]
+    elapsed_seconds: float
+    model_rotations: list[dict]
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+    def summary(self) -> str:
+        usage: dict[str, int] = {}
+        for r in self.step_log:
+            usage[r.model_used] = usage.get(r.model_used, 0) + 1
+        usage_str = ", ".join(
+            f"{m.split('/')[-1]} ({c})" for m, c in usage.items()
+        )
+        return (
+            f"[{self.task_id}] agent={self.agent} "
+            f"score={self.score:.3f} reward={self.total_reward:.2f} "
+            f"completed={self.total_completed} breaches={self.total_sla_breaches} "
+            f"invalid={self.total_invalid_actions} "
+            f"rotations={len(self.model_rotations)} "
+            f"day={self.final_day} steps={self.total_steps} "
+            f"time={self.elapsed_seconds:.1f}s\n"
+            f"  Model usage: {usage_str}"
+        )
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 4 — Direct Environment Wrapper
+# ══════════════════════════════════════════════════════════════════════════════
+class DirectEnvClient:
+    """
+    In-process environment client: wraps a GovWorkflowEnv and mirrors its
+    terminated / truncated flags so callers can poll them between steps.
+    NOTE(review): an earlier note on this class stated that grade() calls
+    grade_episode(task_id, episode_state) with the EpisodeStateModel returned by
+    get_episode_state(), and that passing self.env.state() was the wrong
+    signature. The code below still passes self.env.state() as the only
+    argument — confirm against app.graders.grade_episode before relying on it.
+    """
+    def __init__(self, task_id: str, seed: int) -> None:
+        self.env = GovWorkflowEnv(task_id=task_id)
+        self._seed = seed
+        self._task_id = task_id
+        self.terminated = False
+        self.truncated = False
+    def reset(self) -> ObservationModel:
+        """Reset the wrapped env with the stored seed and return the first observation."""
+        # The second element of the reset() tuple is discarded.
+        obs, _ = self.env.reset(seed=self._seed)
+        self.terminated = False
+        self.truncated = False
+        return obs
+    def step(
+        self, action: ActionModel
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
+        """Forward one action to the env and cache the episode-end flags."""
+        obs, reward, terminated, truncated, info = self.env.step(action)
+        self.terminated = terminated
+        self.truncated = truncated
+        return obs, reward, terminated, truncated, info
+    def grade(self) -> tuple[float, str, dict[str, float]]:
+        """Return (score, grader_name, metrics) from app.graders.grade_episode."""
+        # Imported lazily, inside the method, rather than at module import time.
+        from app.graders import grade_episode
+        episode_state = self.env.state()
+        result = grade_episode(episode_state)
+        return result.score, result.grader_name, result.metrics
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 5 — HTTP Environment Wrapper
+# ══════════════════════════════════════════════════════════════════════════════
+class HttpEnvClient:
+    def __init__(
+        self, task_id: str, seed: int, base_url: str = "http://localhost:7860"
+    ) -> None:
+        try:
+            import requests as _req
+            self._req = _req
+        except ImportError:
+            raise ImportError("pip install requests  — required for --mode http")
+        self._task_id = task_id
+        self._seed = seed
+        self._base_url = base_url.rstrip("/")
+        self._session_id: str | None = None
+        self.terminated = False
+        self.truncated = False
+    def _post(self, path: str, body: dict) -> dict:
+        r = self._req.post(
+            f"{self._base_url}{path}", json=body, timeout=30
+        )
+        r.raise_for_status()
+        return r.json()
+    def reset(self) -> ObservationModel:
+        data = self._post("/reset", {"task_id": self._task_id, "seed": self._seed})
+        self._session_id = data["session_id"]
+        self.terminated = False
+        self.truncated = False
+        return ObservationModel(**data["observation"])
+    def step(
+        self, action: ActionModel
+    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
+        data = self._post("/step", {
+            "session_id": self._session_id,
+            "action": action.model_dump(exclude_none=True),
+        })
+        obs  = ObservationModel(**data["observation"])
+        info = StepInfoModel(**data["info"])
+        self.terminated = data["terminated"]
+        self.truncated  = data["truncated"]
+        return obs, data["reward"], data["terminated"], data["truncated"], info
+    def grade(self) -> tuple[float, str, dict[str, float]]:
+        data = self._post("/grade", {"session_id": self._session_id})
+        return data["score"], data["grader_name"], data["metrics"]
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 6 — Heuristic Baseline Agent
+# ══════════════════════════════════════════════════════════════════════════════
+class HeuristicAgent:
+    """
+    Rule-based agent. Requires no API key.
+    act() issues at most one administrative action per simulated day and
+    otherwise advances time. Rules are tried in a fixed priority order:
+    set priority mode, assign idle officers, rebalance officers, request
+    missing documents, escalate near the deadline, advance time.
+    FIXED field names (Phase 2 canonical):
+      snap.blocked_missing_docs  ← was snap.missing_docs_cases
+      snap.total_pending         ← was snap.active_cases
+    """
+    def __init__(self) -> None:
+        self._priority_set = False
+        # Day on which the last admin (non-advance) action was issued.
+        self._admin_action_day: int | None = None
+        # Day on which missing documents were last requested.
+        self._last_doc_request_day: int | None = None
+    def reset(self) -> None:
+        """Forget all per-episode bookkeeping."""
+        self._priority_set = False
+        self._admin_action_day = None
+        self._last_doc_request_day = None
+    # Class-level label so run_episode() can read agent.current_model uniformly.
+    current_model = "heuristic"
+    def rotation_summary(self) -> list[dict]:
+        """The heuristic never rotates models, so the log is always empty."""
+        return []
+    def update_reward(self, _: float) -> None:
+        """No-op; present so the runner can treat both agent types alike."""
+        pass
+    @staticmethod
+    def _svc_key(service: str | ServiceType) -> str:
+        """Normalise a service key to its string value."""
+        return service.value if isinstance(service, ServiceType) else str(service)
+    def act(self, obs: ObservationModel) -> ActionModel:
+        """Pick the next action for the given observation."""
+        # queue_snapshots is consumed as a mapping here (values are the snapshots).
+        snapshots = list(obs.queue_snapshots.values())
+        # One admin action per simulated day; then always advance time.
+        if self._admin_action_day == obs.day:
+            return ActionModel(action_type=ActionType.ADVANCE_TIME)
+        # 1. Set priority mode once
+        if not self._priority_set:
+            self._priority_set = True
+            self._admin_action_day = obs.day
+            return ActionModel(
+                action_type=ActionType.SET_PRIORITY_MODE,
+                priority_mode=PriorityMode.URGENT_FIRST,
+            )
+        # 2. Allocate any idle officer to the currently most loaded service.
+        if obs.officer_pool.idle_officers > 0 and snapshots:
+            most_loaded = max(snapshots, key=lambda s: s.total_pending)
+            self._admin_action_day = obs.day
+            return ActionModel(
+                action_type=ActionType.ASSIGN_CAPACITY,
+                capacity_assignment={most_loaded.service_type.value: 1},
+            )
+        # Only used by rule 5 below.
+        days_left = obs.max_days - obs.day
+        # 3. Reallocate one officer if load/officer ratio is clearly imbalanced.
+        allocated = {
+            self._svc_key(svc): int(off)
+            for svc, off in obs.officer_pool.allocated.items()
+        }
+        if snapshots and len(allocated) >= 2:
+            case_counts = {s.service_type.value: s.total_pending for s in snapshots}
+            best_src: tuple[str, int] | None = None
+            best_tgt: tuple[str, int] | None = None
+            src_ratio = float("inf")
+            tgt_ratio = -1.0
+            # Source: lowest pending-per-officer ratio among services that can
+            # spare an officer (more than one allocated).
+            for svc, officers in allocated.items():
+                if officers <= 1:
+                    continue
+                ratio = case_counts.get(svc, 0) / max(officers, 1)
+                if ratio < src_ratio:
+                    src_ratio = ratio
+                    best_src = (svc, officers)
+            # Target: highest pending-per-officer ratio across all services.
+            for svc, officers in allocated.items():
+                ratio = case_counts.get(svc, 0) / max(officers, 1)
+                if ratio > tgt_ratio:
+                    tgt_ratio = ratio
+                    best_tgt = (svc, officers)
+            # Move an officer only when the target is more than 1.8x as loaded.
+            if best_src and best_tgt and best_src[0] != best_tgt[0] and tgt_ratio > src_ratio * 1.8:
+                self._admin_action_day = obs.day
+                return ActionModel(
+                    action_type=ActionType.REALLOCATE_OFFICERS,
+                    reallocation_delta={best_src[0]: -1, best_tgt[0]: 1},
+                )
+        # 4. Request missing docs conservatively to avoid repeatedly resetting
+        # resolution days for already-requested cases.
+        # Allowed when something is blocked AND (never requested before, or at
+        # least 3 days have passed, or no resolutions are still pending).
+        can_request_docs = (
+            any(s.blocked_missing_docs > 0 for s in snapshots)
+            and (
+                self._last_doc_request_day is None
+                or (obs.day - self._last_doc_request_day) >= 3
+                or obs.pending_doc_resolutions == 0
+            )
+        )
+        if can_request_docs:
+            # Most blocked cases first; SLA risk, then backlog, break ties.
+            target_docs = max(
+                snapshots,
+                key=lambda s: (s.blocked_missing_docs, s.current_sla_risk, s.total_pending),
+            )
+            if target_docs.blocked_missing_docs > 0:
+                self._admin_action_day = obs.day
+                self._last_doc_request_day = obs.day
+                return ActionModel(
+                    action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
+                    service_target=target_docs.service_type,
+                )
+        # 5. Escalate in the final window when urgency is present.
+        if obs.escalation_budget_remaining > 0:
+            urgent_snaps = [s for s in snapshots if s.urgent_pending > 0]
+            if urgent_snaps and days_left <= 5:
+                target = max(urgent_snaps, key=lambda s: s.urgent_pending)
+                self._admin_action_day = obs.day
+                return ActionModel(
+                    action_type=ActionType.ESCALATE_SERVICE,
+                    escalation_target=target.service_type,
+                )
+        # 6. Default — progress simulation.
+        return ActionModel(action_type=ActionType.ADVANCE_TIME)
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 7 — System Prompt
+# ══════════════════════════════════════════════════════════════════════════════
+# System message prepended to every chat request by LLMAgent.act().
+# NOTE(review): the action examples below use the keys "service",
+# "officer_delta" and "target_service", whereas HeuristicAgent builds
+# ActionModel with capacity_assignment, reallocation_delta, service_target and
+# escalation_target — confirm which set ActionModel actually accepts.
+SYSTEM_PROMPT = """You are an expert government-office workflow manager AI.
+Your job is to control a simulated government district office processing citizen
+applications across multiple services.
+SERVICES: passport, driving_license, gst_registration, income_certificate,
+          caste_certificate, birth_certificate, land_registration
+WORKFLOW STAGES (in order):
+  submission → document_verification → field_verification → approval → issuance
+YOUR GOAL: Maximise the episode score (0.0 to 1.0) by:
+  - Completing as many applications as possible within SLA deadlines
+  - Prioritising urgent cases (urgency level 3 > 2 > 1)
+  - Keeping all services fairly served (no service left behind)
+  - Using escalations sparingly — only when a case is about to breach SLA
+  - Keeping officers productively busy (not idle)
+QUEUE STATUS FIELDS EXPLAINED:
+  backlog      = total_pending applications in queue
+  missing_docs = blocked_missing_docs (stuck waiting for documents)
+  urgent       = urgent_cases (high-urgency applications)
+  breached     = breached_cases (already past SLA deadline)
+AVAILABLE ACTIONS — return exactly ONE per turn as JSON:
+1. Set queue processing order (do this FIRST on day 0 only):
+   {"action_type": "set_priority_mode", "priority_mode": "urgent_first"}
+   priority_mode options: urgent_first | oldest_first | balanced | backlog_clearance
+2. Deploy a reserve officer to a service (day 0 only if reserves available):
+   {"action_type": "assign_capacity", "service": "driving_license", "officer_delta": 1}
+3. Unblock a stuck application with missing documents:
+   {"action_type": "request_missing_documents", "service": "driving_license"}
+4. Escalate one case to emergency priority (VERY LIMITED — use wisely):
+   {"action_type": "escalate_service", "service": "income_certificate"}
+5. Move officer between services (only when load ratio > 4x):
+   {"action_type": "reallocate_officers", "service": "birth_certificate",
+    "target_service": "driving_license", "officer_delta": 1}
+6. Let one working day pass — THE ONLY ACTION THAT PROCESSES APPLICATIONS:
+   {"action_type": "advance_time"}
+CRITICAL RULES:
+  - ALL values MUST be lowercase: driving_license NOT DRIVING_LICENSE
+  - advance_time is the ONLY action that earns progress reward
+  - Do NOT chain more than 2 admin actions before calling advance_time
+  - Do NOT escalate before (max_days - 5) unless case already breached SLA
+  - Do NOT reallocate if source service has fewer than 2 officers
+OPTIMAL STRATEGY:
+  Day 0:     set_priority_mode → assign_capacity (if reserves > 0) → advance_time
+  Every day: request_missing_documents (ONE service, highest missing_docs) → advance_time
+  Final 5:   escalate_service (urgent/breached only) → advance_time
+RESPONSE FORMAT — return ONLY a raw JSON object, nothing else:
+  CORRECT:   {"action_type": "advance_time"}
+  CORRECT:   {"action_type": "request_missing_documents", "service": "driving_license"}
+  WRONG:     ```json\n{"action_type": "ADVANCE_TIME"}```
+"""
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 8 — JSON Extraction with Lowercase Normaliser
+# ══════════════════════════════════════════════════════════════════════════════
+def _extract_json_action(raw: str) -> dict[str, Any]:
+    cleaned = re.sub(r"```(?:json)?", "", raw).strip()
+    parsed: dict[str, Any] | None = None
+    try:
+        parsed = json.loads(cleaned)
+    except json.JSONDecodeError:
+        pass
+    if parsed is None:
+        match = re.search(r"\{[^{}]*\}", cleaned, re.DOTALL)
+        if match:
+            try:
+                parsed = json.loads(match.group())
+            except json.JSONDecodeError:
+                pass
+    if parsed is None:
+        print(f"  ⚠ JSON parse failed, falling back to advance_time. Raw: {raw[:120]!r}")
+        return {"action_type": "advance_time"}
+    for enum_field in _ENUM_FIELDS:
+        if enum_field in parsed and isinstance(parsed[enum_field], str):
+            parsed[enum_field] = parsed[enum_field].lower()
+    return parsed
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 9 — Observation → User Message Builder
+# ══════════════════════════════════════════════════════════════════════════════
+def _build_user_message(
+    obs: ObservationModel, step_num: int, cumulative_reward: float
+) -> str:
+    """
+    FIXED field names (Phase 2 canonical):
+      snap.total_pending        ← was snap.active_cases
+      snap.blocked_missing_docs ← was snap.missing_docs_cases
+    """
+    queue_lines = []
+    for snap in obs.queue_snapshots:
+        officers = obs.officer_pool.allocations.get(snap.service, 0)
+        queue_lines.append(
+            f"  {snap.service:<22}: "
+            f"backlog={snap.total_pending:>3} "
+            f"officers={officers} "
+            f"missing_docs={snap.blocked_missing_docs:>2} "
+            f"urgent={snap.urgent_cases} "
+            f"breached={snap.breached_cases} "
+            f"avg_age={snap.avg_age_days:.1f}d"
+        )
+    return (
+        f"STEP {step_num} | Day {obs.day}/{obs.max_days} "
+        f"| Days remaining: {obs.max_days - obs.day}\n"
+        f"Cumulative reward: {cumulative_reward:.2f}\n"
+        f"Priority mode: {obs.priority_mode}\n"
+        f"Reserve officers: {obs.officer_pool.reserve_officers}\n"
+        f"Escalation budget remaining: {obs.escalation_budget_remaining}\n"
+        f"Total pending: {obs.total_backlog} "
+        f"| Completed: {obs.total_completed} "
+        f"| SLA breaches: {obs.total_sla_breaches}\n"
+        f"Fairness gap: {obs.fairness_gap:.3f}\n\n"
+        f"QUEUE STATUS:\n" + "\n".join(queue_lines) + "\n\n"
+        f"Return a single JSON action object. All values lowercase."
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 10 — LLM Agent with Model Rotation
+# ══════════════════════════════════════════════════════════════════════════════
+class LLMAgent:
+    def __init__(
+        self,
+        task_id: str,
+        model_override: str | None = None,
+        api_key: str | None = None,
+    ) -> None:
+        try:
+            from openai import OpenAI
+            self._OpenAI = OpenAI
+        except ImportError:
+            raise ImportError("pip install openai  — required for LLM agent")
+        resolved_key = api_key or os.environ.get("NVIDIA_API_KEY", "")
+        self._api_key_2 = os.environ.get("NVIDIA_API_KEY_2", "")
+        if not resolved_key:
+            raise ValueError(
+                "NVIDIA_API_KEY not set.\n"
+                "  .env file : NVIDIA_API_KEY=nvapi-xxxxxxxxxxxx\n"
+                "  Get free key: https://build.nvidia.com/explore/discover"
+            )
+        self._api_key = resolved_key
+        self._task_id = task_id
+        self._rotator = ModelRotator(task_id)
+        if model_override:
+            seq = [model_override] + [
+                m for m in self._rotator._sequence if m != model_override
+            ]
+            self._rotator._sequence = seq
+        self._client = self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key)
+        self._client_2 = (
+            self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key_2)
+            if self._api_key_2 else None
+        )
+        self._history: list[dict[str, str]] = []
+        self._cumulative_reward = 0.0
+    @property
+    def current_model(self) -> str:
+        return self._rotator.current
+    def reset(self) -> None:
+        self._history = []
+        self._cumulative_reward = 0.0
+        self._rotator = ModelRotator(self._task_id)
+    def update_reward(self, reward: float) -> None:
+        self._cumulative_reward += reward
+    def rotation_summary(self) -> list[dict]:
+        return self._rotator.summary()
+    def act(self, obs: ObservationModel, step_num: int) -> ActionModel:
+        if self._rotator.pool_exhausted:
+            print("  ⚠ Pool exhausted — returning advance_time")
+            return ActionModel(action_type=ActionType.ADVANCE_TIME)
+        user_message = _build_user_message(obs, step_num, self._cumulative_reward)
+        self._history.append({"role": "user", "content": user_message})
+        if len(self._history) > 20:
+            self._history = self._history[-20:]
+        messages = [{"role": "system", "content": SYSTEM_PROMPT}] + self._history
+        raw_reply = ""
+        while True:
+            try:
+                active_client = self._client
+                if self._rotator.current_key_id == 2 and self._client_2:
+                    active_client = self._client_2
+                response = active_client.chat.completions.create(
+                    model=self._rotator.current,
+                    messages=messages,
+                    temperature=LLM_TEMPERATURE,
+                    top_p=LLM_TOP_P,
+                    max_tokens=LLM_MAX_TOKENS,
+                    timeout=30,
+                )
+                raw_reply = response.choices.message.content or ""
+                break
+            except KeyboardInterrupt:
+                raise
+            except Exception as exc:
+                err_name = type(exc).__name__
+                err_msg  = str(exc)[:120]
+                print(f"  ⚠ {err_name} on {self._rotator.current.split('/')[-1]}: {err_msg}")
+                self._rotator.rotate(reason=err_name)
+                time.sleep(1.0)
+                if self._rotator.pool_exhausted:
+                    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+        self._history.append({"role": "assistant", "content": raw_reply})
+        action_dict = _extract_json_action(raw_reply)
+        try:
+            return ActionModel(**action_dict)
+        except Exception as exc:
+            print(f"  ⚠ ActionModel parse failed ({exc}), using advance_time")
+            return ActionModel(action_type=ActionType.ADVANCE_TIME)
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 11 — Episode Runner
+# ══════════════════════════════════════════════════════════════════════════════
+def run_episode(
+    task_id: str,
+    agent_type: str,
+    model_override: str | None,
+    mode: TransportMode,
+    server_url: str,
+    api_key: str | None,
+    verbose: bool,
+    max_steps: int = MAX_LLM_STEPS,
+    delay_override: float | None = None,
+) -> EpisodeResult:
+    seed  = TASK_SEEDS.get(task_id, get_task(task_id).seed)
+    delay = delay_override if delay_override is not None else LLM_CALL_DELAY
+    force_fastapi = os.getenv("FORCE_FASTAPI_GATEWAY", "0").strip().lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+    env_api_prefix = os.getenv("OPENENV_ENV_API_PREFIX", "").strip()
+    client = create_env_gateway(
+        task_id=task_id,
+        seed=seed,
+        mode=mode,  # type: ignore[arg-type]
+        base_url=server_url,
+        api_prefix=env_api_prefix,
+        enforce_fastapi=force_fastapi,
+    )
+    if agent_type == "llm":
+        agent: HeuristicAgent | LLMAgent = LLMAgent(
+            task_id=task_id,
+            model_override=model_override,
+            api_key=api_key,
+        )
+        primary_label = agent.current_model
+    else:
+        agent = HeuristicAgent()
+        primary_label = "heuristic"
+    agent.reset()
+    obs = client.reset()
+    step_log: list[StepRecord] = []
+    total_reward = 0.0
+    total_invalid = 0
+    step_num = 0
+    start = time.perf_counter()
+    print(f"\n{'═'*65}")
+    print(f"  Task  : {task_id}")
+    if agent_type == "llm":
+        k1 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY", "") else "❌ MISSING"
+        k2 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY_2", "") else "⚠ not set"
+        print(f"  KEY 1 : {k1}   KEY 2 : {k2}")
+        pool_short = " → ".join(m.split("/")[-1][:14] for m in GLOBAL_MODEL_POOL)
+        print(f"  Pool  : {pool_short}")
+    resolved_mode = getattr(client, "transport", mode)
+    print(f"  Agent : {agent_type}  |  Mode: {resolved_mode}  |  Seed: {seed}")
+    print(f"  Max steps: {max_steps}  |  Delay: {delay}s")
+    print(f"{'═'*65}")
+    while not (client.terminated or client.truncated) and step_num < max_steps:
+        step_num += 1
+        current_model = agent.current_model
+        if agent_type == "llm":
+            action = agent.act(obs, step_num)
+        else:
+            action = agent.act(obs)
+        obs, reward, terminated, truncated, info = client.step(action)
+        agent.update_reward(reward)
+        total_reward += reward
+        if info.invalid_action:
+            total_invalid += 1
+        step_notes: list[str] = []
+        legacy_notes = getattr(info, "notes", None)
+        if isinstance(legacy_notes, list):
+            step_notes.extend(str(n).strip() for n in legacy_notes if str(n).strip())
+        elif isinstance(legacy_notes, str) and legacy_notes.strip():
+            step_notes.append(legacy_notes.strip())
+        if info.action_explanation.strip():
+            step_notes.append(info.action_explanation.strip())
+        step_notes.extend(s.strip() for s in info.effects_resolved_this_step if s.strip())
+        step_notes = list(dict.fromkeys(step_notes))
+        record = StepRecord(
+            step=step_num,
+            day=obs.day,
+            action_type=action.action_type.value,
+            reward=round(reward, 4),
+            invalid=info.invalid_action,
+            total_backlog=obs.total_backlog,
+            total_completed=obs.total_completed,
+            model_used=current_model,
+            notes=step_notes,
+        )
+        step_log.append(record)
+        if verbose:
+            status    = "❌" if info.invalid_action else "✅"
+            model_tag = (
+                f"[{current_model.split('/')[-1][:22]}]"
+                if agent_type == "llm" else ""
+            )
+            print(
+                f"  step={step_num:3d} day={obs.day:2d} "
+                f"action={action.action_type.value:<28} "
+                f"reward={reward:+.3f}  {status}  {model_tag}"
+            )
+            if step_notes:
+                print(f"         notes: {step_notes}")
+        if agent_type == "llm":
+            actual_delay = delay + _random.uniform(-LLM_CALL_JITTER, LLM_CALL_JITTER)
+            if not verbose:
+                print(
+                    f"  Step {step_num}/{max_steps} — sleeping {actual_delay:.1f}s "
+                    f"[{current_model.split('/')[-1][:20]}]",
+                    end="\r", flush=True,
+                )
+            time.sleep(max(1.0, actual_delay))
+            if not verbose:
+                print(" " * 80, end="\r", flush=True)
+    score, grader_name, grader_metrics = client.grade()
+    elapsed = round(time.perf_counter() - start, 2)
+    rotations = agent.rotation_summary()
+    print(f"\n{'-'*65}")
+    print(f"  SCORE  : {score:.3f} / 1.000  (grader: {grader_name})")
+    print(f"  Reward : {total_reward:.2f}  |  Steps: {step_num}")
+    print(f"  Completed: {obs.total_completed}  |  SLA breaches: {obs.total_sla_breaches}")
+    print(f"  Invalid actions: {total_invalid}  |  Model rotations: {len(rotations)}")
+    print(f"  Time: {elapsed}s")
+    print(f"  Grader metrics:")
+    for metric, value in grader_metrics.items():
+        bar = "█" * int(value * 20)
+        print(f"    {metric:<34} {value:.3f}  {bar}")
+    if rotations:
+        print(f"  Rotation log:")
+        for r in rotations:
+            print(f"    {r['from'].split('/')[-1]:<30} → rotated ({r['reason']})")
+    print(f"{'-'*65}")
+    return EpisodeResult(
+        task_id=task_id,
+        agent=agent_type,
+        primary_model=primary_label,
+        seed=seed,
+        score=score,
+        grader_name=grader_name,
+        total_steps=step_num,
+        total_reward=round(total_reward, 4),
+        total_completed=obs.total_completed,
+        total_sla_breaches=obs.total_sla_breaches,
+        total_invalid_actions=total_invalid,
+        final_day=obs.day,
+        terminated=client.terminated,
+        truncated=client.truncated,
+        grader_metrics=grader_metrics,
+        step_log=step_log,
+        elapsed_seconds=elapsed,
+        model_rotations=rotations,
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 12 — Reporter
+# ══════════════════════════════════════════════════════════════════════════════
+def save_results(results: list[EpisodeResult], out_dir: Path) -> Path:
+    out_dir.mkdir(parents=True, exist_ok=True)
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    out_path = out_dir / f"baseline_run_{ts}.json"
+    payload = {
+        "run_timestamp": datetime.now().isoformat(),
+        "total_episodes": len(results),
+        "average_score": round(sum(r.score for r in results) / len(results), 4),
+        "model_pool": GLOBAL_MODEL_POOL,
+        "free_pool": FREE_POOL,
+        "episodes": [asdict(r) for r in results],
+    }
+    out_path.write_text(json.dumps(payload, indent=2))
+    return out_path
+def print_leaderboard(results: list[EpisodeResult]) -> None:
+    print(f"\n{'═'*72}")
+    print("  LEADERBOARD")
+    print(f"{'═'*72}")
+    header = (
+        f"  {'TASK':<32} {'MODEL':<24} {'SCORE':>7}  "
+        f"{'REWARD':>8}  {'DONE':>5}  {'ROT':>4}"
+    )
+    print(header)
+    print(f"  {'-'*32} {'-'*24} {'-'*7}  {'-'*8}  {'-'*5}  {'-'*4}")
+    for r in sorted(results, key=lambda x: -x.score):
+        model_label = r.primary_model.split("/")[-1][:23]
+        print(
+            f"  {r.task_id:<32} {model_label:<24} {r.score:>7.3f}  "
+            f"{r.total_reward:>8.2f}  {r.total_completed:>5}  "
+            f"{len(r.model_rotations):>4}"
+        )
+    avg = sum(r.score for r in results) / len(results)
+    print(f"  {'-'*32} {'-'*24} {'-'*7}  {'-'*8}  {'-'*5}  {'-'*4}")
+    print(f"  {'AVERAGE':<32} {'':<24} {avg:>7.3f}")
+    print(f"{'═'*72}\n")
+# ══════════════════════════════════════════════════════════════════════════════
+# SECTION 13 — CLI Entry Point
+# ══════════════════════════════════════════════════════════════════════════════
+def build_parser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(
+        description="Gov Workflow OpenEnv — Multi-Model Rotating LLM Baseline",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+10-model pool (April 2026):
+  llama-3.3-70b → deepseek-v4-flash → deepseek-r1 → nemotron-3-super →
+  qwen3.5-122b → deepseek-v3 → minimax-m2.7 → gemma-4-31b →
+  phi-4-mini → llama-3.1-8b
+Examples:
+  python baseline_openai.py --agent heuristic --verbose
+  python baseline_openai.py --agent llm --task district_backlog_easy --verbose
+  python baseline_openai.py --agent llm --task all --save-results
+  python baseline_openai.py --agent llm --model deepseek-ai/deepseek-v4-flash
+  python baseline_openai.py --mode http --url http://localhost:7860 --agent llm
+  python baseline_openai.py --mode auto --url http://localhost:7860 --agent llm
+        """,
+    )
+    p.add_argument("--agent", choices=["llm", "heuristic"], default="heuristic")
+    p.add_argument("--task", choices=list_tasks() + ["all"], default="all")
+    p.add_argument("--model", default=None)
+    p.add_argument("--mode", choices=["direct", "http", "auto"], default="auto")
+    p.add_argument("--url", default="http://localhost:7860")
+    p.add_argument("--max-steps", type=int, default=MAX_LLM_STEPS)
+    p.add_argument("--delay", type=float, default=None)
+    p.add_argument("--api-key", default=None)
+    p.add_argument("--verbose", action="store_true")
+    p.add_argument("--save-results", action="store_true")
+    return p
+def main() -> None:
+    args = build_parser().parse_args()
+    tasks = list_tasks() if args.task == "all" else [args.task]
+    print(f"\n{'═'*65}")
+    print("  Gov Workflow OpenEnv — Baseline Runner (April 2026)")
+    print(f"  Agent : {args.agent.upper()}")
+    if args.agent == "llm":
+        pool_disp = " → ".join(m.split("/")[-1][:12] for m in GLOBAL_MODEL_POOL)
+        print(f"  Pool  : {pool_disp}")
+    print(f"  Mode  : {args.mode}  |  Tasks: {', '.join(tasks)}")
+    print(f"{'═'*65}")
+    if args.agent == "llm":
+        key = args.api_key or os.environ.get("NVIDIA_API_KEY", "")
+        if not key:
+            print("\n❌  NVIDIA_API_KEY not set.")
+            print("    .env file  : NVIDIA_API_KEY=nvapi-xxxx")
+            print("    PowerShell : $env:NVIDIA_API_KEY='nvapi-xxxx'")
+            print("    Get free key: https://build.nvidia.com/explore/discover\n")
+            sys.exit(1)
+    else:
+        key = None
+    results: list[EpisodeResult] = []
+    for task_id in tasks:
+        result = run_episode(
+            task_id=task_id,
+            agent_type=args.agent,
+            model_override=args.model,
+            mode=args.mode,
+            server_url=args.url,
+            api_key=key,
+            verbose=args.verbose,
+            max_steps=args.max_steps,
+            delay_override=args.delay,
+        )
+        results.append(result)
+    print_leaderboard(results)
+    if args.save_results:
+        out = save_results(results, Path("results"))
+        print(f"  Results saved → {out}\n")
+if __name__ == "__main__":
+    main()

client.py ADDED Viewed

	@@ -0,0 +1,134 @@

+"""
+Typed HTTP client for Gov Workflow OpenEnv.
+This keeps a simple OpenEnv-style client interface:
+    reset() -> observation wrapper
+    step(action) -> step wrapper
+    state() -> state wrapper
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, TYPE_CHECKING
+import requests
+try:
+    from openenv.core import EnvClient
+    from openenv.core.env_client import StepResult
+except ModuleNotFoundError:
+    EnvClient = None  # type: ignore[assignment]
+    StepResult = None  # type: ignore[assignment]
+if TYPE_CHECKING:
+    from app.models import ActionModel, EpisodeStateModel, ObservationModel, StepInfoModel
+@dataclass
+class ClientStepResult:
+    """Typed view of one `/step` response, as built by `GovWorkflowClient.step`."""
+    # Parsed from the response's "observation" object.
+    observation: "ObservationModel"
+    # Response "reward", coerced to float by the client.
+    reward: float
+    # Response "done" / "terminated" / "truncated" flags, each coerced to bool.
+    done: bool
+    terminated: bool
+    truncated: bool
+    # Parsed from the response's "info" object.
+    info: "StepInfoModel"
+class GovWorkflowClient:
+    """Small typed client for the FastAPI deployment."""
+    def __init__(self, base_url: str) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.session_id: str | None = None
+    def _post(self, path: str, body: dict[str, Any]) -> dict[str, Any]:
+        response = requests.post(f"{self.base_url}{path}", json=body, timeout=30)
+        response.raise_for_status()
+        return response.json()
+    def reset(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> "ObservationModel":
+        from app.models import ObservationModel
+        payload: dict[str, Any] = {"task_id": task_id}
+        if seed is not None:
+            payload["seed"] = seed
+        data = self._post("/reset", payload)
+        self.session_id = data["session_id"]
+        return ObservationModel(**data["observation"])
+    def step(self, action: "ActionModel") -> ClientStepResult:
+        from app.models import ObservationModel, StepInfoModel
+        if not self.session_id:
+            raise RuntimeError("Session not initialized. Call reset() first.")
+        data = self._post(
+            "/step",
+            {
+                "session_id": self.session_id,
+                "action": action.model_dump(exclude_none=True),
+            },
+        )
+        return ClientStepResult(
+            observation=ObservationModel(**data["observation"]),
+            reward=float(data["reward"]),
+            done=bool(data["done"]),
+            terminated=bool(data["terminated"]),
+            truncated=bool(data["truncated"]),
+            info=StepInfoModel(**data["info"]),
+        )
+    def state(self, include_action_history: bool = False) -> "EpisodeStateModel":
+        from app.models import EpisodeStateModel
+        if not self.session_id:
+            raise RuntimeError("Session not initialized. Call reset() first.")
+        data = self._post(
+            "/state",
+            {
+                "session_id": self.session_id,
+                "include_action_history": include_action_history,
+            },
+        )
+        return EpisodeStateModel(**data["state"])
+# Define the real websocket client only when the optional `openenv` imports
+# above succeeded; otherwise fall through to a placeholder with the same name.
+if EnvClient is not None and StepResult is not None:
+    class GovWorkflowOpenEnvClient(
+        EnvClient["ActionModel", "ObservationModel", "EpisodeStateModel"]
+    ):
+        """
+        OpenEnv-native websocket client.
+        This class is additive and does not replace the existing HTTP client above.
+        """
+        # NOTE(review): the three methods below look like serialization hooks
+        # called by the EnvClient base class — confirm against the openenv package.
+        def _step_payload(self, action: "ActionModel") -> dict[str, Any]:
+            # Serialize the action in JSON mode, omitting fields that are None.
+            return action.model_dump(exclude_none=True, mode="json")
+        def _parse_result(self, payload: dict[str, Any]) -> StepResult["ObservationModel"]:
+            # Imported lazily: at module level this name exists only under TYPE_CHECKING.
+            from app.models import ObservationModel
+            # A missing "observation" key falls back to an empty dict.
+            observation_payload = payload.get("observation", {})
+            obs = ObservationModel(**observation_payload)
+            return StepResult(
+                observation=obs,
+                # Passed through unchanged; None when the payload has no "reward".
+                reward=payload.get("reward"),
+                done=bool(payload.get("done", False)),
+            )
+        def _parse_state(self, payload: dict[str, Any]) -> "EpisodeStateModel":
+            from app.models import EpisodeStateModel
+            # Accept either a {"state": {...}} envelope or the bare state dict.
+            state_payload = payload.get("state", payload)
+            return EpisodeStateModel(**state_payload)
+else:
+    class GovWorkflowOpenEnvClient:  # type: ignore[no-redef]
+        """
+        Placeholder when optional `openenv` package is unavailable.
+        """
+        # Importing this module still works without openenv; the failure is
+        # deferred until someone tries to instantiate the client.
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            raise ModuleNotFoundError(
+                "GovWorkflowOpenEnvClient requires the optional 'openenv' package. "
+                "Install it to use websocket OpenEnv client features."
+            )

docs/FRONTEND_WORKFLOW.md ADDED Viewed

	@@ -0,0 +1,48 @@

+# Frontend Workflow
+The frontend is React-based, backend-driven, and served directly by FastAPI.
+## Access
+- UI: `/ui`
+- Assets: `/ui/assets/*`
+- API namespace: `/api/*`
+## What Is Visible in UI
+1. OpenEnv API execution (`reset` / `step` / `state` / `grade`)
+2. Heuristic baseline agent runs (`/api/autostep`, `/api/benchmark`)
+3. Trained RL model execution (Phase 2/3 checkpoints via `/api/rl/run`)
+4. Trained RL evaluation across tasks (`/api/rl/evaluate`)
+5. Script-level workflow visibility for:
+   - `baseline_openai.py`
+   - `inference.py`
+## Frontend API Surface
+- Core:
+  - `GET /api/health`
+  - `GET /api/tasks`
+  - `GET /api/agents`
+  - `POST /api/reset`
+  - `POST /api/step`
+  - `POST /api/state`
+  - `POST /api/grade`
+  - `GET /api/sessions`
+  - `DELETE /api/sessions/{session_id}`
+- Baseline execution:
+  - `POST /api/autostep`
+  - `POST /api/benchmark`
+- Workflow visibility:
+  - `GET /api/workflows/components`
+  - `POST /api/workflows/run`
+- RL visibility/execution:
+  - `GET /api/rl/models`
+  - `POST /api/rl/run`
+  - `POST /api/rl/evaluate`
+## Deployment Notes
+- No Node.js build is required for serving the current frontend.
+- Backend startup remains `app.main:app`.
+- Frontend does not call external LLM providers directly.

docs/PHASE2_IMPLEMENTATION.md ADDED Viewed

	@@ -0,0 +1,41 @@

+# Phase 2 Implementation Notes
+Phase 2 goal: Curriculum PPO across easy, medium, and hard tasks with deterministic evaluation discipline.
+## Implemented Components
+- `rl/curriculum.py`
+  - `CurriculumScheduler` with staged task sampling:
+    - Stage 1 (0%-30%): easy only
+    - Stage 2 (30%-70%): easy + medium
+    - Stage 3 (70%-100%): all 3 tasks with configurable weights
+- `rl/configs/curriculum.yaml`
+  - curriculum fractions and weights
+  - PPO hyperparameters for Phase 2
+- `rl/train_ppo.py`
+  - `--phase 2` training path wired to curriculum scheduler
+  - default config path uses `rl/configs/curriculum.yaml`
+  - backward compatibility fallback to `rl/configs/ppo_curriculum.yaml`
+  - explicit CLI args: `--phase1-config`, `--phase2-config`
+- `tests/test_curriculum.py`
+  - stage transitions
+  - stage-1 easy-only enforcement
+  - stage-3 all-task sampling
+  - deterministic task seed invariants
+## Operational Notes
+- Existing 28-action design is preserved.
+- Existing task IDs and grader logic are unchanged.
+- No files were deleted as part of structure cleanup.
+## Commands (using existing .venv313)
+- Train Phase 1:
+  - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 1 --timesteps 200000 --n-envs 4 --seed 42`
+- Train Phase 2:
+  - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 2 --timesteps 500000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum.yaml`
+- Train Phase 2 (tuned continuation):
+  - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 2 --timesteps 300000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum_tuned.yaml`
+- Evaluate trained model:
+  - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/phase2_final.zip --episodes 3`

docs/PHASE3_IMPLEMENTATION.md ADDED Viewed

	@@ -0,0 +1,39 @@

+# Phase 3 Implementation Notes
+Phase 3 goal: Recurrent PPO (LSTM policy) to capture temporal dependencies such as SLA trend and escalation history.
+## Implemented Components
+- `rl/train_recurrent.py`
+  - RecurrentPPO training with `MlpLstmPolicy`
+  - LSTM hidden size configurable (default 128)
+  - curriculum sampling retained (easy -> medium -> hard)
+  - optional transfer of compatible policy tensors from best Phase 2 checkpoint
+- `rl/configs/recurrent.yaml`
+  - declarative recurrent training and curriculum settings
+- `rl/evaluate.py`
+  - model loading modes: `auto`, `maskable`, `recurrent`
+  - recurrent inference path with LSTM state handling + action-mask sanitization
+  - helper `compare_recurrent_vs_flat(...)`
+- `rl/callbacks.py`
+  - `RecurrentEvalCallback` for periodic grader-based checkpointing in Phase 3
+  - recurrent best checkpoints saved as `best_grader_recurrent_<task>.zip` (no collision with Phase 2 files)
+- `rl/gym_wrapper.py`
+  - optional `hard_action_mask` mode (default off) for safe action execution
+- `tests/test_rl_evaluate.py`
+  - recurrent hidden-state persistence
+  - LSTM reset behavior on episode boundary
+  - recurrent >= flat comparison utility check
+## Commands (using existing .venv313)
+- Train Phase 3:
+  - `.\.venv313\Scripts\python.exe -m rl.train_recurrent --timesteps 600000 --n-envs 4 --seed 42 --config rl/configs/recurrent.yaml`
+- Train Phase 3-v2 (recommended tuning run):
+  - `.\.venv313\Scripts\python.exe -m rl.train_recurrent --timesteps 700000 --n-envs 4 --seed 42 --config rl/configs/recurrent_v2.yaml`
+- Evaluate Phase 3 model:
+  - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/phase3_final.zip --episodes 3 --model-type recurrent`
+- Evaluate best recurrent checkpoint (saved during Phase 3 eval):
+  - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/best_grader_recurrent_mixed_urgency_medium.zip --episodes 3 --model-type recurrent`
+- Compare recurrent vs flat on medium task:
+  - `.\.venv313\Scripts\python.exe -c "from rl.evaluate import compare_recurrent_vs_flat; print(compare_recurrent_vs_flat('results/best_model/phase2_final.zip','results/best_model/phase3_final.zip'))"`

docs/PROJECT_STRUCTURE.md ADDED Viewed

	@@ -0,0 +1,41 @@

+# Project Structure (Judge-Friendly)
+This repository keeps runtime-critical files in their original paths for deployment safety.
+No existing files were deleted.
+## Top-Level Layout
+- `app/` - core environment logic and FastAPI server
+- `app/web/` - deployed React frontend assets served by backend at `/ui`
+- `frontend/` - frontend ownership docs and reserved source folder for future split components
+- `rl/` - reinforcement-learning wrappers, training, evaluation, configs
+- `tests/` - deterministic unit/integration test suites
+- `scripts/` - operational scripts (local run, validation, benchmark ladder)
+- `docs/` - judge-facing documentation and phase notes
+- `openenv.yaml` - OpenEnv manifest
+- `inference.py` - OpenEnv inference entrypoint
+- `baseline_openai.py` - CLI baseline workflow
+- `Dockerfile` - deployment image
+## Deployment-Critical Paths
+- API app import path: `app.main:app`
+- Frontend route: `/ui` (served from `app/web/index.html`)
+- RL training entrypoint: `python -m rl.train_ppo`
+- RL evaluation entrypoint: `python -m rl.evaluate`
+- OpenEnv config: `openenv.yaml`
+## Phase Mapping
+- Phase 1: `rl/feature_builder.py`, `rl/action_mask.py`, `rl/gym_wrapper.py`, `rl/train_ppo.py`
+- Phase 2: `rl/curriculum.py`, `rl/configs/curriculum.yaml`, `tests/test_curriculum.py`
+- Phase 3: `rl/train_recurrent.py`, `rl/configs/recurrent.yaml`, `tests/test_rl_evaluate.py`
+- Phase 3+: reserved in existing `rl/` module structure
+## Judge Quick Navigation
+1. Environment behavior: `app/env.py`, `app/reward.py`, `app/graders.py`
+2. OpenEnv compliance + inference: `openenv.yaml`, `inference.py`
+3. Frontend behavior: `app/web/react_app.js`, `docs/FRONTEND_WORKFLOW.md`
+4. RL implementation: `rl/`
+5. Validation: `tests/`, `scripts/validate_env.py`, `scripts/validate-submission.sh`

examples/sample_actions.json ADDED Viewed

File without changes

examples/sample_observations.json ADDED Viewed

File without changes

frontend/README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+# frontend/
+Frontend ownership and structure.
+- Source-managed React frontend lives in [frontend/react](./react).
+- Built with Vite and served by FastAPI at `/ui`.
+- UI is now module-based:
+  - `Overview`
+  - `Simulation Lab`
+  - `Training Studio`
+  - `Model Comparison`
+- Backend APIs remain under `/api/*`.
+Local frontend dev:
+1. Start backend:
+   - `.\.venv313\Scripts\python.exe scripts\run_local.py --host 0.0.0.0 --port 7860`
+2. Start Vite dev server:
+   - `cd frontend/react`
+   - `npm install`
+   - `npm run dev`
+3. Open:
+   - `http://localhost:5173`
+Build for backend serving:
+- `cd frontend/react`
+- `npm run build`
+Deployment path:
+- UI route: `/ui`
+- Asset route: `/ui/assets/*`

frontend/react/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ node_modules/
2	+ dist/

frontend/react/README.md ADDED Viewed

	@@ -0,0 +1,24 @@

+# react/
+Vite + React frontend for the Gov Workflow OpenEnv console.
+Commands:
+- `npm install`
+- `npm run dev` (local dev on `http://localhost:5173`, proxies `/api` to `http://localhost:7860`)
+- `npm run build` (production build for Docker/HF)
+- `npm run preview`
+If you see `ERR_CONNECTION_REFUSED` on `/api/*`:
+- Start backend first on port `7860`
+- Or set a custom dev proxy target:
+  - PowerShell: `$env:VITE_DEV_API_TARGET='http://127.0.0.1:7860'`
+  - Then run `npm run dev`
+Modules:
+- `Overview`: project and environment summary
+- `Simulation Lab`: dynamic real-world workflow simulation (baseline / inference-like / trained RL)
+- `Training Studio`: launch and monitor background RL training jobs
+- `Model Comparison`: baseline vs trained model score comparison on the same task

frontend/react/index.html ADDED Viewed

	@@ -0,0 +1,16 @@

+<!doctype html>
+<html lang="en" class="dark">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Gov Workflow OpenEnv Console</title>
+    <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;900&amp;family=Inter:wght@400;600;700&amp;display=swap" rel="stylesheet" />
+    <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap" rel="stylesheet" />
+  </head>
+  <body>
+    <div id="app-root" class="app-root">
+      <div class="boot">Loading frontend...</div>
+    </div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>

frontend/react/package-lock.json ADDED Viewed

	@@ -0,0 +1,2050 @@

+{
+  "name": "openenv-rl-frontend",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "openenv-rl-frontend",
+      "version": "0.1.0",
+      "dependencies": {
+        "react": "^18.3.1",
+        "react-dom": "^18.3.1"
+      },
+      "devDependencies": {
+        "@vitejs/plugin-react": "^6.0.1",
+        "autoprefixer": "^10.5.0",
+        "postcss": "^8.5.10",
+        "tailwindcss": "^3.4.19",
+        "vite": "^8.0.7"
+      }
+    },
+    "node_modules/@alloc/quick-lru": {
+      "version": "5.2.0",
+      "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
+      "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/@emnapi/core": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.1.tgz",
+      "integrity": "sha512-mukuNALVsoix/w1BJwFzwXBN/dHeejQtuVzcDsfOEsdpCumXb/E9j8w11h5S54tT1xhifGfbbSm/ICrObRb3KA==",
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/wasi-threads": "1.2.0",
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@emnapi/runtime": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.1.tgz",
+      "integrity": "sha512-VYi5+ZVLhpgK4hQ0TAjiQiZ6ol0oe4mBx7mVv7IflsiEp0OWoVsp/+f9Vc1hOhE0TtkORVrI1GvzyreqpgWtkA==",
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@emnapi/wasi-threads": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.0.tgz",
+      "integrity": "sha512-N10dEJNSsUx41Z6pZsXU8FjPjpBEplgH24sfkmITrBED1/U2Esum9F3lfLrMjKHHjmi557zQn7kR9R+XWXu5Rg==",
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@jridgewell/gen-mapping": {
+      "version": "0.3.13",
+      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
+      "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@jridgewell/sourcemap-codec": "^1.5.0",
+        "@jridgewell/trace-mapping": "^0.3.24"
+      }
+    },
+    "node_modules/@jridgewell/resolve-uri": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
+      "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
+    "node_modules/@jridgewell/sourcemap-codec": {
+      "version": "1.5.5",
+      "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
+      "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/@jridgewell/trace-mapping": {
+      "version": "0.3.31",
+      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz",
+      "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@jridgewell/resolve-uri": "^3.1.0",
+        "@jridgewell/sourcemap-codec": "^1.4.14"
+      }
+    },
+    "node_modules/@napi-rs/wasm-runtime": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz",
+      "integrity": "sha512-sNXv5oLJ7ob93xkZ1XnxisYhGYXfaG9f65/ZgYuAu3qt7b3NadcOEhLvx28hv31PgX8SZJRYrAIPQilQmFpLVw==",
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@tybys/wasm-util": "^0.10.1"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/Brooooooklyn"
+      },
+      "peerDependencies": {
+        "@emnapi/core": "^1.7.1",
+        "@emnapi/runtime": "^1.7.1"
+      }
+    },
+    "node_modules/@nodelib/fs.scandir": {
+      "version": "2.1.5",
+      "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
+      "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@nodelib/fs.stat": "2.0.5",
+        "run-parallel": "^1.1.9"
+      },
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/@nodelib/fs.stat": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz",
+      "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/@nodelib/fs.walk": {
+      "version": "1.2.8",
+      "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz",
+      "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@nodelib/fs.scandir": "2.1.5",
+        "fastq": "^1.6.0"
+      },
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/@oxc-project/types": {
+      "version": "0.123.0",
+      "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.123.0.tgz",
+      "integrity": "sha512-YtECP/y8Mj1lSHiUWGSRzy/C6teUKlS87dEfuVKT09LgQbUsBW1rNg+MiJ4buGu3yuADV60gbIvo9/HplA56Ew==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/Boshen"
+      }
+    },
+    "node_modules/@rolldown/binding-android-arm64": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.13.tgz",
+      "integrity": "sha512-5ZiiecKH2DXAVJTNN13gNMUcCDg4Jy8ZjbXEsPnqa248wgOVeYRX0iqXXD5Jz4bI9BFHgKsI2qmyJynstbmr+g==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-darwin-arm64": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.13.tgz",
+      "integrity": "sha512-tz/v/8G77seu8zAB3A5sK3UFoOl06zcshEzhUO62sAEtrEuW/H1CcyoupOrD+NbQJytYgA4CppXPzlrmp4JZKA==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-darwin-x64": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.13.tgz",
+      "integrity": "sha512-8DakphqOz8JrMYWTJmWA+vDJxut6LijZ8Xcdc4flOlAhU7PNVwo2MaWBF9iXjJAPo5rC/IxEFZDhJ3GC7NHvug==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-freebsd-x64": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.13.tgz",
+      "integrity": "sha512-4wBQFfjDuXYN/SVI8inBF3Aa+isq40rc6VMFbk5jcpolUBTe5cYnMsHZ51nFWsx3PVyyNN3vgoESki0Hmr/4BA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-arm-gnueabihf": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.13.tgz",
+      "integrity": "sha512-JW/e4yPIXLms+jmnbwwy5LA/LxVwZUWLN8xug+V200wzaVi5TEGIWQlh8o91gWYFxW609euI98OCCemmWGuPrw==",
+      "cpu": [
+        "arm"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-arm64-gnu": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.13.tgz",
+      "integrity": "sha512-ZfKWpXiUymDnavepCaM6KG/uGydJ4l2nBmMxg60Ci4CbeefpqjPWpfaZM7PThOhk2dssqBAcwLc6rAyr0uTdXg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-arm64-musl": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.13.tgz",
+      "integrity": "sha512-bmRg3O6Z0gq9yodKKWCIpnlH051sEfdVwt+6m5UDffAQMUUqU0xjnQqqAUm+Gu7ofAAly9DqiQDtKu2nPDEABA==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-ppc64-gnu": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.13.tgz",
+      "integrity": "sha512-8Wtnbw4k7pMYN9B/mOEAsQ8HOiq7AZ31Ig4M9BKn2So4xRaFEhtCSa4ZJaOutOWq50zpgR4N5+L/opnlaCx8wQ==",
+      "cpu": [
+        "ppc64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-s390x-gnu": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.13.tgz",
+      "integrity": "sha512-D/0Nlo8mQuxSMohNJUF2lDXWRsFDsHldfRRgD9bRgktj+EndGPj4DOV37LqDKPYS+osdyhZEH7fTakTAEcW7qg==",
+      "cpu": [
+        "s390x"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-x64-gnu": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.13.tgz",
+      "integrity": "sha512-eRrPvat2YaVQcwwKi/JzOP6MKf1WRnOCr+VaI3cTWz3ZoLcP/654z90lVCJ4dAuMEpPdke0n+qyAqXDZdIC4rA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-linux-x64-musl": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.13.tgz",
+      "integrity": "sha512-PsdONiFRp8hR8KgVjTWjZ9s7uA3uueWL0t74/cKHfM4dR5zXYv4AjB8BvA+QDToqxAFg4ZkcVEqeu5F7inoz5w==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-openharmony-arm64": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.13.tgz",
+      "integrity": "sha512-hCNXgC5dI3TVOLrPT++PKFNZ+1EtS0mLQwfXXXSUD/+rGlB65gZDwN/IDuxLpQP4x8RYYHqGomlUXzpO8aVI2w==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openharmony"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-wasm32-wasi": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.13.tgz",
+      "integrity": "sha512-viLS5C5et8NFtLWw9Sw3M/w4vvnVkbWkO7wSNh3C+7G1+uCkGpr6PcjNDSFcNtmXY/4trjPBqUfcOL+P3sWy/g==",
+      "cpu": [
+        "wasm32"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/core": "1.9.1",
+        "@emnapi/runtime": "1.9.1",
+        "@napi-rs/wasm-runtime": "^1.1.2"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
+    "node_modules/@rolldown/binding-win32-arm64-msvc": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.13.tgz",
+      "integrity": "sha512-Fqa3Tlt1xL4wzmAYxGNFV36Hb+VfPc9PYU+E25DAnswXv3ODDu/yyWjQDbXMo5AGWkQVjLgQExuVu8I/UaZhPQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/binding-win32-x64-msvc": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.13.tgz",
+      "integrity": "sha512-/pLI5kPkGEi44TDlnbio3St/5gUFeN51YWNAk/Gnv6mEQBOahRBh52qVFVBpmrnU01n2yysvBML9Ynu7K4kGAQ==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      }
+    },
+    "node_modules/@rolldown/pluginutils": {
+      "version": "1.0.0-rc.7",
+      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.7.tgz",
+      "integrity": "sha512-qujRfC8sFVInYSPPMLQByRh7zhwkGFS4+tyMQ83srV1qrxL4g8E2tyxVVyxd0+8QeBM1mIk9KbWxkegRr76XzA==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/@tybys/wasm-util": {
+      "version": "0.10.1",
+      "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz",
+      "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==",
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@vitejs/plugin-react": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-6.0.1.tgz",
+      "integrity": "sha512-l9X/E3cDb+xY3SWzlG1MOGt2usfEHGMNIaegaUGFsLkb3RCn/k8/TOXBcab+OndDI4TBtktT8/9BwwW8Vi9KUQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@rolldown/pluginutils": "1.0.0-rc.7"
+      },
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      },
+      "peerDependencies": {
+        "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0",
+        "babel-plugin-react-compiler": "^1.0.0",
+        "vite": "^8.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@rolldown/plugin-babel": {
+          "optional": true
+        },
+        "babel-plugin-react-compiler": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/any-promise": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz",
+      "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/anymatch": {
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz",
+      "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==",
+      "dev": true,
+      "license": "ISC",
+      "dependencies": {
+        "normalize-path": "^3.0.0",
+        "picomatch": "^2.0.4"
+      },
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/anymatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
+    "node_modules/arg": {
+      "version": "5.0.2",
+      "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz",
+      "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/autoprefixer": {
+      "version": "10.5.0",
+      "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.5.0.tgz",
+      "integrity": "sha512-FMhOoZV4+qR6aTUALKX2rEqGG+oyATvwBt9IIzVR5rMa2HRWPkxf+P+PAJLD1I/H5/II+HuZcBJYEFBpq39ong==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/autoprefixer"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "browserslist": "^4.28.2",
+        "caniuse-lite": "^1.0.30001787",
+        "fraction.js": "^5.3.4",
+        "picocolors": "^1.1.1",
+        "postcss-value-parser": "^4.2.0"
+      },
+      "bin": {
+        "autoprefixer": "bin/autoprefixer"
+      },
+      "engines": {
+        "node": "^10 || ^12 || >=14"
+      },
+      "peerDependencies": {
+        "postcss": "^8.1.0"
+      }
+    },
+    "node_modules/baseline-browser-mapping": {
+      "version": "2.10.21",
+      "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz",
+      "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "baseline-browser-mapping": "dist/cli.cjs"
+      },
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
+    "node_modules/binary-extensions": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz",
+      "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/braces": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
+      "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "fill-range": "^7.1.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/browserslist": {
+      "version": "4.28.2",
+      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
+      "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/browserslist"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "baseline-browser-mapping": "^2.10.12",
+        "caniuse-lite": "^1.0.30001782",
+        "electron-to-chromium": "^1.5.328",
+        "node-releases": "^2.0.36",
+        "update-browserslist-db": "^1.2.3"
+      },
+      "bin": {
+        "browserslist": "cli.js"
+      },
+      "engines": {
+        "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
+      }
+    },
+    "node_modules/camelcase-css": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz",
+      "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/caniuse-lite": {
+      "version": "1.0.30001790",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz",
+      "integrity": "sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "CC-BY-4.0"
+    },
+    "node_modules/chokidar": {
+      "version": "3.6.0",
+      "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
+      "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "anymatch": "~3.1.2",
+        "braces": "~3.0.2",
+        "glob-parent": "~5.1.2",
+        "is-binary-path": "~2.1.0",
+        "is-glob": "~4.0.1",
+        "normalize-path": "~3.0.0",
+        "readdirp": "~3.6.0"
+      },
+      "engines": {
+        "node": ">= 8.10.0"
+      },
+      "funding": {
+        "url": "https://paulmillr.com/funding/"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.2"
+      }
+    },
+    "node_modules/chokidar/node_modules/glob-parent": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
+      "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
+      "dev": true,
+      "license": "ISC",
+      "dependencies": {
+        "is-glob": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/commander": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
+      "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/cssesc": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
+      "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==",
+      "dev": true,
+      "license": "MIT",
+      "bin": {
+        "cssesc": "bin/cssesc"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/detect-libc": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
+      "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/didyoumean": {
+      "version": "1.2.2",
+      "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
+      "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==",
+      "dev": true,
+      "license": "Apache-2.0"
+    },
+    "node_modules/dlv": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
+      "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/electron-to-chromium": {
+      "version": "1.5.344",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz",
+      "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==",
+      "dev": true,
+      "license": "ISC"
+    },
+    "node_modules/es-errors": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+      "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/escalade": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
+      "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/fast-glob": {
+      "version": "3.3.3",
+      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
+      "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@nodelib/fs.stat": "^2.0.2",
+        "@nodelib/fs.walk": "^1.2.3",
+        "glob-parent": "^5.1.2",
+        "merge2": "^1.3.0",
+        "micromatch": "^4.0.8"
+      },
+      "engines": {
+        "node": ">=8.6.0"
+      }
+    },
+    "node_modules/fast-glob/node_modules/glob-parent": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
+      "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
+      "dev": true,
+      "license": "ISC",
+      "dependencies": {
+        "is-glob": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/fastq": {
+      "version": "1.20.1",
+      "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz",
+      "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==",
+      "dev": true,
+      "license": "ISC",
+      "dependencies": {
+        "reusify": "^1.0.4"
+      }
+    },
+    "node_modules/fdir": {
+      "version": "6.5.0",
+      "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
+      "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=12.0.0"
+      },
+      "peerDependencies": {
+        "picomatch": "^3 || ^4"
+      },
+      "peerDependenciesMeta": {
+        "picomatch": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/fill-range": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
+      "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "to-regex-range": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/fraction.js": {
+      "version": "5.3.4",
+      "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
+      "integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/rawify"
+      }
+    },
+    "node_modules/fsevents": {
+      "version": "2.3.3",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
+      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
+    "node_modules/function-bind": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
+      "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/glob-parent": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
+      "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
+      "dev": true,
+      "license": "ISC",
+      "dependencies": {
+        "is-glob": "^4.0.3"
+      },
+      "engines": {
+        "node": ">=10.13.0"
+      }
+    },
+    "node_modules/hasown": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
+      "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "function-bind": "^1.1.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/is-binary-path": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz",
+      "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "binary-extensions": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/is-core-module": {
+      "version": "2.16.1",
+      "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz",
+      "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "hasown": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/is-extglob": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
+      "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/is-glob": {
+      "version": "4.0.3",
+      "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
+      "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "is-extglob": "^2.1.1"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/is-number": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
+      "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.12.0"
+      }
+    },
+    "node_modules/jiti": {
+      "version": "1.21.7",
+      "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz",
+      "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==",
+      "dev": true,
+      "license": "MIT",
+      "bin": {
+        "jiti": "bin/jiti.js"
+      }
+    },
+    "node_modules/js-tokens": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
+      "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
+      "license": "MIT"
+    },
+    "node_modules/lightningcss": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz",
+      "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "detect-libc": "^2.0.3"
+      },
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      },
+      "optionalDependencies": {
+        "lightningcss-android-arm64": "1.32.0",
+        "lightningcss-darwin-arm64": "1.32.0",
+        "lightningcss-darwin-x64": "1.32.0",
+        "lightningcss-freebsd-x64": "1.32.0",
+        "lightningcss-linux-arm-gnueabihf": "1.32.0",
+        "lightningcss-linux-arm64-gnu": "1.32.0",
+        "lightningcss-linux-arm64-musl": "1.32.0",
+        "lightningcss-linux-x64-gnu": "1.32.0",
+        "lightningcss-linux-x64-musl": "1.32.0",
+        "lightningcss-win32-arm64-msvc": "1.32.0",
+        "lightningcss-win32-x64-msvc": "1.32.0"
+      }
+    },
+    "node_modules/lightningcss-android-arm64": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz",
+      "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-darwin-arm64": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz",
+      "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-darwin-x64": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz",
+      "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-freebsd-x64": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz",
+      "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-linux-arm-gnueabihf": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz",
+      "integrity": "sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==",
+      "cpu": [
+        "arm"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-linux-arm64-gnu": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz",
+      "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-linux-arm64-musl": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz",
+      "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-linux-x64-gnu": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz",
+      "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-linux-x64-musl": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz",
+      "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-win32-arm64-msvc": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz",
+      "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lightningcss-win32-x64-msvc": {
+      "version": "1.32.0",
+      "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz",
+      "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MPL-2.0",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">= 12.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/parcel"
+      }
+    },
+    "node_modules/lilconfig": {
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
+      "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/antonk52"
+      }
+    },
+    "node_modules/lines-and-columns": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
+      "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/loose-envify": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
+      "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
+      "license": "MIT",
+      "dependencies": {
+        "js-tokens": "^3.0.0 || ^4.0.0"
+      },
+      "bin": {
+        "loose-envify": "cli.js"
+      }
+    },
+    "node_modules/merge2": {
+      "version": "1.4.1",
+      "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
+      "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/micromatch": {
+      "version": "4.0.8",
+      "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
+      "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "braces": "^3.0.3",
+        "picomatch": "^2.3.1"
+      },
+      "engines": {
+        "node": ">=8.6"
+      }
+    },
+    "node_modules/micromatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
+    "node_modules/mz": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz",
+      "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "any-promise": "^1.0.0",
+        "object-assign": "^4.0.1",
+        "thenify-all": "^1.0.0"
+      }
+    },
+    "node_modules/nanoid": {
+      "version": "3.3.11",
+      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
+      "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "bin": {
+        "nanoid": "bin/nanoid.cjs"
+      },
+      "engines": {
+        "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
+      }
+    },
+    "node_modules/node-releases": {
+      "version": "2.0.38",
+      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz",
+      "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/normalize-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
+      "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/object-assign": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
+      "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/object-hash": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
+      "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/path-parse": {
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
+      "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/picocolors": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
+      "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
+      "dev": true,
+      "license": "ISC"
+    },
+    "node_modules/picomatch": {
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
+    "node_modules/pify": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz",
+      "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/pirates": {
+      "version": "4.0.7",
+      "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz",
+      "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/postcss": {
+      "version": "8.5.10",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz",
+      "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/postcss"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "nanoid": "^3.3.11",
+        "picocolors": "^1.1.1",
+        "source-map-js": "^1.2.1"
+      },
+      "engines": {
+        "node": "^10 || ^12 || >=14"
+      }
+    },
+    "node_modules/postcss-import": {
+      "version": "15.1.0",
+      "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz",
+      "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "postcss-value-parser": "^4.0.0",
+        "read-cache": "^1.0.0",
+        "resolve": "^1.1.7"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      },
+      "peerDependencies": {
+        "postcss": "^8.0.0"
+      }
+    },
+    "node_modules/postcss-js": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.1.0.tgz",
+      "integrity": "sha512-oIAOTqgIo7q2EOwbhb8UalYePMvYoIeRY2YKntdpFQXNosSu3vLrniGgmH9OKs/qAkfoj5oB3le/7mINW1LCfw==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "camelcase-css": "^2.0.1"
+      },
+      "engines": {
+        "node": "^12 || ^14 || >= 16"
+      },
+      "peerDependencies": {
+        "postcss": "^8.4.21"
+      }
+    },
+    "node_modules/postcss-load-config": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz",
+      "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "lilconfig": "^3.1.1"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "peerDependencies": {
+        "jiti": ">=1.21.0",
+        "postcss": ">=8.0.9",
+        "tsx": "^4.8.1",
+        "yaml": "^2.4.2"
+      },
+      "peerDependenciesMeta": {
+        "jiti": {
+          "optional": true
+        },
+        "postcss": {
+          "optional": true
+        },
+        "tsx": {
+          "optional": true
+        },
+        "yaml": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/postcss-nested": {
+      "version": "6.2.0",
+      "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
+      "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "postcss-selector-parser": "^6.1.1"
+      },
+      "engines": {
+        "node": ">=12.0"
+      },
+      "peerDependencies": {
+        "postcss": "^8.2.14"
+      }
+    },
+    "node_modules/postcss-selector-parser": {
+      "version": "6.1.2",
+      "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz",
+      "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "cssesc": "^3.0.0",
+        "util-deprecate": "^1.0.2"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/postcss-value-parser": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
+      "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/queue-microtask": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
+      "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ],
+      "license": "MIT"
+    },
+    "node_modules/react": {
+      "version": "18.3.1",
+      "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
+      "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
+      "license": "MIT",
+      "dependencies": {
+        "loose-envify": "^1.1.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/react-dom": {
+      "version": "18.3.1",
+      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
+      "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
+      "license": "MIT",
+      "dependencies": {
+        "loose-envify": "^1.1.0",
+        "scheduler": "^0.23.2"
+      },
+      "peerDependencies": {
+        "react": "^18.3.1"
+      }
+    },
+    "node_modules/read-cache": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
+      "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "pify": "^2.3.0"
+      }
+    },
+    "node_modules/readdirp": {
+      "version": "3.6.0",
+      "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
+      "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "picomatch": "^2.2.1"
+      },
+      "engines": {
+        "node": ">=8.10.0"
+      }
+    },
+    "node_modules/readdirp/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
+    "node_modules/resolve": {
+      "version": "1.22.12",
+      "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.12.tgz",
+      "integrity": "sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0",
+        "is-core-module": "^2.16.1",
+        "path-parse": "^1.0.7",
+        "supports-preserve-symlinks-flag": "^1.0.0"
+      },
+      "bin": {
+        "resolve": "bin/resolve"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/reusify": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz",
+      "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "iojs": ">=1.0.0",
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/rolldown": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.13.tgz",
+      "integrity": "sha512-bvVj8YJmf0rq4pSFmH7laLa6pYrhghv3PRzrCdRAr23g66zOKVJ4wkvFtgohtPLWmthgg8/rkaqRHrpUEh0Zbw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@oxc-project/types": "=0.123.0",
+        "@rolldown/pluginutils": "1.0.0-rc.13"
+      },
+      "bin": {
+        "rolldown": "bin/cli.mjs"
+      },
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      },
+      "optionalDependencies": {
+        "@rolldown/binding-android-arm64": "1.0.0-rc.13",
+        "@rolldown/binding-darwin-arm64": "1.0.0-rc.13",
+        "@rolldown/binding-darwin-x64": "1.0.0-rc.13",
+        "@rolldown/binding-freebsd-x64": "1.0.0-rc.13",
+        "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.13",
+        "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.13",
+        "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.13",
+        "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.13",
+        "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.13",
+        "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.13",
+        "@rolldown/binding-linux-x64-musl": "1.0.0-rc.13",
+        "@rolldown/binding-openharmony-arm64": "1.0.0-rc.13",
+        "@rolldown/binding-wasm32-wasi": "1.0.0-rc.13",
+        "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.13",
+        "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.13"
+      }
+    },
+    "node_modules/rolldown/node_modules/@rolldown/pluginutils": {
+      "version": "1.0.0-rc.13",
+      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.13.tgz",
+      "integrity": "sha512-3ngTAv6F/Py35BsYbeeLeecvhMKdsKm4AoOETVhAA+Qc8nrA2I0kF7oa93mE9qnIurngOSpMnQ0x2nQY2FPviA==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/run-parallel": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
+      "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "queue-microtask": "^1.2.2"
+      }
+    },
+    "node_modules/scheduler": {
+      "version": "0.23.2",
+      "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
+      "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
+      "license": "MIT",
+      "dependencies": {
+        "loose-envify": "^1.1.0"
+      }
+    },
+    "node_modules/source-map-js": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
+      "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
+      "dev": true,
+      "license": "BSD-3-Clause",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/sucrase": {
+      "version": "3.35.1",
+      "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz",
+      "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@jridgewell/gen-mapping": "^0.3.2",
+        "commander": "^4.0.0",
+        "lines-and-columns": "^1.1.6",
+        "mz": "^2.7.0",
+        "pirates": "^4.0.1",
+        "tinyglobby": "^0.2.11",
+        "ts-interface-checker": "^0.1.9"
+      },
+      "bin": {
+        "sucrase": "bin/sucrase",
+        "sucrase-node": "bin/sucrase-node"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.17"
+      }
+    },
+    "node_modules/supports-preserve-symlinks-flag": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
+      "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/tailwindcss": {
+      "version": "3.4.19",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz",
+      "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@alloc/quick-lru": "^5.2.0",
+        "arg": "^5.0.2",
+        "chokidar": "^3.6.0",
+        "didyoumean": "^1.2.2",
+        "dlv": "^1.1.3",
+        "fast-glob": "^3.3.2",
+        "glob-parent": "^6.0.2",
+        "is-glob": "^4.0.3",
+        "jiti": "^1.21.7",
+        "lilconfig": "^3.1.3",
+        "micromatch": "^4.0.8",
+        "normalize-path": "^3.0.0",
+        "object-hash": "^3.0.0",
+        "picocolors": "^1.1.1",
+        "postcss": "^8.4.47",
+        "postcss-import": "^15.1.0",
+        "postcss-js": "^4.0.1",
+        "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0",
+        "postcss-nested": "^6.2.0",
+        "postcss-selector-parser": "^6.1.2",
+        "resolve": "^1.22.8",
+        "sucrase": "^3.35.0"
+      },
+      "bin": {
+        "tailwind": "lib/cli.js",
+        "tailwindcss": "lib/cli.js"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
+    "node_modules/thenify": {
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
+      "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "any-promise": "^1.0.0"
+      }
+    },
+    "node_modules/thenify-all": {
+      "version": "1.6.0",
+      "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz",
+      "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "thenify": ">= 3.1.0 < 4"
+      },
+      "engines": {
+        "node": ">=0.8"
+      }
+    },
+    "node_modules/tinyglobby": {
+      "version": "0.2.16",
+      "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
+      "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "fdir": "^6.5.0",
+        "picomatch": "^4.0.4"
+      },
+      "engines": {
+        "node": ">=12.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/SuperchupuDev"
+      }
+    },
+    "node_modules/to-regex-range": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
+      "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "is-number": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=8.0"
+      }
+    },
+    "node_modules/ts-interface-checker": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
+      "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==",
+      "dev": true,
+      "license": "Apache-2.0"
+    },
+    "node_modules/tslib": {
+      "version": "2.8.1",
+      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
+      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
+      "dev": true,
+      "license": "0BSD",
+      "optional": true
+    },
+    "node_modules/update-browserslist-db": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
+      "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/browserslist"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "escalade": "^3.2.0",
+        "picocolors": "^1.1.1"
+      },
+      "bin": {
+        "update-browserslist-db": "cli.js"
+      },
+      "peerDependencies": {
+        "browserslist": ">= 4.21.0"
+      }
+    },
+    "node_modules/util-deprecate": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/vite": {
+      "version": "8.0.7",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.7.tgz",
+      "integrity": "sha512-P1PbweD+2/udplnThz3btF4cf6AgPky7kk23RtHUkJIU5BIxwPprhRGmOAHs6FTI7UiGbTNrgNP6jSYD6JaRnw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "lightningcss": "^1.32.0",
+        "picomatch": "^4.0.4",
+        "postcss": "^8.5.8",
+        "rolldown": "1.0.0-rc.13",
+        "tinyglobby": "^0.2.15"
+      },
+      "bin": {
+        "vite": "bin/vite.js"
+      },
+      "engines": {
+        "node": "^20.19.0 || >=22.12.0"
+      },
+      "funding": {
+        "url": "https://github.com/vitejs/vite?sponsor=1"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.3"
+      },
+      "peerDependencies": {
+        "@types/node": "^20.19.0 || >=22.12.0",
+        "@vitejs/devtools": "^0.1.0",
+        "esbuild": "^0.27.0 || ^0.28.0",
+        "jiti": ">=1.21.0",
+        "less": "^4.0.0",
+        "sass": "^1.70.0",
+        "sass-embedded": "^1.70.0",
+        "stylus": ">=0.54.8",
+        "sugarss": "^5.0.0",
+        "terser": "^5.16.0",
+        "tsx": "^4.8.1",
+        "yaml": "^2.4.2"
+      },
+      "peerDependenciesMeta": {
+        "@types/node": {
+          "optional": true
+        },
+        "@vitejs/devtools": {
+          "optional": true
+        },
+        "esbuild": {
+          "optional": true
+        },
+        "jiti": {
+          "optional": true
+        },
+        "less": {
+          "optional": true
+        },
+        "sass": {
+          "optional": true
+        },
+        "sass-embedded": {
+          "optional": true
+        },
+        "stylus": {
+          "optional": true
+        },
+        "sugarss": {
+          "optional": true
+        },
+        "terser": {
+          "optional": true
+        },
+        "tsx": {
+          "optional": true
+        },
+        "yaml": {
+          "optional": true
+        }
+      }
+    }
+  }
+}

frontend/react/package.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "name": "openenv-rl-frontend",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite --configLoader native || vite",
+    "build": "vite build --configLoader native || vite build",
+    "preview": "vite preview --configLoader native --host 0.0.0.0 --port 4173 || vite preview --host 0.0.0.0 --port 4173"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1"
+  },
+  "devDependencies": {
+    "@vitejs/plugin-react": "^6.0.1",
+    "autoprefixer": "^10.5.0",
+    "postcss": "^8.5.10",
+    "tailwindcss": "^3.4.19",
+    "vite": "^8.0.7"
+  }
+}

frontend/react/postcss.config.js ADDED Viewed

	@@ -0,0 +1,6 @@

+// PostCSS configuration: enables the tailwindcss and autoprefixer plugins,
+// each with an empty options object.
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}

frontend/react/src/App.jsx ADDED Viewed

	@@ -0,0 +1,21 @@

+import { useState, useEffect } from "react";
+import { api } from "./api/client";
+import { Dashboard } from "./components/story-ui/Dashboard";
+export default function App() {
+  const [tasks, setTasks] = useState([]);
+  useEffect(() => {
+    const boot = async () => {
+      try {
+        const taskRes = await api("/tasks");
+        setTasks(taskRes.tasks || []);
+      } catch (err) {
+        console.error("Failed to load tasks", err);
+      }
+    };
+    boot();
+  }, []);
+  return <Dashboard tasks={tasks} />;
+}

frontend/react/src/api/client.js ADDED Viewed

	@@ -0,0 +1,131 @@

+// Backend origin quoted in the "connection failed" error thrown by api().
+const DEFAULT_LOCAL_API = "http://127.0.0.1:7860";
+// Ports and hostnames that candidates() combines into direct backend URLs
+// (http://<host>:<port>...) when the page is served from localhost or
+// 127.0.0.1 on port 5173.
+const LOCAL_PORTS = ["7860"];
+const LOCAL_HOSTS = ["127.0.0.1", "localhost"];
+function candidates(path) {
+  const urls = [];
+  const rootOnlyPaths = path === "/rl/models";
+  const compatNoApiPaths =
+    path.startsWith("/simulation/") ||
+    path.startsWith("/training/") ||
+    path.startsWith("/rl/") ||
+    path.startsWith("/openenv/") ||
+    path.startsWith("/benchmark") ||
+    path.startsWith("/history/");
+  let isLocalDev5173 = false;
+  if (typeof window !== "undefined") {
+    const host = window.location.hostname;
+    const isLocal = host === "localhost" || host === "127.0.0.1";
+    isLocalDev5173 = isLocal && window.location.port === "5173";
+  }
+  // Training story endpoints are mounted at /training/* (not /api/training/*).
+  // Avoid known-bad prefixes first to prevent noisy 404 logs in browser console.
+  if (path.startsWith("/training/")) {
+    if (isLocalDev5173) {
+      for (const port of LOCAL_PORTS) {
+        for (const lh of LOCAL_HOSTS) {
+          urls.push(`http://${lh}:${port}${path}`);
+        }
+      }
+    } else {
+      urls.push(path);
+    }
+    return [...new Set(urls)];
+  }
+  if (isLocalDev5173) {
+    // For local dev, prefer direct backend URLs first to avoid noisy Vite proxy
+    // connection-refused spam when backend is temporarily down.
+    for (const port of LOCAL_PORTS) {
+      for (const lh of LOCAL_HOSTS) {
+        if (rootOnlyPaths) {
+          urls.push(`http://${lh}:${port}${path}`);
+        } else {
+          urls.push(`http://${lh}:${port}/api${path}`);
+          urls.push(`http://${lh}:${port}/api/v1${path}`);
+          if (compatNoApiPaths) {
+            urls.push(`http://${lh}:${port}${path}`);
+          }
+        }
+      }
+    }
+  }
+  if (rootOnlyPaths) {
+    urls.push(path);
+  } else {
+    urls.push(`/api${path}`, `/api/v1${path}`);
+    if (compatNoApiPaths) {
+      urls.push(path);
+    }
+  }
+  if (isLocalDev5173 && !rootOnlyPaths) {
+    for (const port of LOCAL_PORTS) {
+      for (const lh of LOCAL_HOSTS) {
+        // keep original ordering as fallback candidates
+        urls.push(`http://${lh}:${port}/api${path}`);
+        urls.push(`http://${lh}:${port}/api/v1${path}`);
+      }
+    }
+  }
+  return [...new Set(urls)];
+}
+export async function api(path, options = {}) {
+  const method = String(options.method || "GET").toUpperCase();
+  const headers = { ...(options.headers || {}) };
+  if (method !== "GET" && method !== "HEAD" && !("Content-Type" in headers)) {
+    headers["Content-Type"] = "application/json";
+  }
+  const requestOptions = {
+    ...options,
+    method,
+    headers,
+  };
+  if (method === "GET" || method === "HEAD") {
+    delete requestOptions.body;
+  }
+  const errors = [];
+  for (const url of candidates(path)) {
+    try {
+      const res = await fetch(url, requestOptions);
+      let payload = null;
+      try {
+        payload = await res.json();
+      } catch (err) {
+        payload = null;
+      }
+      if (!res.ok) {
+        const detail = payload?.detail || `${res.status}`;
+        throw new Error(`API ${path} failed on ${url}: ${detail}`);
+      }
+      return payload;
+    } catch (err) {
+      errors.push(err);
+    }
+  }
+  const firstApiError = errors.find(
+    (e) => e instanceof Error && e.message.startsWith(`API ${path} failed`)
+  );
+  if (firstApiError) {
+    throw firstApiError;
+  }
+  const lastError = errors.length ? errors[errors.length - 1] : new Error("Unknown request failure.");
+  throw new Error(
+    `API ${path} connection failed. Start backend on ${DEFAULT_LOCAL_API}. Last error: ${
+      lastError instanceof Error ? lastError.message : String(lastError)
+    }`
+  );
+}
+export function fmt(value, digits = 2) {
+  if (value == null || Number.isNaN(Number(value))) return "-";
+  return Number(value).toFixed(digits);
+}