Siddharaj Shirke committed on
Commit
3eae4cc
·
0 Parent(s):

deploy: fresh snapshot to Hugging Face Space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.dockerignore ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VCS / local env
2
+ .git/
3
+ .gitignore
4
+ .venv/
5
+ .venv313/
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+
10
+ # Python cache/build
11
+ __pycache__/
12
+ *.pyc
13
+ *.pyo
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+
18
+ # Frontend cache/deps
19
+ frontend/react/node_modules/
20
+ frontend/react/.vite/
21
+ frontend/react/.vite-temp/
22
+ frontend/react/dist/
23
+ .npm-cache/
24
+ .vite/
25
+
26
+ # Runtime/generated data not needed in image build context
27
+ logs/
28
+ reports/
29
+ outputs/
30
+ data/
31
+ results/training_runs/
32
+ results/runs/
33
+ results/eval_logs/
34
+ results/best_model/archived/
35
+ artifacts/
36
+ results/prevalidation_*.log
37
+
38
+ # Test/dev-only assets
39
+ .pytest_cache/
40
+ .tmp/
41
+ docs/
42
+ examples/
43
+ tests/
44
+ gov_workflow_openenv_tests/
45
+ pip_bootstrap/
46
+ test_results.txt
47
+ test_rl_output*.txt
48
+ tests/test_output*.txt
49
+ tests/test_run.txt
50
+ phase1_validation.py
51
+ test_phase2.py
52
+ old_simulator.py
53
+ restore_simulator.py
54
+
55
+ # Non-runtime docs, notebooks, and lockfiles
56
+ GovWorkflow_RL_ENV.ipynb
57
+ Blog.md
58
+ uv.lock
59
+
60
+ # IDE/OS noise
61
+ .vscode/
62
+ .idea/
63
+ *.swp
64
+ Thumbs.db
65
+ .DS_Store
.env.example ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gov Workflow OpenEnv
2
+ # Detailed environment template for local run, E2E validation, Docker preflight,
3
+ # and release deployment.
4
+ #
5
+ # Usage:
6
+ # 1) Copy this file to .env
7
+ # 2) Fill only the auth/provider values you use
8
+ # 3) Keep defaults unless you intentionally need different behavior
9
+
10
+ # -----------------------------------------------------------------------------
11
+ # 1) LLM Provider Endpoints and Auth
12
+ # -----------------------------------------------------------------------------
13
+ # Primary OpenAI-compatible endpoint used by inference/simulation runtime.
14
+ API_BASE_URL=https://integrate.api.nvidia.com/v1
15
+
16
+ # OpenAI-compatible model used for LLM inference mode.
17
+ MODEL_NAME=meta/llama-3.3-70b-instruct
18
+
19
+ # Auth precedence in runtime:
20
+ # HF_TOKEN -> OPENAI_API_KEY -> API_KEY
21
+ HF_TOKEN=
22
+ OPENAI_API_KEY=
23
+ API_KEY=
24
+
25
+ # Optional image tag used by inference / utility flows.
26
+ LOCAL_IMAGE_NAME=gov-workflow-openenv:latest
27
+
28
+ # Inference acceptance criteria (inference.py).
29
+ MAX_STEPS=80
30
+ SUCCESS_SCORE_THRESHOLD=0.50
31
+
32
+
33
+ # -----------------------------------------------------------------------------
34
+ # 2) Provider-Specific API Base URLs
35
+ # -----------------------------------------------------------------------------
36
+ # OpenAI-compatible provider URL (fallback path in engine/simulator).
37
+ OPENAI_API_BASE_URL=https://api.openai.com/v1
38
+
39
+ # NVIDIA provider URL for NIM calls.
40
+ NVIDIA_API_BASE_URL=https://integrate.api.nvidia.com/v1
41
+
42
+
43
+ # -----------------------------------------------------------------------------
44
+ # 3) Model Routing and Fallback Pools
45
+ # -----------------------------------------------------------------------------
46
+ # Optional CSV fallback models for OpenAI-compatible runtime.
47
+ # Example:
48
+ # MODEL_FALLBACKS=meta/llama-3.1-8b-instruct,microsoft/phi-4-mini-instruct
49
+ MODEL_FALLBACKS=
50
+
51
+ # Optional CSV fallback models for NVIDIA runtime.
52
+ NVIDIA_MODEL_FALLBACKS=
53
+
54
+ # Primary NVIDIA model for NVIDIA-key runtime path.
55
+ NVIDIA_MODEL=meta/llama-3.3-70b-instruct
56
+
57
+ # NVIDIA keys for baseline and simulation fallback behavior.
58
+ # Get keys at: https://build.nvidia.com/explore/discover
59
+ NVIDIA_API_KEY=
60
+ NVIDIA_API_KEY_2=
61
+
62
+
63
+ # -----------------------------------------------------------------------------
64
+ # 4) Environment Transport (Direct vs HTTP)
65
+ # -----------------------------------------------------------------------------
66
+ # Used by inference / gateway code.
67
+ # Allowed: auto, http, direct
68
+ OPENENV_ENV_TRANSPORT=auto
69
+
70
+ # Base URL for HTTP transport path.
71
+ OPENENV_ENV_BASE_URL=http://127.0.0.1:7860
72
+
73
+ # Optional explicit API prefix for /reset /step /grade calls.
74
+ # Typical values: (empty), /api, /api/v1
75
+ OPENENV_ENV_API_PREFIX=
76
+
77
+ # Optional candidate prefixes (CSV) tried before built-in fallback sequence.
78
+ # Example: /api/v1,/api
79
+ OPENENV_ENV_API_PREFIX_CANDIDATES=
80
+
81
+ # Force HTTP/FastAPI gateway even when direct transport is available.
82
+ # Allowed truthy values: 1, true, yes, on
83
+ FORCE_FASTAPI_GATEWAY=0
84
+
85
+
86
+ # -----------------------------------------------------------------------------
87
+ # 5) Structured API Alias Controls (app.main)
88
+ # -----------------------------------------------------------------------------
89
+ # Enables automatic aliasing from source prefix to versioned prefix.
90
+ ENABLE_STRUCTURED_V1_API=1
91
+ OPENENV_API_SOURCE_PREFIX=/api
92
+ OPENENV_API_V1_PREFIX=/api/v1
93
+
94
+
95
+ # -----------------------------------------------------------------------------
96
+ # 6) FastAPI Server Settings (SERVER_* in app/config.py)
97
+ # -----------------------------------------------------------------------------
98
+ SERVER_HOST=0.0.0.0
99
+ SERVER_PORT=7860
100
+ SERVER_LOG_LEVEL=info
101
+
102
+ # Keep 1 for in-memory session store unless external shared state is added.
103
+ SERVER_WORKERS=1
104
+
105
+ # JSON list string expected by Pydantic settings. ["*"] allows every origin; list explicit origins to restrict access.
106
+ SERVER_CORS_ORIGINS=["*"]
107
+
108
+
109
+ # -----------------------------------------------------------------------------
110
+ # 7) Environment Defaults (ENV_* in app/config.py)
111
+ # -----------------------------------------------------------------------------
112
+ ENV_DEFAULT_TASK_ID=district_backlog_easy
113
+ ENV_DEFAULT_SEED=11
114
+ ENV_MAX_SESSIONS=100
115
+ ENV_MAX_STEPS_PER_EPISODE=500
116
+
117
+
118
+ # -----------------------------------------------------------------------------
119
+ # 8) Runtime Throttling
120
+ # -----------------------------------------------------------------------------
121
+ # Delay between LLM calls used by baseline_openai.py.
122
+ LLM_CALL_DELAY=12.0
123
+
124
+
125
+ # -----------------------------------------------------------------------------
126
+ # 9) Persistence and Storage
127
+ # -----------------------------------------------------------------------------
128
+ # Enables SQLite/filesystem persistence.
129
+ STORAGE_ENABLED=true
130
+
131
+ # Preferred persistence root (used by app/persistence.py).
132
+ # Local example: C:/Users/your-user/OPENENV_RL/outputs/persist
133
+ # HF Spaces example: /data/openenv_rl
134
+ OPENENV_DATA_DIR=
135
+
136
+ # Legacy fallback path key still supported by code.
137
+ STORAGE_DATA_DIR=
138
+
139
+
140
+ # -----------------------------------------------------------------------------
141
+ # 10) Frontend Dev Proxy (Vite)
142
+ # -----------------------------------------------------------------------------
143
+ # Used by frontend/react/vite.config.js for local /api proxy target.
144
+ VITE_DEV_API_TARGET=http://127.0.0.1:7860
.gitignore ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment secrets - NEVER commit .env
2
+ .env
3
+ .env.local
4
+ .env.production
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ .venv/
11
+ .venv313/
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+
16
+ # pytest
17
+ .pytest_cache/
18
+
19
+ # Local temp/bootstrap
20
+ .tmp/
21
+ pip_bootstrap/
22
+
23
+ # Runtime outputs
24
+ outputs/
25
+ logs/
26
+ reports/
27
+ data/
28
+ results/training_runs/
29
+ results/runs/
30
+ results/eval_logs/
31
+ results/best_model/archived/
32
+ artifacts/
33
+
34
+ # Frontend build cache/deps
35
+ frontend/react/node_modules/
36
+ frontend/react/.vite/
37
+ frontend/react/.vite-temp/
38
+ frontend/react/dist/
39
+ .vite/
40
+ .npm-cache/
41
+
42
+ # Docker/local deployment overrides
43
+ docker-compose.override.yml
44
+ *.local.env
45
+
46
+ # Local test artifacts
47
+ test_results.txt
48
+ test_rl_output*.txt
49
+ tests/test_output*.txt
50
+ tests/test_run.txt
51
+
52
+ # Pre-submission validation artifacts
53
+ scripts/validate-submission.sh
54
+ results/prevalidation_docker_build.log
55
+ results/prevalidation_*.log
56
+
57
+ # Keep benchmark Phase 1 model in Git for Colab/Kaggle transfer
58
+ !results/best_model/phase1/phase1_final.zip
59
+
Blog.md ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🏛️ Gov Workflow OpenEnv — Teaching Machines to Manage Real-World Bureaucracy
2
+
3
+ ---
4
+
5
+ ## 🚨 The Problem Nobody Talks About
6
+
7
+ Every day, thousands of applications flow into government systems:
8
+
9
+ * Passports
10
+ * Income certificates
11
+ * Land records
12
+ * Licenses
13
+
14
+ But the system handling them?
15
+
16
+ ```text
17
+ Rigid. Static. Fragile.
18
+ ```
19
+
20
+ Most workflows rely on simple rules like:
21
+
22
+ * First-Come-First-Serve
23
+ * Urgent-first prioritization
24
+
25
+ And that’s where things break.
26
+
27
+ ---
28
+
29
+ ### ⚠️ What goes wrong?
30
+
31
+ * If you prioritize **old cases**, new easy ones pile up → backlog explodes
32
+ * If you prioritize **fast cases**, complex ones miss deadlines → SLA breaches
33
+ * If you follow **fixed rules**, you ignore real-time system state
34
+
35
+ This is not a sorting problem.
36
+
37
+ ```text
38
+ This is a decision-making problem under uncertainty.
39
+ ```
40
+
41
+ ---
42
+
43
+ ## 💡 Our Idea
44
+
45
+ What if instead of **hardcoding rules**,
46
+ we let a system **learn how to manage workflows**?
47
+
48
+ That’s exactly what we built.
49
+
50
+ ---
51
+
52
+ ## 🌍 What is the Environment?
53
+
54
+ At the heart of this project is a **simulation environment** that mimics a real government office.
55
+
56
+ Think of it as:
57
+
58
+ ```text
59
+ A virtual district office running in code
60
+ ```
61
+
62
+ It includes:
63
+
64
+ * Multiple services (passport, certificates, etc.)
65
+ * Multi-stage workflows (submission → approval → issuance)
66
+ * Limited officers (resources)
67
+ * Delays due to missing documents
68
+ * SLA deadlines and penalties
69
+ * Fairness constraints across services
70
+
71
+ Every “step” in this environment represents **one unit of time** (a working day).
72
+
73
+ ---
74
+
75
+ ## 🧠 The Core Concept
76
+
77
+ We model this system as a **Reinforcement Learning problem**.
78
+
79
+ ```text
80
+ Environment → Government workflow simulation
81
+ Agent → Decision-maker
82
+ Goal → Optimize system performance over time
83
+ ```
84
+
85
+ ---
86
+
87
+ ## ⚙️ How RL Works Here
88
+
89
+ At every step, the agent interacts with the environment using three core components:
90
+
91
+ ---
92
+
93
+ ### 🔹 1. State (What the agent sees)
94
+
95
+ The **state** is a snapshot of the system at a given time.
96
+
97
+ It includes:
98
+
99
+ * Number of pending applications per service
100
+ * Average waiting time
101
+ * SLA pressure (how close deadlines are)
102
+ * Missing document backlog
103
+ * Officer allocation across services
104
+
105
+ ```text
106
+ State = Current condition of the entire workflow system
107
+ ```
108
+
109
+ ---
110
+
111
+ ### 🔹 2. Action (What the agent can do)
112
+
113
+ The agent chooses **one action per step** to influence the system.
114
+
115
+ Examples:
116
+
117
+ * Change prioritization strategy (urgent-first, fairness-based, etc.)
118
+ * Allocate more officers to a service
119
+ * Request missing documents
120
+ * Escalate high-priority cases
121
+ * Reallocate resources
122
+ * Advance time (do nothing)
123
+
124
+ ```text
125
+ Action = A decision that changes how the system evolves
126
+ ```
127
+
128
+ ---
129
+
130
+ ### 🔹 3. Reward (How the agent learns)
131
+
132
+ After each action, the agent receives a **reward signal**.
133
+
134
+ This reward tells the agent how good or bad its decision was.
135
+
136
+ ---
137
+
138
+ #### Reward is based on:
139
+
140
+ * ✅ Applications progressing through stages
141
+ * ✅ Completed applications
142
+ * ❌ SLA breaches (penalty)
143
+ * ❌ Long waiting times
144
+ * ❌ Unfair distribution across services
145
+ * ❌ Idle resources
146
+
147
+ ---
148
+
149
+ ### Simplified reward intuition:
150
+
151
+ ```text
152
+ Good decisions → positive reward
153
+ Bad decisions → negative reward
154
+ ```
155
+
156
+ Over time, the agent learns:
157
+
158
+ ```text
159
+ “How to maximize long-term reward”
160
+ ```
161
+
162
+ ---
163
+
164
+ ## 🔁 Why Reinforcement Learning?
165
+
166
+ Because this system is:
167
+
168
+ ```text
169
+ ✔ Dynamic (state keeps changing)
170
+ ✔ Multi-objective (speed vs fairness vs deadlines)
171
+ ✔ Sequential (each decision affects future states)
172
+ ✔ Uncertain (random delays, missing docs)
173
+ ```
174
+
175
+ This makes RL a natural fit.
176
+
177
+ ---
178
+
179
+ ## 🏗️ What We Built
180
+
181
+ ---
182
+
183
+ ### 🔹 1. Simulation Environment
184
+
185
+ A realistic, controllable system that models:
186
+
187
+ * Workflow pipelines
188
+ * Resource constraints
189
+ * Delays and uncertainties
190
+ * Policy decisions
191
+
192
+ ---
193
+
194
+ ### 🔹 2. RL Training Pipeline
195
+
196
+ We trained an agent using **PPO (Proximal Policy Optimization)**:
197
+
198
+ * Runs through thousands of simulated steps
199
+ * Learns via trial and error
200
+ * Improves decision-making over time
201
+
202
+ ---
203
+
204
+ ### 🔹 3. Baseline vs RL Comparison
205
+
206
+ We compared against:
207
+
208
+ ```text
209
+ Heuristic Systems:
210
+ - FIFO
211
+ - Urgent-first
212
+ ```
213
+
214
+ ---
215
+
216
+ ## 📊 What Did We Observe?
217
+
218
+ Across all scenarios:
219
+
220
+ ```text
221
+ ✔ Reduced backlog
222
+ ✔ Fewer SLA breaches
223
+ ✔ Better completion rates
224
+ ```
225
+
226
+ The RL agent consistently **outperformed static policies**.
227
+
228
+ ---
229
+
230
+ ## 🎬 Making AI Explainable
231
+
232
+ AI systems often act like black boxes.
233
+
234
+ We solved this using a **storytelling frontend**:
235
+
236
+ * Timeline of decisions
237
+ * Agent reasoning (why a decision was taken)
238
+ * Impact indicators (what changed after each action)
239
+
240
+ ---
241
+
242
+ ```text
243
+ The system doesn’t just act — it explains.
244
+ ```
245
+
246
+ ---
247
+
248
+ ## 🧠 Addressing the Big Question
249
+
250
+ > “Is this just coded logic?”
251
+
252
+ ---
253
+
254
+ ### ❌ Static System
255
+
256
+ ```text
257
+ if backlog > X → do Y
258
+ ```
259
+
260
+ ---
261
+
262
+ ### ✅ RL System
263
+
264
+ ```text
265
+ policy(state) → action
266
+ ```
267
+
268
+ * Learns from experience
269
+ * Adapts to changing conditions
270
+ * Balances trade-offs dynamically
271
+
272
+ ---
273
+
274
+ ## 🌍 Why This Matters
275
+
276
+ This approach applies to:
277
+
278
+ * Government services
279
+ * Public infrastructure systems
280
+ * Large-scale workflow automation
281
+
282
+ It demonstrates:
283
+
284
+ ```text
285
+ Adaptive systems can outperform rule-based systems
286
+ ```
287
+
288
+ ---
289
+
290
+ ## 🚀 Final Thought
291
+
292
+ We didn’t just build a model.
293
+
294
+ We built a system that learns:
295
+
296
+ ```text
297
+ “How to make better decisions in complex workflows”
298
+ ```
299
+
300
+ ---
301
+
302
+ ## 📌 TL;DR
303
+
304
+ * Government workflows fail due to rigid rules
305
+ * We simulate them as an RL environment
306
+ * Train an agent to make adaptive decisions
307
+ * Result: improved efficiency, fairness, and scalability
308
+
309
+ ---
310
+
311
+ > From rules → to learning
312
+ > From static → to adaptive intelligence
313
+
314
+ ---
Dockerfile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gov Workflow OpenEnv
2
+ # Multi-stage image:
3
+ # 1) build Vite frontend assets
4
+ # 2) run FastAPI backend and serve built UI under /ui
5
+
6
+ FROM node:20-slim AS frontend-build
7
+ WORKDIR /web
8
+
9
+ COPY frontend/react/package.json frontend/react/package-lock.json ./frontend/react/
10
+ RUN cd frontend/react && npm ci --no-audit --no-fund
11
+
12
+ COPY frontend/react ./frontend/react
13
+ RUN cd frontend/react && npm run build
14
+
15
+
16
+ FROM python:3.11-slim AS runtime
17
+
18
+ ENV PYTHONDONTWRITEBYTECODE=1 \
19
+ PYTHONUNBUFFERED=1 \
20
+ PIP_NO_CACHE_DIR=1 \
21
+ OPENENV_DATA_DIR=/data/openenv_rl \
22
+ STORAGE_ENABLED=true \
23
+ PORT=7860
24
+
25
+ WORKDIR /app
26
+
27
+ # Runtime OS dependencies (torch/sb3 commonly require libgomp at runtime)
28
+ RUN apt-get update \
29
+ && apt-get install -y --no-install-recommends libgomp1 \
30
+ && rm -rf /var/lib/apt/lists/*
31
+
32
+ COPY requirements.txt requirements_rl.txt ./
33
+ RUN python -m pip install --upgrade pip \
34
+ && python -m pip install -r requirements.txt \
35
+ && python -m pip install -r requirements_rl.txt
36
+
37
+ COPY . .
38
+ COPY --from=frontend-build /web/frontend/react/dist ./app/web/vite_dist
39
+
40
+ RUN mkdir -p /data/openenv_rl \
41
+ && useradd --create-home --uid 10001 appuser \
42
+ && chown -R appuser:appuser /app /data/openenv_rl
43
+
44
+ USER appuser
45
+
46
+ EXPOSE 7860
47
+
48
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
49
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/health', timeout=3)" || exit 1
50
+
51
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
GovWorkflow_RL_ENV.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gov Workflow OpenEnv
3
+ sdk: docker
4
+ app_port: 7860
5
+ pinned: false
6
+ ---
7
+
8
+ # Gov Workflow OpenEnv
9
+
10
+ ## Quick Links
11
+
12
+ - Hugging Face Space URL (Dummy, update later): [https://huggingface.co/spaces/your-username/your-space-name](https://huggingface.co/spaces/your-username/your-space-name)
13
+ This placeholder will be replaced with the final deployed demo link.
14
+ - Blog path in codebase: `OPENENV_RL/Blog.md`
15
+ Project write-up and narrative documentation for design choices and outcomes.
16
+ - Notebook path: `OPENENV_RL/GovWorkflow_RL_ENV.ipynb`
17
+ Main OpenEnv RL government workflow notebook used as the judge-facing criteria book. It contains the practical judging context, environment setup, and the full end-to-end flow in one place.
18
+ - Notebook Colab URL: [https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing](https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing)
19
+ Cloud version of the same notebook so judges can run and review the complete workflow without local setup.
20
+ - GRPO Phase 1 training link: [https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing](https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing)
21
+ First-stage GRPO training run where the LLM agent starts learning policy behavior inside the RL environment.
22
+ - GRPO Phase 2 training link: [https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing](https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing)
23
+ Second-stage GRPO continuation where the same LLM agent is further trained and refined on the RL environment.
24
+ - PPO Phase 1 training (local): `rl/train_ppo.py`
25
+ Phase 1 PPO baseline training was executed on the local system to establish the RL algorithm baseline before phase-2 progression.
26
+ - PPO Phase 2 training link: [https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing](https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing)
27
+ PPO phase 2 training notebook where the RL algorithm is further trained on the same environment for improved policy performance.
28
+
29
+ Gov Workflow OpenEnv is a FastAPI-first simulation environment for public service workflow operations.
30
+ It models queue prioritization, officer allocation, missing-document recovery, escalation usage, and fairness-aware SLA management across government services.
31
+
32
+ This repository is productionized for:
33
+ - local development (FastAPI + Vite)
34
+ - Docker runtime
35
+ - Hugging Face Spaces (Docker SDK)
36
+
37
+ ## Current Main-Branch Status
38
+
39
+ This README is aligned to the current `main` branch code paths, including:
40
+ - `app.main:app` as primary server runtime
41
+ - React UI served at `/ui` from built Vite assets when available
42
+ - OpenEnv contract endpoints (`/reset`, `/step`, `/state`, `/grade`)
43
+ - frontend API aliases (`/api/*`) and versioned aliases (`/api/v1/*`)
44
+ - training story endpoints (`/training/*`)
45
+ - simulation, RL, persistence, compliance, and history endpoints
46
+
47
+ ## End-to-End Architecture
48
+
49
+ ```mermaid
50
+ flowchart LR
51
+ UI["React UI"] --> API["FastAPI app.main"]
52
+ API --> ENV["GovWorkflowEnv app/env.py"]
53
+ API --> SIM["Simulation runtime app/simulator.py"]
54
+ API --> RL["RL train/eval rl/*"]
55
+ API --> STORE["PersistenceStore SQLite + filesystem"]
56
+ API --> STORY["Training Story router /training/*"]
57
+ API --> OPENENV["Optional OpenEnv adapter /openenv/*"]
58
+ ```
59
+
60
+ ## Core Runtime Components
61
+
62
+ - API server: `app/main.py`
63
+ - Environment kernel: `app/env.py`
64
+ - Typed models: `app/models.py`
65
+ - Task registry: `app/tasks.py`
66
+ - Reward shaping: `app/reward.py`
67
+ - Deterministic graders: `app/graders.py`
68
+ - Simulation runtime: `app/simulator.py`
69
+ - Training jobs manager: `app/training_jobs.py`
70
+ - Persistence layer: `app/persistence.py`
71
+ - Transport gateway: `app/api_gateway.py`
72
+ - React frontend: `frontend/react`
73
+
74
+ ## Task Set (Current Runtime)
75
+
76
+ Configured in `app/tasks.py`:
77
+ - `district_backlog_easy`
78
+ - `mixed_urgency_medium`
79
+ - `cross_department_hard`
80
+ - `district_backlog_easy_extreme`
81
+
82
+ Benchmark list used by APIs:
83
+ - `district_backlog_easy`
84
+ - `mixed_urgency_medium`
85
+ - `cross_department_hard`
86
+
87
+ ## Service Coverage
88
+
89
+ `ServiceType` includes:
90
+ - `passport`
91
+ - `driving_license`
92
+ - `aadhaar_card`
93
+ - `gst_registration`
94
+ - `income_certificate`
95
+ - `caste_certificate`
96
+ - `birth_certificate`
97
+ - `land_registration`
98
+
99
+ Medium and hard tasks currently run with:
100
+ - `income_certificate`
101
+ - `land_registration`
102
+ - `passport`
103
+ - `driving_license`
104
+ - `aadhaar_card`
105
+
106
+
107
+
108
+ ## Local Development
109
+
110
+ ### Prerequisites
111
+
112
+ - Python 3.11+
113
+ - Node 20+
114
+ - Docker
115
+
116
+ ### Install dependencies
117
+
118
+ ```bash
119
+ pip install -r requirements.txt
120
+ pip install -r requirements_rl.txt
121
+ pip install pytest pytest-asyncio
122
+ npm --prefix frontend/react install
123
+ ```
124
+
125
+ ### Configure environment
126
+
127
+ ```bash
128
+ cp .env.example .env   # Windows cmd: copy .env.example .env
129
+ ```
130
+
131
+ Populate as needed:
132
+ - `API_BASE_URL`
133
+ - `MODEL_NAME`
134
+ - `HF_TOKEN` or `OPENAI_API_KEY`/`API_KEY`
135
+ - optional NVIDIA keys (`NVIDIA_API_KEY`, `NVIDIA_API_KEY_2`)
136
+ - storage settings (`STORAGE_ENABLED`, `OPENENV_DATA_DIR`)
137
+
138
+ ### Run backend
139
+
140
+ ```bash
141
+ python scripts/run_local.py --host 127.0.0.1 --port 7860 --reload
142
+ ```
143
+
144
+ ### Run frontend
145
+
146
+ ```bash
147
+ npm --prefix frontend/react run dev
148
+ ```
149
+
150
+ Open:
151
+ - UI: `http://127.0.0.1:5173/ui`
152
+ - API docs: `http://127.0.0.1:7860/docs`
153
+
154
+
155
+
156
+
157
+ ## Repository Layout
158
+
159
+ ```text
160
+ app/
161
+ main.py FastAPI app + API routing + compatibility aliases
162
+ env.py GovWorkflowEnv kernel
163
+ models.py Typed Pydantic contracts
164
+ tasks.py Runtime task registry
165
+ reward.py Reward shaping
166
+ graders.py Deterministic graders
167
+ simulator.py Simulation runtime and live sessions
168
+ training_jobs.py Background RL training manager
169
+ persistence.py SQLite/filesystem persistence
170
+ api_gateway.py direct/http/auto environment transport layer
171
+ story_router.py training story endpoints
172
+ rl/
173
+ gov_workflow_env.py Gym adapter
174
+ train_ppo.py PPO phase training entrypoint
175
+ evaluate.py Checkpoint evaluator
176
+ feature_builder.py RL feature engineering
177
+ action_mask.py Action mask logic
178
+ frontend/react/
179
+ src/ React modules/components/api hooks
180
+ scripts/
181
+ run_local.py Local FastAPI launcher
182
+ convert_grpo_csv.py Training CSV to JSON converter for story endpoints
183
+ openenv.yaml OpenEnv manifest metadata
184
+ baseline_openai.py Baseline and LLM runner
185
+ inference.py Submission-style inference runner
186
+ Dockerfile Docker image definition
187
+ ```
188
+
189
+ ## License
190
+
191
+ BSD-3-Clause
app/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/
2
+
3
+ Core environment and API layer.
4
+
5
+ - `main.py`: FastAPI app and endpoints
6
+ - `env.py`: GovWorkflowEnv simulation kernel
7
+ - `models.py`: Pydantic action/observation/reward/state models
8
+ - `tasks.py`: easy/medium/hard deterministic task configs
9
+ - `graders.py`: deterministic task scoring (0.0 to 1.0)
10
+ - `reward.py`: dense reward breakdown
11
+ - `baselines.py`: heuristic baseline policies
12
+ - `web/`: frontend assets served by FastAPI at `/ui`
13
+ - `vite_dist/`: production Vite build output copied during Docker build
14
+ - legacy files (`index.html`, `react_app.js`, `styles.css`) remain as local fallback
15
+
16
+ Additional frontend-focused APIs in `main.py`:
17
+ - `/api/workflows/components`
18
+ - `/api/workflows/run`
19
+ - `/api/rl/models`
20
+ - `/api/rl/run`
21
+ - `/api/rl/evaluate`
22
+ - `/api/simulation/run`
23
+ - `/api/training/jobs`
app/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Public surface of the ``app`` package.
# NOTE: GovWorkflowEnv is currently neither imported nor exported from this
# module (its import line is disabled); import it from ``app.env`` directly.
from app.models import ActionModel, ObservationModel, RewardModel

# ``client`` is optional: when the module cannot be found the name is still
# defined (as None) so that importing ``app`` keeps working.
try:
    from client import GovWorkflowClient
except ModuleNotFoundError:
    GovWorkflowClient = None  # type: ignore[assignment]

# Alternate names bound to the same action/observation model classes.
GovWorkflowObservation = ObservationModel
GovWorkflowAction = ActionModel

__all__ = [
    "ActionModel",
    "ObservationModel",
    "RewardModel",
    "GovWorkflowAction",
    "GovWorkflowObservation",
    "GovWorkflowClient",
]
app/api_gateway.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified environment transport layer.
3
+
4
+ This module centralizes environment access so callers can use:
5
+ - FastAPI HTTP transport
6
+ - direct in-process transport
7
+ - dynamic auto selection
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ import os
14
+ from typing import Literal, Protocol
15
+
16
+ from app.env import GovWorkflowEnv
17
+ from app.graders import grade_episode
18
+ from app.models import ActionModel, ObservationModel, StepInfoModel
19
+
20
+
21
+ TransportMode = Literal["auto", "http", "direct"]
22
+
23
+
24
class EnvGateway(Protocol):
    """Structural interface implemented by every environment transport.

    A gateway exposes the transport mode it uses, the ``terminated`` and
    ``truncated`` episode flags, and the four operations declared below.
    """

    transport: TransportMode
    terminated: bool
    truncated: bool

    def reset(self) -> ObservationModel:
        """Begin an episode and return its first observation."""

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Apply ``action``.

        Returns ``(observation, reward, terminated, truncated, info)``.
        """

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Return ``(score, grader_name, metrics)`` for the episode."""

    def close(self) -> None:
        """Release whatever the gateway is holding."""
38
+
39
+
40
@dataclass
class DirectEnvGateway:
    """Gateway that drives a ``GovWorkflowEnv`` instance in the current process.

    Fields:
        task_id: Task identifier handed to ``GovWorkflowEnv``.
        seed: Seed passed to the environment on every ``reset``.
        transport: Transport label; defaults to ``"direct"``.
    """

    task_id: str
    seed: int
    transport: TransportMode = "direct"

    def __post_init__(self) -> None:
        """Create the wrapped environment and clear the episode-end flags."""
        self._env = GovWorkflowEnv(task_id=self.task_id)
        self.terminated = self.truncated = False

    def reset(self) -> ObservationModel:
        """Reset the wrapped environment with the stored seed.

        Returns:
            The observation from the environment's ``reset``; the second
            element of its return value is discarded.
        """
        first_observation, _unused = self._env.reset(seed=self.seed)
        self.terminated = self.truncated = False
        return first_observation

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Forward ``action`` to the environment and record the end flags.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` with the
            reward coerced to ``float`` and both flags coerced to ``bool``.
        """
        outcome = self._env.step(action)
        observation, reward, terminated, truncated, info = outcome
        # Mirror the flags on the gateway so callers can poll them later.
        self.terminated = bool(terminated)
        self.truncated = bool(truncated)
        return (
            observation,
            float(reward),
            bool(terminated),
            bool(truncated),
            info,
        )

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Grade the environment's current state.

        Returns:
            ``(score, grader_name, metrics)`` taken from ``grade_episode``.
        """
        verdict = grade_episode(self._env.state())
        score = float(verdict.score)
        grader = str(verdict.grader_name)
        metrics = dict(verdict.metrics)
        return score, grader, metrics

    def close(self) -> None:
        """Call the environment's ``close`` when it provides a callable one."""
        closer = getattr(self._env, "close", None)
        if not callable(closer):
            return
        closer()
73
+
74
+
75
@dataclass
class HttpEnvGateway:
    """Gateway that drives an environment session through its HTTP API.

    Fields:
        task_id: Sent as ``task_id`` in the ``/reset`` payload.
        seed: Sent as ``seed`` in the ``/reset`` payload.
        base_url: Server root; trailing slashes are stripped on init.
        api_prefix: Explicit route prefix. When blank or ``None`` the prefix
            is discovered lazily by probing ``<prefix>/health``.
        transport: Label recording how this gateway was selected.
    """

    task_id: str
    seed: int
    base_url: str
    api_prefix: str | None = None
    transport: TransportMode = "http"

    def __post_init__(self) -> None:
        """Import ``requests`` lazily and initialise per-session state.

        Raises:
            ImportError: if ``requests`` is not installed.
        """
        try:
            import requests as _requests
        except ImportError as exc:
            raise ImportError("requests is required for HTTP transport.") from exc
        self._requests = _requests
        self._session_id: str | None = None
        self.terminated = False
        self.truncated = False
        self.base_url = self.base_url.rstrip("/")
        # ``None`` means "not probed yet". An empty string is a valid resolved
        # prefix (API mounted at the root), so it cannot double as the sentinel.
        self._resolved_prefix: str | None = self._normalize_prefix(self.api_prefix) or None

    @staticmethod
    def _normalize_prefix(prefix: str | None) -> str:
        """Return ``prefix`` with one leading slash and no trailing slash; blank/None -> ``""``."""
        if prefix is None:
            return ""
        p = str(prefix).strip()
        if not p:
            return ""
        if not p.startswith("/"):
            p = "/" + p
        return p.rstrip("/")

    @staticmethod
    def _candidate_prefixes(explicit_prefix: str | None) -> list[str]:
        """Build the ordered list of prefixes to probe.

        A non-blank explicit prefix is the only candidate. Otherwise the
        candidates are, in order: ``OPENENV_ENV_API_PREFIX``, each entry of
        the comma-separated ``OPENENV_ENV_API_PREFIX_CANDIDATES``, then the
        built-in fallbacks ``/api/v1``, ``/api`` and the root (``""``).
        """
        normalized_explicit = HttpEnvGateway._normalize_prefix(explicit_prefix)
        if normalized_explicit:
            return [normalized_explicit]

        configured = [
            os.getenv("OPENENV_ENV_API_PREFIX", ""),
            *os.getenv("OPENENV_ENV_API_PREFIX_CANDIDATES", "").split(","),
        ]

        candidates: list[str] = []
        for item in configured:
            normalized = HttpEnvGateway._normalize_prefix(item)
            # Blank entries (unset variables, stray commas) are skipped so that
            # the root prefix is not promoted ahead of the ordered fallbacks.
            if normalized and normalized not in candidates:
                candidates.append(normalized)

        # Ordered fallbacks: versioned API -> frontend API -> root OpenEnv API.
        for fallback in ["/api/v1", "/api", ""]:
            if fallback not in candidates:
                candidates.append(fallback)
        return candidates

    def _resolve_prefix(self) -> str:
        """Return the API prefix, probing ``/health`` on candidates until one answers OK.

        A successful probe is cached, including a root (``""``) result, so
        later requests do not probe again. When every probe fails the root
        prefix is returned without caching, so the next request retries.
        """
        if self._resolved_prefix is not None:
            return self._resolved_prefix
        for prefix in self._candidate_prefixes(self.api_prefix):
            try:
                response = self._requests.get(
                    f"{self.base_url}{prefix}/health",
                    timeout=3,
                )
                if response.ok:
                    self._resolved_prefix = prefix
                    return prefix
            except Exception:
                # An unreachable candidate is not an error; try the next one.
                continue
        return ""

    def _url(self, path: str) -> str:
        """Join base URL, resolved prefix and ``path`` (which should start with ``/``)."""
        return f"{self.base_url}{self._resolve_prefix()}{path}"

    def _post(self, path: str, body: dict) -> dict:
        """POST ``body`` as JSON to ``path`` and return the decoded JSON response.

        Raises whatever ``response.raise_for_status()`` raises on an HTTP error.
        """
        response = self._requests.post(
            self._url(path),
            json=body,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def reset(self) -> ObservationModel:
        """Start a session via ``/reset`` and return its first observation."""
        payload = {"task_id": self.task_id, "seed": self.seed}
        data = self._post("/reset", payload)
        self._session_id = str(data["session_id"])
        self.terminated = False
        self.truncated = False
        return ObservationModel(**data["observation"])

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Send one action via ``/step`` and return the decoded transition.

        Raises:
            RuntimeError: if ``reset()`` has not established a session.
        """
        if not self._session_id:
            raise RuntimeError("Session is not initialized. Call reset() first.")
        data = self._post(
            "/step",
            {
                "session_id": self._session_id,
                "action": action.model_dump(exclude_none=True, mode="json"),
            },
        )
        obs = ObservationModel(**data["observation"])
        info = StepInfoModel(**data["info"])
        self.terminated = bool(data["terminated"])
        self.truncated = bool(data["truncated"])
        return (
            obs,
            float(data["reward"]),
            bool(data["terminated"]),
            bool(data["truncated"]),
            info,
        )

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Request ``/grade`` for the session and return ``(score, grader_name, metrics)``.

        Raises:
            RuntimeError: if ``reset()`` has not established a session.
        """
        if not self._session_id:
            raise RuntimeError("Session is not initialized. Call reset() first.")
        data = self._post("/grade", {"session_id": self._session_id})
        return (
            float(data["score"]),
            str(data["grader_name"]),
            dict(data.get("metrics", {})),
        )

    def close(self) -> None:
        """Best-effort delete of the remote session; always forgets the local session id."""
        if not self._session_id:
            return
        try:
            self._requests.delete(self._url(f"/sessions/{self._session_id}"), timeout=10)
        except Exception:
            # Deliberately ignored: cleanup must never fail the caller.
            pass
        self._session_id = None
206
+
207
+
208
+ def _http_reachable(base_url: str) -> bool:
209
+ try:
210
+ import requests
211
+ r = requests.get(f"{base_url.rstrip('/')}/health", timeout=3)
212
+ return bool(r.ok)
213
+ except Exception:
214
+ return False
215
+
216
+
217
def create_env_gateway(
    *,
    task_id: str,
    seed: int,
    mode: TransportMode = "auto",
    base_url: str = "http://127.0.0.1:7860",
    api_prefix: str | None = None,
    enforce_fastapi: bool = False,
) -> EnvGateway:
    """
    Build the environment gateway for the requested transport mode.

    Selection rules:
    - mode=http   -> HTTP gateway, no reachability check
    - mode=direct -> in-process gateway; rejected when enforce_fastapi=True
    - otherwise   -> HTTP gateway if ``<base_url>/health`` is reachable,
                     else in-process gateway (or an error when
                     enforce_fastapi=True)

    Raises:
        RuntimeError: direct transport requested while enforce_fastapi is set,
            or the HTTP server is unreachable while enforce_fastapi is set.
    """
    wants_direct = mode == "direct"
    if enforce_fastapi and wants_direct:
        raise RuntimeError("Direct transport is disabled. Set mode to 'http' or 'auto'.")

    if wants_direct:
        return DirectEnvGateway(task_id=task_id, seed=seed)

    http_kwargs = dict(task_id=task_id, seed=seed, base_url=base_url, api_prefix=api_prefix)
    if mode == "http":
        return HttpEnvGateway(**http_kwargs)

    # Auto selection: prefer the HTTP server when it answers its health check.
    if not _http_reachable(base_url):
        if enforce_fastapi:
            raise RuntimeError(
                f"FastAPI gateway is required but unavailable at {base_url}. "
                "Start the API server or disable FORCE_FASTAPI_GATEWAY."
            )
        return DirectEnvGateway(task_id=task_id, seed=seed, transport="auto")

    return HttpEnvGateway(transport="auto", **http_kwargs)
app/baselines.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from collections.abc import Callable
3
+ from types import SimpleNamespace
4
+ from app.env import GovWorkflowEnv
5
+ from app.graders import grade_episode
6
+ from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
7
+
8
+ PolicyFn = Callable[[ObservationModel], ActionModel]
9
+
10
+
11
+ def _snapshots(obs: ObservationModel):
12
+ """Return queue snapshots as a list regardless of Phase 1 (list) or Phase 2 (dict)."""
13
+ qs = obs.queue_snapshots
14
+ if isinstance(qs, dict):
15
+ return list(qs.values())
16
+ return list(qs)
17
+
18
+
19
+ def _service_attr(q, *attrs):
20
+ """Return the first attribute that exists on a QueueSnapshot (Phase 1 vs Phase 2 names)."""
21
+ for attr in attrs:
22
+ val = getattr(q, attr, None)
23
+ if val is not None:
24
+ return val
25
+ return 0
26
+
27
+
28
+ def _service_name(q) -> ServiceType:
29
+ """Return ServiceType regardless of Phase 1 (.service) or Phase 2 (.service_type)."""
30
+ return getattr(q, "service_type", None) or getattr(q, "service", None)
31
+
32
+
33
def _service_with_max(obs: ObservationModel, *attrs) -> ServiceType | None:
    """Return the service whose snapshot has the largest ``attrs`` value, or None.

    The value of each snapshot is resolved with ``_service_attr``. None is
    returned when there are no snapshots or the largest value is not positive.
    On ties the earliest snapshot wins.
    """
    snaps = _snapshots(obs)
    if not snaps:
        return None
    leader = max(snaps, key=lambda snap: _service_attr(snap, *attrs))
    if _service_attr(leader, *attrs) > 0:
        return _service_name(leader)
    return None
39
+
40
+
41
+ def _reserve_officers(obs: ObservationModel) -> int:
42
+ pool = obs.officer_pool
43
+ # Phase 2: idle_officers property
44
+ if hasattr(pool, "idle_officers"):
45
+ return int(pool.idle_officers)
46
+ # Phase 1 fallback
47
+ return int(getattr(pool, "reserve_officers", 0))
48
+
49
+
50
+ def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
51
+ pool = obs.officer_pool
52
+ # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
53
+ alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
54
+ raw = alloc_dict.get(service)
55
+ if raw is None:
56
+ raw = alloc_dict.get(service.value if hasattr(service, "value") else str(service), 0)
57
+ return int(raw or 0)
58
+
59
+
60
def urgent_first_policy(obs: ObservationModel) -> ActionModel:
    """Request missing documents for the service with the most urgent cases, else advance time.

    Note: this module later rebinds the name ``urgent_first_policy`` to
    ``greedy_sla_policy``, so this definition is shadowed after import.
    """
    most_urgent = _service_with_max(obs, "urgent_pending", "urgent_cases")
    if not most_urgent:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
    return ActionModel(
        action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
        service_target=most_urgent,
    )
65
+
66
+
67
def oldest_first_policy(obs: ObservationModel) -> ActionModel:
    """Ignore the observation and always advance time."""
    advance = ActionModel(action_type=ActionType.ADVANCE_TIME)
    return advance
69
+
70
+
71
def backlog_clearance_policy(obs: ObservationModel) -> ActionModel:
    """Pick the next action with a backlog-reduction heuristic.

    Rules are tried in order; the first that applies decides the action:
      1. Idle officers exist -> assign one to the most backlogged service.
      2. Some service is blocked on missing documents -> request them.
      3. With at least two services, when the busiest queue leads the
         quietest by 3 or more cases and the quietest holds more than one
         officer -> move one officer from the quietest to the busiest.
      4. Otherwise -> advance time.
    """
    load_fields = ("total_pending", "active_cases")
    snaps = _snapshots(obs)

    # Rule 1: put an idle officer on the largest backlog.
    if _reserve_officers(obs) > 0:
        busiest = _service_with_max(obs, *load_fields)
        if busiest:
            return ActionModel(
                action_type=ActionType.ASSIGN_CAPACITY,
                service_target=busiest,
                capacity_assignment={busiest.value: 1},
            )

    # Rule 2: unblock cases waiting on documents.
    blocked = _service_with_max(obs, "blocked_missing_docs", "missing_docs_cases")
    if blocked:
        return ActionModel(
            action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
            service_target=blocked,
        )

    # Rule 3: rebalance officers between the extremes.
    if len(snaps) >= 2:

        def queue_load(snap):
            return _service_attr(snap, *load_fields)

        hottest = max(snaps, key=queue_load)
        coldest = min(snaps, key=queue_load)
        hot_svc = _service_name(hottest)
        cold_svc = _service_name(coldest)
        hot_load = queue_load(hottest)
        cold_load = queue_load(coldest)

        distinct = bool(hot_svc and cold_svc and hot_svc != cold_svc)
        if distinct and hot_load - cold_load >= 3 and _alloc_for(obs, cold_svc) > 1:
            return ActionModel(
                action_type=ActionType.REALLOCATE_OFFICERS,
                service_target=cold_svc,
                reallocation_delta={cold_svc.value: -1, hot_svc.value: 1},
            )

    # Rule 4: nothing useful to do this step.
    return ActionModel(action_type=ActionType.ADVANCE_TIME)
109
+
110
+
111
def greedy_sla_policy(obs: ObservationModel) -> ActionModel:
    """SLA-focused policy: request documents for the most urgent service, else defer to backlog clearance.

    The target service is the one with the largest ``urgent_pending`` /
    ``urgent_cases`` / ``breached_cases`` value (first attribute present).
    """
    hotspot = _service_with_max(obs, "urgent_pending", "urgent_cases", "breached_cases")
    if not hotspot:
        return backlog_clearance_policy(obs)
    return ActionModel(
        action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
        service_target=hotspot,
    )
117
+
118
+
119
def random_policy(obs: ObservationModel) -> ActionModel:
    """Placeholder policy registered under the name ``random_policy``.

    Despite its name it performs no random choice: the observation is ignored
    and the action is always ``ADVANCE_TIME``. The previously unused local
    ``import random`` has been removed.
    """
    return ActionModel(action_type=ActionType.ADVANCE_TIME)
122
+
123
# Historical aliases: these names resolve to the policies defined above.
urgent_first_policy = greedy_sla_policy
fairness_aware_policy = backlog_clearance_policy

# Registry mapping public policy names to their callables.
POLICIES: dict[str, PolicyFn] = dict(
    urgent_first=greedy_sla_policy,
    oldest_first=oldest_first_policy,
    backlog_clearance=backlog_clearance_policy,
    random_policy=random_policy,
    greedy_sla_policy=greedy_sla_policy,
    fairness_aware_policy=fairness_aware_policy,
)
134
+
135
+
136
def run_policy_episode(
    task_id: str,
    policy_name: str,
    seed: int | None = None,
    max_steps: int = 500,
) -> SimpleNamespace:
    """Run one episode of ``task_id`` under a registered policy and grade it.

    Args:
        task_id: Task passed to ``GovWorkflowEnv``.
        policy_name: Key into ``POLICIES``.
        seed: Forwarded to ``env.reset``.
        max_steps: Upper bound on the number of ``env.step`` calls.

    Returns:
        A ``SimpleNamespace`` (not a dict, so attribute access such as
        ``result.score`` works) with fields ``task_id``, ``policy``, ``seed``,
        ``reward_sum``, ``score``, ``grader``, ``metrics``, ``steps``,
        ``completed`` and ``backlog``.

    Raises:
        KeyError: if ``policy_name`` is not registered in ``POLICIES``.
    """
    # Resolve the policy first so an unknown name fails before an env is built.
    policy = POLICIES[policy_name]

    env = GovWorkflowEnv(task_id=task_id)
    obs, _ = env.reset(seed=seed)
    reward_sum = 0.0
    for _ in range(max_steps):
        action = policy(obs)
        obs, reward, terminated, truncated, _ = env.step(action)
        reward_sum += reward
        if terminated or truncated:
            break

    state = env.state()
    grade = grade_episode(state)
    return SimpleNamespace(
        task_id=task_id,
        policy=policy_name,
        seed=state.seed,
        reward_sum=round(reward_sum, 4),
        score=float(grade.score),
        grader=grade.grader_name,
        metrics=grade.metrics,
        steps=int(state.total_steps),
        completed=int(state.total_completed),
        backlog=int(state.total_backlog),
    )
app/config.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Path bootstrap ─────────────────────────────────────────────────────────────
2
+ from __future__ import annotations
3
+ from pathlib import Path
4
+
5
+ # Load .env file if it exists — must happen before Pydantic Settings reads env vars
6
+ try:
7
+ from dotenv import load_dotenv
8
+ except (ImportError, AttributeError):
9
+ # Keep runtime functional even when python-dotenv is not installed
10
+ # or when a conflicting `dotenv` package is present.
11
+ def load_dotenv(*args, **kwargs): # type: ignore[no-redef]
12
+ return False
13
+ _ENV_FILE = Path(__file__).resolve().parent.parent / ".env"
14
+ load_dotenv(dotenv_path=_ENV_FILE, override=False)
15
+ # override=False means real environment variables always win over .env values
16
+ # ──────────────────────────────────────────────────────────────────────────────
17
+
18
+ from pydantic import Field
19
+ from pydantic_settings import BaseSettings, SettingsConfigDict
20
+
21
+
22
class ServerSettings(BaseSettings):
    """
    Settings for the HTTP server process.

    Values are read from environment variables carrying the ``SERVER_``
    prefix (for example ``SERVER_PORT=8080 SERVER_LOG_LEVEL=debug``);
    unrecognised variables are ignored. Kept separate from ``EnvSettings``
    so that server bind options and simulation defaults are configured
    independently.
    """

    model_config = SettingsConfigDict(env_prefix="SERVER_", extra="ignore")

    host: str = Field(default="0.0.0.0", description="Bind host")
    port: int = Field(default=7860, description="Bind port — HF Spaces default is 7860")
    log_level: str = Field(
        default="info",
        description="Uvicorn log level: debug | info | warning | error",
    )
    cors_origins: list[str] = Field(
        default=["*"],
        description="Allowed CORS origins. '*' is required for HF Spaces embedding.",
    )
    # NOTE: leave at 1 while sessions live in process memory; separate worker
    # processes do not share that memory. Move to a shared store (e.g. Redis)
    # before raising this in production.
    workers: int = Field(
        default=1,
        description="Uvicorn worker count — keep at 1 for in-memory sessions",
    )
51
+
52
+
53
class EnvSettings(BaseSettings):
    """
    Default values for the simulation environment.

    Values are read from environment variables carrying the ``ENV_`` prefix
    (for example ``ENV_DEFAULT_TASK_ID=mixed_urgency_medium
    ENV_MAX_SESSIONS=50``); unrecognised variables are ignored. Network
    binding, logging and CORS are configured in ``ServerSettings`` instead.
    """

    model_config = SettingsConfigDict(env_prefix="ENV_", extra="ignore")

    default_task_id: str = Field(
        default="district_backlog_easy",
        description="Task used when POST /reset is called without an explicit task_id",
    )
    default_seed: int = Field(
        default=11,
        description="Seed used when POST /reset is called without an explicit seed",
    )
    max_steps_per_episode: int = Field(
        default=500,
        description="Hard cap on step() calls per session before episode is truncated",
    )
    max_sessions: int = Field(
        default=100,
        description="Maximum concurrent in-memory sessions. Oldest is evicted when exceeded.",
    )
81
+
82
+
83
+ # ── Singletons ────────────────────────────────────────────────────────────────
84
+ # Loaded exactly once at import time. Never mutated at runtime.
85
+ # Tests may monkeypatch individual fields after import if needed.
86
# Module-level instances, built once when this module is imported.
server_settings = ServerSettings()
env_settings = EnvSettings()
app/engine.py ADDED
@@ -0,0 +1,1712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import random
6
+ import re
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, Literal, Optional
11
+
12
+ from openai import OpenAI
13
+
14
+ from app.event_engine import EventEngine
15
+ from app.models import (
16
+ ActionModel,
17
+ ActionType,
18
+ ApplicationCase,
19
+ DelayedEffect,
20
+ EventType,
21
+ IntakeChannel,
22
+ InternalSubstate,
23
+ ObservationModel,
24
+ PriorityMode,
25
+ QueueSnapshot,
26
+ ServiceType,
27
+ StageType,
28
+ )
29
+ from app.sector_profiles import get_sector_profile
30
+ from app.state_machine import can_advance
31
+
32
+ if TYPE_CHECKING:
33
+ from app.models import TaskConfig
34
+
35
+
36
+ LEGACY_NVIDIA_MODEL_POOL = [
37
+ "meta/llama-3.3-70b-instruct",
38
+ "qwen/qwen3-next-80b-a3b-instruct",
39
+ "moonshotai/kimi-k2-instruct-0905",
40
+ "meta/llama-3.1-405b-instruct",
41
+ "deepseek-ai/deepseek-v3.2",
42
+ "qwen/qwq-32b",
43
+ "mistralai/mixtral-8x22b-instruct-v0.1",
44
+ "google/gemma-3-27b-it",
45
+ "microsoft/phi-4-mini-instruct",
46
+ "meta/llama-3.1-8b-instruct",
47
+ ]
48
+
49
+ _MODEL_CACHE: dict[tuple[str, str], Any] = {}
50
+
51
+
52
+ # ─────────────────────────────────────────────
53
+ # DAY RESULT
54
+ # ─────────────────────────────────────────────
55
+
56
+
57
class DayResult:
    """Mutable per-day tally: integer counters that start at 0 plus the list of active events."""

    def __init__(self) -> None:
        # Every counter starts at zero; the order below fixes attribute order.
        counter_names = (
            "new_arrivals",
            "new_completions",
            "new_sla_breaches",
            "total_capacity_days",
            "idle_officer_days",
            "stage_advances",
            "newly_unblocked_missing",
            "newly_blocked_missing",
            "newly_unblocked_enrich",
            "field_verif_completed",
            "urgent_completed",
            "digital_arrivals",
        )
        for counter in counter_names:
            setattr(self, counter, 0)
        # A fresh list per instance, never shared between results.
        self.active_events: list[EventType] = []
72
+
73
+
74
+ # ─────────────────────────────────────────────
75
+ # DAY SIMULATOR
76
+ # ─────────────────────────────────────────────
77
+
78
+
79
class DaySimulator:
    """
    Core daily simulation engine.

    Accepts TWO calling conventions so both env.py and tests work:

    Convention A (tests):
        DaySimulator(task_config=task, rng=rng, event_engine=engine)

    Convention B (env.py legacy):
        DaySimulator(seed=42, task_config=task, sector_registry={})
        — in this case rng and event_engine are built internally.
    """

    def __init__(
        self,
        task_config: "TaskConfig",
        rng: Optional[random.Random] = None,
        event_engine: Optional[EventEngine] = None,
        seed: Optional[int] = None,
        sector_registry: Optional[dict] = None,
    ) -> None:
        """Store the task config and set up the RNG and event engine.

        Args:
            task_config: Task definition; also exposed as ``self.task``.
            rng: Random source to use. When omitted, one is seeded from
                ``seed`` or, failing that, ``task_config.seed``.
            event_engine: Engine to use. When omitted, an ``EventEngine`` is
                built from the same seed choice and ``task_config.scenario_mode``.
            seed: Optional seed overriding ``task_config.seed``.
            sector_registry: Stored as-is; a falsy value becomes ``{}``.
        """
        self.task_config = task_config
        self.task = task_config

        if rng is not None:
            self.rng = rng
        elif seed is not None:
            self.rng = random.Random(seed)
        else:
            self.rng = random.Random(task_config.seed)

        if event_engine is not None:
            self.event_engine = event_engine
        else:
            _seed = seed if seed is not None else task_config.seed
            self.event_engine = EventEngine(
                seed=_seed,
                scenario_mode=task_config.scenario_mode,
            )

        self.sector_registry = sector_registry or {}
        self.active_cases: list[ApplicationCase] = []
        self.pending_effects: list[DelayedEffect] = []
        self.case_counter: int = 0

    def simulate_day(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        completed_cases: list[ApplicationCase],
        priority_mode: PriorityMode,
        officer_allocations: dict,
    ) -> DayResult:
        """Advance the simulation by one day and return that day's tallies.

        Steps, in order: apply the day's events, spawn arrivals, reduce officer
        capacity if an event demands it, release cases whose field verification
        or document request is due, spend each service's capacity advancing
        eligible cases in priority order, move finished cases to
        ``completed_cases``, then age the remaining cases and flag new SLA
        breaches.

        ``active_cases`` and ``completed_cases`` are mutated in place.
        """
        # Imported once per call rather than once per processed case.
        from app.state_machine import advance_case

        result = DayResult()

        events = self.event_engine.get_events_for_day(day, self.task_config)
        params = self.event_engine.apply_events(events, self.task_config)
        result.active_events = list(params.active_events)

        new_cases = self._spawn_arrivals(day, params, result)
        active_cases.extend(new_cases)

        effective_alloc = self._apply_officer_reduction(officer_allocations, params)

        self._resolve_field_verification(day, active_cases, result)
        self._resolve_doc_requests(day, active_cases, result)

        newly_completed: list[ApplicationCase] = []

        for service in self.task_config.enabled_services:
            # Allocations may be keyed by the enum member or by its value.
            capacity = effective_alloc.get(service, effective_alloc.get(service.value, 0))
            result.total_capacity_days += int(capacity)

            service_cases = [
                c
                for c in active_cases
                if c.service_type == service and not c.completed and not c.rejected
            ]

            if not service_cases:
                result.idle_officer_days += int(capacity)
                continue

            sorted_cases = self._sort_queue(service_cases, priority_mode)

            # NOTE(review): capacity left over after this loop is not added to
            # idle_officer_days — confirm that is intended.
            for case in sorted_cases:
                if capacity <= 0:
                    break

                advanced, final = advance_case(case, day)

                if advanced:
                    capacity -= 1
                    result.stage_advances += 1
                    if final:
                        newly_completed.append(case)
                        if case.is_urgent:
                            result.urgent_completed += 1

        if newly_completed:
            done_ids = {c.case_id for c in newly_completed}
            still_active = [c for c in active_cases if c.case_id not in done_ids]
            # Rebuild in place so the caller's list object stays the same.
            active_cases.clear()
            active_cases.extend(still_active)
            completed_cases.extend(newly_completed)
            result.new_completions = len(newly_completed)

        for case in active_cases:
            case.current_day = day
            case.waiting_days += 1
            if day > case.sla_deadline_day and not case.sla_breached:
                case.sla_breached = True
                result.new_sla_breaches += 1

        return result

    def _apply_officer_reduction(self, allocations: dict, params: Any) -> dict:
        """Return a copy of ``allocations`` with ``params.officer_reduction`` officers removed.

        Officers are removed one at a time from whichever entry currently has
        the most; removal stops early once the largest entry is at 0 or the
        mapping is empty. The input mapping is never mutated.
        """
        reduction = int(getattr(params, "officer_reduction", 0))
        if reduction <= 0:
            return dict(allocations)

        effective = dict(allocations)
        for _ in range(reduction):
            target = max(effective, key=lambda k: effective[k], default=None)
            if target is None or effective[target] <= 0:
                break
            effective[target] -= 1
        return effective

    def _spawn_arrivals(
        self,
        day: int,
        params: Any,
        result: DayResult,
    ) -> list[ApplicationCase]:
        """Create the day's new cases for every enabled service.

        The per-service rate is the configured arrival rate times
        ``params.arrival_multiplier``. Its integer part is the guaranteed
        count; the fractional part is the probability of one extra arrival.
        Updates ``result.new_arrivals`` and ``result.digital_arrivals``.
        """
        new_cases: list[ApplicationCase] = []

        for service in self.task_config.enabled_services:
            base_rate = self.task_config.arrival_rate_per_day.get(
                service,
                self.task_config.arrival_rate_per_day.get(service.value, 0.0),
            )
            effective_rate = float(base_rate) * float(getattr(params, "arrival_multiplier", 1.0))
            count = int(effective_rate)
            if self.rng.random() < (effective_rate - count):
                count += 1

            for _ in range(count):
                case = self._new_case(service, day, params)
                new_cases.append(case)
                if case.intake_channel == IntakeChannel.DIGITAL:
                    result.digital_arrivals += 1

        result.new_arrivals = len(new_cases)
        return new_cases

    def _new_case(self, service: ServiceType, day: int, params: Any) -> ApplicationCase:
        """Build one new ``ApplicationCase`` for ``service`` arriving on ``day``.

        Random draws happen in a fixed order (intake channel, missing
        documents, field verification, urgency); changing that order would
        change the results produced for a given seed.
        """
        self.case_counter += 1
        profile = get_sector_profile(service)

        sla_days = int(profile.sla_days * getattr(params, "sla_window_multiplier", 1.0))
        sla_deadline_day = day + sla_days

        digital_ratio = self.task_config.digital_intake_ratio
        channel = (
            IntakeChannel.DIGITAL
            if self.rng.random() < digital_ratio
            else IntakeChannel.PAPER
        )

        # Task-level override (keyed by enum or value) beats the sector default.
        base_missing = profile.missing_docs_probability
        override = (self.task_config.missing_docs_probability_override or {}).get(
            service,
            (self.task_config.missing_docs_probability_override or {}).get(service.value),
        )
        if override is not None:
            base_missing = override

        defect_rate = (
            profile.doc_defect_rate_digital
            if channel == IntakeChannel.DIGITAL
            else profile.doc_defect_rate_paper
        )
        eff_missing = min(
            1.0,
            base_missing + getattr(params, "doc_defect_rate_boost", 0.0) * defect_rate,
        )
        has_missing = self.rng.random() < eff_missing

        base_fv = profile.field_verification_probability
        fv_override = (self.task_config.field_verification_probability_override or {}).get(
            service,
            (self.task_config.field_verification_probability_override or {}).get(service.value),
        )
        if fv_override is not None:
            base_fv = fv_override

        eff_fv = min(1.0, base_fv + getattr(params, "field_verification_boost", 0.0))
        has_fv = self.rng.random() < eff_fv
        field_completion_day = day + profile.field_verification_days if has_fv else None

        from app.models import UrgencyProfile

        # Short-circuiting means at most one random draw is consumed here:
        # 20% urgent for HIGH profiles, 8% for MODERATE, never otherwise.
        urgency_profile = profile.urgency_profile
        is_urgent = (
            urgency_profile == UrgencyProfile.HIGH and self.rng.random() < 0.20
        ) or (
            urgency_profile == UrgencyProfile.MODERATE and self.rng.random() < 0.08
        )

        return ApplicationCase(
            case_id=f"case-{self.case_counter:06d}",
            service_type=service,
            arrival_day=day,
            current_day=day,
            sla_deadline_day=sla_deadline_day,
            intake_channel=channel,
            internal_substate=(
                InternalSubstate.BLOCKED_MISSING_DOCS
                if has_missing
                else InternalSubstate.PRE_SCRUTINY
            ),
            public_stage=StageType.SUBMISSION,
            is_urgent=is_urgent,
            has_missing_docs=has_missing,
            field_verification_required=has_fv,
            field_verification_completion_day=field_completion_day,
        )

    def _resolve_field_verification(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        result: DayResult,
    ) -> None:
        """Return cases whose field verification is due on or before ``day`` to PRE_SCRUTINY."""
        for case in active_cases:
            if (
                case.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
                and case.field_verification_completion_day is not None
                and day >= case.field_verification_completion_day
            ):
                case.internal_substate = InternalSubstate.PRE_SCRUTINY
                case.field_verification_completion_day = None
                result.field_verif_completed += 1

    def _resolve_doc_requests(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        result: DayResult,
    ) -> None:
        """Unblock cases whose missing-document resolution day is on or before ``day``."""
        for case in active_cases:
            if (
                case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
                and case.doc_resolution_day is not None
                and day >= case.doc_resolution_day
            ):
                case.internal_substate = InternalSubstate.PRE_SCRUTINY
                case.doc_resolution_day = None
                result.newly_unblocked_missing += 1

    def _sort_queue(
        self,
        cases: list[ApplicationCase],
        priority_mode: PriorityMode,
    ) -> list[ApplicationCase]:
        """Return the cases that ``can_advance``, ordered for ``priority_mode``.

        - URGENT_FIRST: urgent cases, then higher SLA risk, then earlier arrival.
        - OLDEST_FIRST: earlier arrival only.
        - BACKLOG_CLEARANCE: higher SLA risk, then urgent, then earlier arrival.
        - any other mode: cases with SLA risk above 0.8 (riskiest first), then
          urgent, then earlier arrival.
        """
        eligible = [c for c in cases if can_advance(c)]

        if priority_mode == PriorityMode.URGENT_FIRST:
            return sorted(
                eligible,
                key=lambda c: (not c.is_urgent, -c.sla_risk, c.arrival_day),
            )

        if priority_mode == PriorityMode.OLDEST_FIRST:
            return sorted(eligible, key=lambda c: c.arrival_day)

        if priority_mode == PriorityMode.BACKLOG_CLEARANCE:
            return sorted(
                eligible,
                key=lambda c: (-c.sla_risk, not c.is_urgent, c.arrival_day),
            )

        return sorted(
            eligible,
            key=lambda c: (
                -c.sla_risk if c.sla_risk > 0.8 else 0,
                not c.is_urgent,
                c.arrival_day,
            ),
        )

    def build_queue_snapshot(
        self,
        service: ServiceType,
        active_cases: list[ApplicationCase],
        day: int,
    ) -> QueueSnapshot:
        """Summarise the open (not completed, not rejected) cases of ``service``.

        ``day`` is accepted but not used by this method, and
        ``total_completed_today`` is always reported as 0.
        """
        cases = [
            c
            for c in active_cases
            if c.service_type == service and not c.completed and not c.rejected
        ]

        stage_counts = {s.value: 0 for s in StageType}
        for c in cases:
            stage_counts[c.public_stage.value] = stage_counts.get(c.public_stage.value, 0) + 1

        oldest_age = max((c.waiting_days for c in cases), default=0)
        avg_wait = sum(c.waiting_days for c in cases) / len(cases) if cases else 0.0
        sla_risk = sum(c.sla_risk for c in cases) / len(cases) if cases else 0.0

        return QueueSnapshot(
            service_type=service,
            public_stage_counts=stage_counts,
            total_pending=len(cases),
            total_completed_today=0,
            total_sla_breached=sum(1 for c in cases if c.sla_breached),
            urgent_pending=sum(1 for c in cases if c.is_urgent),
            blocked_missing_docs=sum(
                1
                for c in cases
                if c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
            ),
            field_verification_pending=sum(
                1
                for c in cases
                if c.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
            ),
            oldest_case_age_days=oldest_age,
            avg_waiting_days=round(avg_wait, 2),
            current_sla_risk=round(min(1.0, sla_risk), 3),
        )
415
+
416
+
417
+ # ─────────────────────────────────────────────
418
+ # HIGH-LEVEL SIMULATION ORCHESTRATION
419
+ # ─────────────────────────────────────────────
420
+
421
+
422
class SimulationAgentMode(str, Enum):
    """String-valued enum naming the kind of agent that drives a simulation run."""

    BASELINE_POLICY = "baseline_policy"
    LLM_INFERENCE = "llm_inference"
    TRAINED_RL = "trained_rl"
426
+
427
+
428
@dataclass
class SimulationRun:
    """Record of one simulation run: identity, reward/score, grader, summary and step trace."""

    # Which task was run, by which kind of agent, and with which seed.
    task_id: str
    agent_mode: SimulationAgentMode
    seed: int
    # Aggregate reward and the score with the name of the grader that produced it.
    total_reward: float
    score: float
    grader_name: str
    # Free-form summary mapping and a list of trace entries.
    summary: dict[str, Any]
    trace: list[dict[str, Any]]
438
+
439
+
440
+ def _dedupe(values: list[str | None]) -> list[str]:
441
+ out: list[str] = []
442
+ for value in values:
443
+ if value is None:
444
+ continue
445
+ v = str(value).strip()
446
+ if v and v not in out:
447
+ out.append(v)
448
+ return out
449
+
450
+
451
+ def _env_csv_list(name: str) -> list[str]:
452
+ raw = os.getenv(name, "").strip()
453
+ if not raw:
454
+ return []
455
+ return [x.strip() for x in raw.split(",") if x.strip()]
456
+
457
+
458
+ def _extract_json_object(text: str) -> dict[str, Any] | None:
459
+ text = (text or "").strip()
460
+ if not text:
461
+ return None
462
+ try:
463
+ parsed = json.loads(text)
464
+ if isinstance(parsed, dict):
465
+ return parsed
466
+ except json.JSONDecodeError:
467
+ pass
468
+
469
+ match = re.search(r"\{.*\}", text, flags=re.DOTALL)
470
+ if not match:
471
+ return None
472
+ try:
473
+ parsed = json.loads(match.group(0))
474
+ except json.JSONDecodeError:
475
+ return None
476
+ return parsed if isinstance(parsed, dict) else None
477
+
478
+
479
def _enum_service(value: Any) -> ServiceType | None:
    """Convert ``value`` to a ``ServiceType`` if possible.

    ``None`` and the empty string yield ``None``; an existing ``ServiceType``
    is returned unchanged; anything else is looked up by its ``str`` form,
    and ``None`` is returned when that lookup fails.
    """
    if value is None or value == "":
        resolved = None
    elif isinstance(value, ServiceType):
        resolved = value
    else:
        try:
            resolved = ServiceType(str(value))
        except Exception:
            resolved = None
    return resolved
488
+
489
+
490
def _enum_priority(value: Any) -> PriorityMode | None:
    """Convert ``value`` to a ``PriorityMode`` if possible.

    ``None`` and the empty string yield ``None``; an existing
    ``PriorityMode`` is returned unchanged; anything else is looked up by
    its ``str`` form, and ``None`` is returned when that lookup fails.
    """
    if value is None or value == "":
        resolved = None
    elif isinstance(value, PriorityMode):
        resolved = value
    else:
        try:
            resolved = PriorityMode(str(value))
        except Exception:
            resolved = None
    return resolved
499
+
500
+
501
def _action_model_from_kwargs(action_type: ActionType, **kwargs: Any) -> ActionModel:
    """Build an ``ActionModel`` for ``action_type`` from loosely typed fields.

    Several candidate payload shapes are tried in order for each action
    type (different field spellings for the same intent); the first one
    that ``ActionModel`` accepts is returned. If none is accepted, or the
    fields needed to build any candidate are missing, an ``ADVANCE_TIME``
    action is returned instead.

    Recognised keyword arguments: ``service`` (or ``service_target``),
    ``target_service``, ``escalation_target``, ``priority_mode``,
    ``officer_delta`` and ``case_id``.

    Args:
        action_type: The kind of action to build.
        **kwargs: Raw field values; enum-like fields may be enum members or
            their string values.

    Returns:
        A constructed ``ActionModel``; never raises for malformed fields.
    """

    def _officer_count(raw: Any) -> int:
        # Missing, non-numeric or non-finite deltas fall back to 1 so a
        # malformed payload degrades gracefully instead of raising.
        try:
            return max(1, int(raw or 1))
        except (TypeError, ValueError, OverflowError):
            return 1

    service = _enum_service(kwargs.get("service") or kwargs.get("service_target"))
    target_service = _enum_service(kwargs.get("target_service"))
    escalation_target = _enum_service(kwargs.get("escalation_target"))
    priority_mode = _enum_priority(kwargs.get("priority_mode"))
    officer_delta = kwargs.get("officer_delta")
    case_id = kwargs.get("case_id")

    candidates: list[dict[str, Any]] = []

    if action_type == ActionType.ADVANCE_TIME:
        candidates.append({"action_type": action_type})

    elif action_type == ActionType.SET_PRIORITY_MODE:
        candidates.append({"action_type": action_type, "priority_mode": priority_mode})

    elif action_type == ActionType.ASSIGN_CAPACITY:
        if service is not None:
            delta = _officer_count(officer_delta)
            candidates.extend(
                [
                    {"action_type": action_type, "service": service, "officer_delta": delta},
                    {"action_type": action_type, "service_target": service, "officer_delta": delta},
                    {
                        "action_type": action_type,
                        "capacity_assignment": {service.value: delta},
                    },
                ]
            )

    elif action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
        if service is not None:
            candidates.extend(
                [
                    {"action_type": action_type, "service": service},
                    {"action_type": action_type, "service_target": service},
                ]
            )

    elif action_type == ActionType.ESCALATE_SERVICE:
        # An explicit escalation target takes precedence over the service.
        svc = escalation_target or service
        candidates.extend(
            [
                {"action_type": action_type, "service": svc, "case_id": case_id},
                {"action_type": action_type, "service_target": svc, "case_id": case_id},
                {"action_type": action_type, "escalation_target": svc, "case_id": case_id},
            ]
        )

    elif action_type == ActionType.REALLOCATE_OFFICERS:
        if service is not None and target_service is not None:
            delta = _officer_count(officer_delta)
            candidates.extend(
                [
                    {
                        "action_type": action_type,
                        "service": service,
                        "target_service": target_service,
                        "officer_delta": delta,
                    },
                    {
                        "action_type": action_type,
                        "reallocation_delta": {
                            service.value: -delta,
                            target_service.value: delta,
                        },
                    },
                ]
            )

    for candidate in candidates:
        try:
            return ActionModel(**candidate)
        except Exception:
            # This payload shape was rejected; try the next spelling.
            continue

    return ActionModel(action_type=ActionType.ADVANCE_TIME)
582
+
583
+
584
def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
    """Turn a loosely structured action payload into an ``ActionModel``.

    Both snake_case and camelCase keys are accepted for the common fields.
    For ``ASSIGN_CAPACITY`` the service and delta may come from the first
    entry of ``capacity_assignment``; for ``REALLOCATE_OFFICERS`` the source
    and target may come from the first negative and first positive entries
    of ``reallocation_delta``.

    Args:
        payload: Parsed action dictionary, or ``None``.

    Returns:
        The coerced action. An empty payload or an unknown action type
        yields ``ADVANCE_TIME``. Non-numeric delta values are ignored rather
        than raising.
    """

    def _as_int(raw: Any) -> int | None:
        # None signals "not a usable number".
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            return None

    if not payload:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)

    raw_action_type = payload.get("action_type") or payload.get("actionType")
    try:
        action_type = ActionType(str(raw_action_type))
    except Exception:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)

    service = payload.get("service") or payload.get("service_target") or payload.get("serviceTarget")
    target_service = payload.get("target_service") or payload.get("targetService")
    escalation_target = payload.get("escalation_target") or payload.get("escalationTarget")
    priority_mode = payload.get("priority_mode") or payload.get("priorityMode")
    officer_delta = payload.get("officer_delta") or payload.get("officerDelta")
    case_id = payload.get("case_id") or payload.get("caseId")

    if action_type == ActionType.ASSIGN_CAPACITY and not service:
        assignment = payload.get("capacity_assignment") or {}
        if isinstance(assignment, dict) and assignment:
            service, officer_delta = next(iter(assignment.items()))

    if action_type == ActionType.REALLOCATE_OFFICERS and (not service or not target_service):
        delta_map = payload.get("reallocation_delta") or {}
        if isinstance(delta_map, dict) and len(delta_map) >= 2:
            # Entries whose value is not numeric count as neither side.
            numeric = {k: _as_int(v) for k, v in delta_map.items()}
            negatives = [k for k, n in numeric.items() if n is not None and n < 0]
            positives = [k for k, n in numeric.items() if n is not None and n > 0]
            if negatives and positives:
                service = negatives[0]
                target_service = positives[0]
                officer_delta = abs(numeric[service])

    # Normalise the delta so a value such as "two" becomes "unspecified"
    # (the builder then applies its default) instead of raising downstream.
    if officer_delta is not None:
        officer_delta = _as_int(officer_delta)

    return _action_model_from_kwargs(
        action_type,
        service=service,
        target_service=target_service,
        escalation_target=escalation_target,
        priority_mode=priority_mode,
        officer_delta=officer_delta,
        case_id=case_id,
    )
625
+
626
+
627
+ def _recommended_min_steps(task_id: str) -> int:
628
+ if task_id == "cross_department_hard":
629
+ return 70
630
+ if task_id == "mixed_urgency_medium":
631
+ return 60
632
+ return 40
633
+
634
+
635
+ def _queue_snapshot_iter(obs: ObservationModel) -> list[Any]:
636
+ raw = getattr(obs, "queue_snapshots", [])
637
+ if isinstance(raw, dict):
638
+ return list(raw.values())
639
+ if isinstance(raw, list):
640
+ return list(raw)
641
+ try:
642
+ return list(raw)
643
+ except Exception:
644
+ return []
645
+
646
+
647
def _queue_service(q: Any) -> ServiceType | None:
    """Resolve the service of queue snapshot ``q``.

    Reads ``q.service`` and falls back to ``q.service_type`` when the former
    is missing or falsy; the value is then converted via ``_enum_service``.
    """
    raw_service = getattr(q, "service", None)
    if not raw_service:
        raw_service = getattr(q, "service_type", None)
    return _enum_service(raw_service)
649
+
650
+
651
+ def _queue_active_cases(q: Any) -> int:
652
+ return int(getattr(q, "active_cases", getattr(q, "total_pending", 0)) or 0)
653
+
654
+
655
+ def _queue_missing_docs(q: Any) -> int:
656
+ return int(getattr(q, "missing_docs_cases", getattr(q, "blocked_missing_docs", 0)) or 0)
657
+
658
+
659
+ def _queue_urgent_cases(q: Any) -> int:
660
+ return int(getattr(q, "urgent_cases", getattr(q, "urgent_pending", 0)) or 0)
661
+
662
+
663
+ def _queue_breached_cases(q: Any) -> int:
664
+ return int(getattr(q, "breached_cases", getattr(q, "total_sla_breached", 0)) or 0)
665
+
666
+
667
+ def _queue_avg_age(q: Any) -> float:
668
+ if hasattr(q, "avg_age_days"):
669
+ return float(getattr(q, "avg_age_days") or 0.0)
670
+ if hasattr(q, "oldest_case_age_days"):
671
+ return float(getattr(q, "oldest_case_age_days") or 0.0)
672
+ return float(getattr(q, "avg_waiting_days", 0.0) or 0.0)
673
+
674
+
675
def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
    """Summarise each queue snapshot of ``obs`` as a plain dictionary.

    Snapshots whose service cannot be resolved are left out. Each row holds
    the service value plus the case counters and age returned by the
    ``_queue_*`` accessors.
    """
    rows: list[dict[str, Any]] = []
    for snapshot in _queue_snapshot_iter(obs):
        svc = _queue_service(snapshot)
        if svc is not None:
            rows.append(
                dict(
                    service=svc.value,
                    active_cases=_queue_active_cases(snapshot),
                    missing_docs_cases=_queue_missing_docs(snapshot),
                    urgent_cases=_queue_urgent_cases(snapshot),
                    breached_cases=_queue_breached_cases(snapshot),
                    avg_age_days=_queue_avg_age(snapshot),
                )
            )
    return rows
692
+
693
+
694
+ def _pool_allocations(obs: ObservationModel) -> dict[Any, Any]:
695
+ pool = getattr(obs, "officer_pool", None)
696
+ if pool is None:
697
+ return {}
698
+ return getattr(pool, "allocations", getattr(pool, "allocated", {})) or {}
699
+
700
+
701
+ def _reserve_officers(obs: ObservationModel) -> int:
702
+ pool = getattr(obs, "officer_pool", None)
703
+ if pool is None:
704
+ return 0
705
+ for name in ("reserve_officers", "idle_officers", "available_officers"):
706
+ if hasattr(pool, name):
707
+ try:
708
+ return int(getattr(pool, name) or 0)
709
+ except Exception:
710
+ pass
711
+ return 0
712
+
713
+
714
def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
    """Return the number of officers allocated to ``service`` in ``obs``.

    The allocation mapping is looked up by the enum member first and by its
    ``.value`` second; a missing or falsy entry counts as 0.
    """
    allocations = _pool_allocations(obs)
    assigned = allocations.get(service)
    if assigned is None:
        assigned = allocations.get(service.value, 0)
    return int(assigned) if assigned else 0
720
+
721
+
722
def _top_backlog_service(
    obs: ObservationModel,
    *,
    exclude: ServiceType | None = None,
) -> ServiceType | None:
    """Return the service with the heaviest backlog in ``obs``.

    Queues are compared by ``active + 2 * breached + urgent`` first and by
    average age second; among equal queues the earliest one wins. Queues
    without a resolvable service, or whose service equals ``exclude``, are
    ignored. Returns ``None`` when no queue qualifies.
    """

    def _pressure(snapshot: Any) -> tuple[int, float]:
        weighted = (
            _queue_active_cases(snapshot)
            + (2 * _queue_breached_cases(snapshot))
            + _queue_urgent_cases(snapshot)
        )
        return (weighted, _queue_avg_age(snapshot))

    eligible: list[Any] = []
    for snapshot in _queue_snapshot_iter(obs):
        svc = _queue_service(snapshot)
        if svc is not None and svc != exclude:
            eligible.append(snapshot)
    if not eligible:
        return None
    # max() keeps the first of several equal maxima, which matches taking
    # the head of a stable descending sort.
    return _queue_service(max(eligible, key=_pressure))
743
+
744
+
745
def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
    """Return the service whose queue has the most missing-document cases.

    Only queues with at least one such case are considered; ties are broken
    by active-case count and then by queue order. Returns ``None`` when no
    queue has missing-document cases.
    """
    blocked = []
    for snapshot in _queue_snapshot_iter(obs):
        if _queue_missing_docs(snapshot) > 0:
            blocked.append(snapshot)
    if not blocked:
        return None
    # First maximum == head of a stable descending sort.
    worst = max(blocked, key=lambda snap: (_queue_missing_docs(snap), _queue_active_cases(snap)))
    return _queue_service(worst)
751
+
752
+
753
def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
    """Return the service holding the most allocated officers.

    Only services that appear in the queue snapshots are considered; the
    earliest one wins a tie. Returns ``None`` when no such service has a
    positive allocation.
    """
    known_services = []
    for snapshot in _queue_snapshot_iter(obs):
        svc = _queue_service(snapshot)
        if svc is not None:
            known_services.append(svc)
    if not known_services:
        return None
    best_staffed = max(known_services, key=lambda svc: _alloc_for(obs, svc))
    # If even the best-staffed service has nothing, nobody has officers.
    return best_staffed if _alloc_for(obs, best_staffed) > 0 else None
760
+
761
+
762
def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
    """Report, per action type, whether it is currently worth attempting.

    Setting the priority mode and advancing time are always allowed. The
    other entries depend on reserve officers, backlog, missing-document
    cases, remaining escalation budget and staffed services in ``obs``.
    """
    reserve_available = _reserve_officers(obs) > 0
    snapshots = _queue_snapshot_iter(obs)
    missing_docs_present = any(_queue_missing_docs(snap) > 0 for snap in snapshots)
    backlog_present = any(_queue_active_cases(snap) > 0 for snap in snapshots)
    budget_left = int(getattr(obs, "escalation_budget_remaining", 0) or 0) > 0

    staffed_count = 0
    for snap in snapshots:
        if _queue_service(snap) is not None and _alloc_for(obs, _queue_service(snap)) > 0:
            staffed_count += 1
    # Moving officers needs a staffed source and at least one other queue.
    reallocation_possible = staffed_count >= 1 and len(snapshots) >= 2

    mask: dict[ActionType, bool] = {}
    mask[ActionType.SET_PRIORITY_MODE] = True
    mask[ActionType.ADVANCE_TIME] = True
    mask[ActionType.ASSIGN_CAPACITY] = reserve_available and backlog_present
    mask[ActionType.REQUEST_MISSING_DOCUMENTS] = missing_docs_present
    mask[ActionType.ESCALATE_SERVICE] = budget_left and backlog_present
    mask[ActionType.REALLOCATE_OFFICERS] = reallocation_possible
    return mask
778
+
779
+
780
def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
    """Split action-type values into ``(allowed, blocked)`` for ``obs``.

    Both lists keep the order of the mask from ``_compute_action_mask``.
    """
    allowed: list[str] = []
    blocked: list[str] = []
    for action_type, permitted in _compute_action_mask(obs).items():
        bucket = allowed if permitted else blocked
        bucket.append(action_type.value)
    return allowed, blocked
785
+
786
+
787
def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
    """Pick a heuristic action for ``obs`` together with a short reason.

    Options are tried in this order: assign a reserve officer to the top
    backlog service, request missing documents, escalate the queue with the
    most breaches, move one officer from the best-staffed service to the top
    backlog. If none applies, an ``ADVANCE_TIME`` action is returned.
    """
    backlog_service = _top_backlog_service(obs)
    missing_docs_service = _service_with_missing_docs(obs)

    # 1) Spare officers go to the heaviest backlog.
    if _reserve_officers(obs) > 0 and backlog_service is not None:
        assign = _action_model_from_kwargs(
            ActionType.ASSIGN_CAPACITY,
            service=backlog_service,
            officer_delta=1,
        )
        return assign, "high-impact: assign reserve capacity to top backlog service"

    # 2) Unblock cases waiting on documents.
    if missing_docs_service is not None:
        request = _action_model_from_kwargs(
            ActionType.REQUEST_MISSING_DOCUMENTS,
            service=missing_docs_service,
        )
        return request, "high-impact: clear missing-document bottleneck"

    # 3) Spend escalation budget on the queue with the most breaches.
    escalation_budget = int(getattr(obs, "escalation_budget_remaining", 0) or 0)
    if escalation_budget > 0:
        by_heat = sorted(
            _queue_snapshot_iter(obs),
            key=lambda snap: (
                _queue_breached_cases(snap),
                _queue_active_cases(snap),
                _queue_urgent_cases(snap),
            ),
            reverse=True,
        )
        if by_heat:
            hottest = by_heat[0]
            if _queue_breached_cases(hottest) > 0 or _queue_active_cases(hottest) > 0:
                hot_service = _queue_service(hottest)
                if hot_service is not None:
                    escalate = _action_model_from_kwargs(
                        ActionType.ESCALATE_SERVICE,
                        service=hot_service,
                    )
                    return escalate, "high-impact: escalate highest SLA-risk service"

    # 4) Shift one officer toward the heaviest other backlog.
    donor = _service_with_officers(obs)
    if donor is not None and _alloc_for(obs, donor) > 0:
        receiver = _top_backlog_service(obs, exclude=donor)
        if receiver is not None and receiver != donor:
            move = _action_model_from_kwargs(
                ActionType.REALLOCATE_OFFICERS,
                service=donor,
                target_service=receiver,
                officer_delta=1,
            )
            return move, "high-impact: reallocate one officer toward highest backlog"

    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
842
+
843
+
844
def _repair_action_for_observation(
    action: ActionModel,
    obs: ObservationModel,
) -> tuple[ActionModel, str | None]:
    """Make ``action`` applicable to ``obs``, substituting it if necessary.

    Returns ``(action, note)``. ``note`` is ``None`` when the action is
    passed through untouched; otherwise it says how the payload was repaired
    or why a heuristic replacement from ``_best_high_impact_action`` was used.
    """

    def _fallback(reason: str) -> tuple[ActionModel, str]:
        # Swap in the heuristic action and prefix its reason with ours.
        substitute, why = _best_high_impact_action(obs)
        return substitute, f"{reason}; {why}"

    def _requested_service() -> ServiceType | None:
        return _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None))

    def _requested_delta() -> int:
        return max(1, int(getattr(action, "officer_delta", 1) or 1))

    mask = _compute_action_mask(obs)
    kind = action.action_type

    if not bool(mask.get(kind, True)):
        return _fallback(f"masked {kind.value}")

    if kind == ActionType.ADVANCE_TIME:
        return action, None

    if kind == ActionType.SET_PRIORITY_MODE:
        if getattr(action, "priority_mode", None) is not None:
            return action, None
        defaulted = _action_model_from_kwargs(
            ActionType.SET_PRIORITY_MODE,
            priority_mode=PriorityMode.BACKLOG_CLEARANCE,
        )
        return defaulted, "missing priority_mode, defaulted to backlog_clearance"

    if kind == ActionType.ASSIGN_CAPACITY:
        reserve = _reserve_officers(obs)
        if reserve <= 0:
            return _fallback("reserve officers exhausted")
        service = _requested_service() or _top_backlog_service(obs)
        if service is None:
            return _fallback("no service available for assign_capacity")
        # Never assign more officers than the reserve holds.
        delta = min(_requested_delta(), reserve)
        repaired = _action_model_from_kwargs(
            ActionType.ASSIGN_CAPACITY,
            service=service,
            officer_delta=delta,
        )
        return repaired, "repaired assign_capacity payload"

    if kind == ActionType.REQUEST_MISSING_DOCUMENTS:
        service = _requested_service() or _service_with_missing_docs(obs)
        if service is None:
            return _fallback("no missing-doc queue available")
        repaired = _action_model_from_kwargs(
            ActionType.REQUEST_MISSING_DOCUMENTS,
            service=service,
        )
        return repaired, "repaired request_missing_documents payload"

    if kind == ActionType.ESCALATE_SERVICE:
        if int(getattr(obs, "escalation_budget_remaining", 0) or 0) <= 0:
            return _fallback("escalation budget exhausted")
        service = (
            _enum_service(getattr(action, "service", None))
            or _enum_service(getattr(action, "service_target", None))
            or _enum_service(getattr(action, "escalation_target", None))
            or _top_backlog_service(obs)
        )
        case_id = getattr(action, "case_id", None)
        if service is None and case_id is None:
            return _fallback("no escalation target available")
        repaired = _action_model_from_kwargs(
            ActionType.ESCALATE_SERVICE,
            service=service,
            case_id=case_id,
        )
        return repaired, "repaired escalate_service payload"

    if kind == ActionType.REALLOCATE_OFFICERS:
        source = _requested_service() or _service_with_officers(obs)
        if source is None:
            return _fallback("no staffed source service")
        source_alloc = _alloc_for(obs, source)
        if source_alloc <= 0:
            # The requested source has nobody to give; use the best-staffed one.
            source = _service_with_officers(obs)
            source_alloc = 0 if source is None else _alloc_for(obs, source)
        if source is None or source_alloc <= 0:
            return _fallback("insufficient source officers")

        target = _enum_service(getattr(action, "target_service", None))
        if target is None or target == source:
            target = _top_backlog_service(obs, exclude=source)
        if target is None or target == source:
            return _fallback("missing distinct target_service")

        # Never move more officers than the source currently has.
        delta = min(_requested_delta(), source_alloc)
        repaired = _action_model_from_kwargs(
            ActionType.REALLOCATE_OFFICERS,
            service=source,
            target_service=target,
            officer_delta=delta,
        )
        return repaired, "repaired reallocate_officers payload"

    return action, None
950
+
951
+
952
def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
    """Return the model label used in log lines for ``agent_mode``.

    The baseline and trained-RL modes have fixed labels; any other mode
    uses the ``MODEL_NAME`` environment variable, defaulting to
    ``"llm_inference"``.
    """
    if agent_mode == SimulationAgentMode.BASELINE_POLICY:
        label = "baseline_policy"
    elif agent_mode == SimulationAgentMode.TRAINED_RL:
        label = "trained_rl"
    else:
        label = os.getenv("MODEL_NAME", "llm_inference")
    return label
958
+
959
+
960
+ def _log_step_line(step_row: dict[str, Any]) -> str:
961
+ done = "true" if bool(step_row.get("done")) else "false"
962
+ error = step_row.get("last_action_error") or "null"
963
+ action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
964
+ source = step_row.get("decision_source") or "unknown"
965
+ model = step_row.get("model_used") or "null"
966
+ repair = step_row.get("repair_note") or "null"
967
+ switch_note = step_row.get("switch_note") or "null"
968
+ return (
969
+ f"[STEP] step={step_row.get('step', 0)} action={action} "
970
+ f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
971
+ f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
972
+ )
973
+
974
+
975
+ def _resolve_model_path_or_raise(model_path: str) -> str:
976
+ p = Path(model_path).expanduser()
977
+ if not p.is_absolute():
978
+ p = (Path.cwd() / p).resolve()
979
+
980
+ if p.is_dir():
981
+ candidates = [
982
+ p / "best_model.zip",
983
+ p / "model.zip",
984
+ p / "checkpoint.zip",
985
+ ]
986
+ zip_files = sorted(p.glob("*.zip"))
987
+ candidates.extend(zip_files)
988
+ for candidate in candidates:
989
+ if candidate.exists():
990
+ return str(candidate)
991
+
992
+ if p.exists():
993
+ return str(p)
994
+
995
+ raise FileNotFoundError(f"Model path not found: {model_path}")
996
+
997
+
998
def _load_model_cached_or_raise(model_abs: str, model_type: Literal["maskable", "recurrent"]) -> Any:
    """Load the model at ``model_abs``, reusing a previously loaded instance.

    Results are cached in ``_MODEL_CACHE`` under ``(model_abs, model_type)``.
    ``"recurrent"`` loads with ``RecurrentPPO``; any other type tries
    ``MaskablePPO`` first and falls back to plain ``PPO`` if that fails.
    Errors from the final loader propagate to the caller.
    """
    cache_key = (model_abs, model_type)
    if cache_key in _MODEL_CACHE:
        return _MODEL_CACHE[cache_key]

    if model_type != "recurrent":
        try:
            from sb3_contrib import MaskablePPO

            loaded = MaskablePPO.load(model_abs)
        except Exception:
            # Covers both a missing sb3_contrib and a checkpoint that is
            # not a MaskablePPO model.
            from stable_baselines3 import PPO

            loaded = PPO.load(model_abs)
    else:
        from sb3_contrib import RecurrentPPO

        loaded = RecurrentPPO.load(model_abs)

    _MODEL_CACHE[cache_key] = loaded
    return loaded
1019
+
1020
+
1021
+ def _safe_invalid_action_count(final_state: Any) -> int:
1022
+ if hasattr(final_state, "total_invalid_actions"):
1023
+ return int(getattr(final_state, "total_invalid_actions") or 0)
1024
+ metrics = getattr(final_state, "metrics", None)
1025
+ if metrics is not None and hasattr(metrics, "total_invalid_actions"):
1026
+ return int(getattr(metrics, "total_invalid_actions") or 0)
1027
+ return 0
1028
+
1029
+
1030
+ class LiveSimulationSession:
1031
def __init__(
    self,
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> None:
    """Create a session and initialise the environment for ``agent_mode``.

    Args:
        task_id: Task to simulate.
        agent_mode: Which agent drives the session.
        max_steps: Step limit; in LLM mode it is raised to the task's
            recommended minimum when lower.
        seed: Seed for the run; a random one in 1..999999 is drawn when
            ``None``.
        policy_name: Baseline policy name (default ``"backlog_clearance"``).
        model_path: Path of the trained model (used in trained-RL mode).
        model_type: Loader family for the trained model.
    """
    self.task_id = task_id
    self.agent_mode = agent_mode

    step_budget = int(max_steps)
    recommended = _recommended_min_steps(task_id)
    if agent_mode == SimulationAgentMode.LLM_INFERENCE:
        # LLM runs get at least the recommended number of steps.
        step_budget = max(step_budget, int(recommended))
    self.max_steps = step_budget

    chosen_seed = random.randint(1, 999999) if seed is None else seed
    self.seed = int(chosen_seed)

    self.policy_name = policy_name or "backlog_clearance"
    self.model_path = model_path
    self.model_type = model_type

    # Progress and results of the run.
    self.trace: list[dict[str, Any]] = []
    self.total_reward = 0.0
    self.step_idx = 0
    self.done = False
    self.summary: dict[str, Any] | None = None
    self.score: float | None = None
    self.grader_name: str | None = None

    # Core (baseline / LLM) environment state.
    self.env: Any = None
    self.obs: ObservationModel | Any = None
    self.policy: Any = None

    # Trained-RL environment state.
    self.rl_env: Any = None
    self.rl_model: Any = None
    self.rl_lstm_state: Any = None
    self.rl_episode_start: Any = None

    # LLM routing state.
    self.llm_runtimes: list[dict[str, Any]] = []
    self.llm_route: list[str] = []
    self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
    self.consecutive_failure_steps = 0
    self.recovery_steps_remaining = 0
    self.auto_switch_count = 0
    self.last_switch_reason: str | None = None

    if self.agent_mode != SimulationAgentMode.TRAINED_RL:
        self._init_core()
    else:
        self._init_trained()
1079
+
1080
def start_line(self) -> dict[str, Any]:
    """Return the ``[START]`` log line together with the current observation."""
    model_label = _model_label_for_mode(self.agent_mode)
    log_line = f"[START] task={self.task_id} env=gov-workflow-openenv model={model_label}"
    return {"log": log_line, "observation": self.obs}
1088
+
1089
def _init_core(self) -> None:
    """Create and reset the core environment and choose the acting policy.

    Baseline mode looks the policy up by name in ``POLICIES`` (defaulting
    to ``backlog_clearance_policy``); otherwise the LLM-backed policy is
    used and its runtimes are initialised.
    """
    from app.baselines import POLICIES, backlog_clearance_policy
    from app.env import GovWorkflowEnv

    self.env = GovWorkflowEnv(task_id=self.task_id)
    self.obs, _ = self.env.reset(seed=self.seed)

    if self.agent_mode != SimulationAgentMode.BASELINE_POLICY:
        self.policy = self._llm_action_with_meta
        self._init_llm_runtimes()
        return
    self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
1100
+
1101
def _init_llm_runtimes(self) -> None:
    """Configure the LLM providers from environment variables.

    Builds up to two runtimes ("openai-compatible" and "nvidia"), each with
    one client per distinct API key and a de-duplicated model list. A
    provider without keys, models or a constructible client is skipped.
    Also resets the per-model statistics and the human-readable route
    description.
    """
    openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
    nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")

    openai_keys = _dedupe([os.getenv("HF_TOKEN"), os.getenv("OPENAI_API_KEY"), os.getenv("API_KEY")])
    nvidia_keys = _dedupe([os.getenv("NVIDIA_API_KEY"), os.getenv("NVIDIA_API_KEY_2")])

    openai_models = _dedupe(
        [os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct")] + _env_csv_list("MODEL_FALLBACKS")
    )
    nvidia_models = _dedupe(
        [os.getenv("NVIDIA_MODEL")]
        + _env_csv_list("NVIDIA_MODEL_FALLBACKS")
        + list(LEGACY_NVIDIA_MODEL_POOL)
    )

    def _build_clients(base_url: str, keys: list[str], label_prefix: str) -> list[tuple[Any, str]]:
        # One client per key, labelled by 1-based key position; keys whose
        # client cannot be constructed are skipped.
        built: list[tuple[Any, str]] = []
        for position, api_key in enumerate(keys, start=1):
            try:
                client = OpenAI(base_url=base_url, api_key=api_key, timeout=8.0, max_retries=0)
            except Exception:
                continue
            built.append((client, f"{label_prefix}_{position}"))
        return built

    provider_specs = (
        ("openai-compatible", openai_base, openai_keys, openai_models, "openai_key"),
        ("nvidia", nvidia_base, nvidia_keys, nvidia_models, "nvidia_key"),
    )

    runtimes: list[dict[str, Any]] = []
    for provider_name, base_url, keys, models, label_prefix in provider_specs:
        if not (keys and models):
            continue
        clients = _build_clients(base_url, keys, label_prefix)
        if clients:
            runtimes.append(
                {
                    "provider": provider_name,
                    "base_url": base_url,
                    "clients": clients,
                    "models": models,
                }
            )

    self.llm_runtimes = runtimes
    self.llm_model_stats = {}
    for runtime in runtimes:
        provider = str(runtime.get("provider"))
        for model in runtime.get("models", []):
            self.llm_model_stats[(provider, str(model))] = {
                "calls": 0,
                "invalid": 0,
                "repaired": 0,
                "failures": 0,
                "cooldown_until_step": 0,
            }

    def _route_label(provider_name: str) -> str:
        runtime = next((rt for rt in runtimes if rt.get("provider") == provider_name), None)
        if runtime is None:
            return f"{provider_name} (unavailable: missing API key/model)"
        return f"{provider_name} ({len(runtime['clients'])} keys, {len(runtime['models'])} models)"

    self.llm_route = [
        _route_label("openai-compatible"),
        _route_label("nvidia"),
        "adaptive ranking: prefer models with lower invalid/repaired rates",
        "heuristic fallback (backlog_clearance_policy)",
    ]
1212
+
1213
+ def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
1214
+ def _score(model_name: str) -> tuple[float, int]:
1215
+ stat = self.llm_model_stats.get((provider, model_name), {})
1216
+ calls = max(1, int(stat.get("calls", 0)))
1217
+ invalid_rate = float(stat.get("invalid", 0)) / calls
1218
+ repaired_rate = float(stat.get("repaired", 0)) / calls
1219
+ fail_rate = float(stat.get("failures", 0)) / calls
1220
+ cooldown = int(stat.get("cooldown_until_step", 0))
1221
+ cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
1222
+ return (
1223
+ invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty,
1224
+ -calls,
1225
+ )
1226
+
1227
+ return sorted([str(m) for m in models], key=_score)
1228
+
1229
def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
    """Choose the next action via the configured LLMs, with metadata.

    While recovery steps remain, the heuristic action is returned without
    calling any model. Otherwise every runtime, client and (ranked) model
    is tried in turn; the first reply is coerced into an action. If every
    call fails, or no runtime is configured, the heuristic action is used.

    Returns:
        ``(action, meta)`` where ``meta`` records the decision source,
        provider, model, attempt count and any error text.
    """

    def _heuristic_meta(source: str, attempt_count: int, error: str | None, note: str) -> dict[str, Any]:
        return {
            "decision_source": source,
            "provider": "heuristic",
            "model_used": "backlog_clearance_policy",
            "llm_attempts": attempt_count,
            "llm_error": error,
            "llm_key_label": None,
            "repair_note": note,
        }

    if self.recovery_steps_remaining > 0:
        self.recovery_steps_remaining -= 1
        recovery_action, recovery_note = _best_high_impact_action(obs)
        return recovery_action, _heuristic_meta("auto_recovery_policy", 0, None, recovery_note)

    attempts = 0
    last_error = ""
    allowed_actions, blocked_actions = _masked_action_type_hints(obs)
    schema_hint = {
        "required_fields": {
            "set_priority_mode": ["action_type", "priority_mode"],
            "assign_capacity": ["action_type", "service", "officer_delta"],
            "request_missing_documents": ["action_type", "service"],
            "escalate_service": ["action_type", "service"],
            "advance_time": ["action_type"],
            "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
        },
        "allowed_priority_mode": [m.value for m in PriorityMode],
        "allowed_services": [s.value for s in ServiceType],
    }
    system_prompt = " ".join(
        [
            "You are controlling a government workflow simulator.",
            "Return exactly one JSON object only. No markdown. No explanation.",
            "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents,",
            "escalate_service, advance_time, reallocate_officers.",
            "Rules:",
            "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target.",
            "2) assign_capacity requires service + officer_delta>0.",
            "3) request_missing_documents requires service with missing_docs_cases>0.",
            "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance].",
            "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops.",
            "Use lowercase enum values.",
        ]
    )

    # Serialise the observation with whichever dump method it offers.
    if hasattr(obs, "model_dump_json"):
        obs_json = obs.model_dump_json()
    else:
        obs_json = json.dumps(getattr(obs, "dict", lambda: {})())
    user_prompt = "\n".join(
        [
            "Observation:",
            f"{obs_json}",
            f"Allowed action types now: {allowed_actions}",
            f"Blocked action types now: {blocked_actions}",
            f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}",
            f"Last action validity: {getattr(obs, 'last_action_valid', True)}",
            f"Last action message: {getattr(obs, 'last_action_message', '')}",
            "Return action JSON.",
        ]
    )
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    for runtime in self.llm_runtimes:
        provider = str(runtime["provider"])
        ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
        for client, key_label in runtime["clients"]:
            for model in ranked_models:
                attempts += 1
                stat_key = (provider, model)
                try:
                    reply = client.chat.completions.create(
                        model=model,
                        messages=chat_messages,
                        temperature=0.0,
                        max_tokens=200,
                        stream=False,
                    )
                    reply_text = (reply.choices[0].message.content or "").strip()
                    action = _coerce_action(_extract_json_object(reply_text))
                    if stat_key in self.llm_model_stats:
                        self.llm_model_stats[stat_key]["calls"] += 1
                    meta = {
                        "decision_source": "llm",
                        "provider": provider,
                        "model_used": model,
                        "llm_attempts": attempts,
                        "llm_error": None,
                        "llm_key_label": key_label,
                    }
                    return action, meta
                except Exception as exc:
                    last_error = str(exc)
                    stat = self.llm_model_stats.get(stat_key)
                    if stat is None:
                        continue
                    stat["calls"] += 1
                    stat["failures"] += 1
                    if stat["failures"] >= 2:
                        # Repeated failures put the model on a 5-step cooldown.
                        stat["cooldown_until_step"] = self.step_idx + 5

    fallback_action, fallback_note = _best_high_impact_action(obs)
    if not self.llm_runtimes:
        last_error = "No LLM credentials configured."
    return fallback_action, _heuristic_meta(
        "heuristic_fallback",
        attempts,
        last_error or None,
        fallback_note,
    )
1334
+
1335
def _init_trained(self) -> None:
    """Load the trained model and create and reset the RL environment.

    Raises:
        ValueError: If no ``model_path`` was given.
        FileNotFoundError: If the model path cannot be resolved.
    """
    import numpy as np
    from rl.gov_workflow_env import GovWorkflowGymEnv

    if not self.model_path:
        raise ValueError("model_path is required for trained_rl simulation.")

    resolved_path = _resolve_model_path_or_raise(self.model_path)
    self.rl_model = _load_model_cached_or_raise(resolved_path, self.model_type)

    env_options = {
        "task_id": self.task_id,
        "seed": self.seed,
        "hard_action_mask": True,
    }
    self.rl_env = GovWorkflowGymEnv(**env_options)
    self.obs, _ = self.rl_env.reset(seed=self.seed)

    # Start with no recurrent state and the episode-start flag set.
    self.rl_lstm_state = None
    self.rl_episode_start = np.array([True], dtype=bool)
1351
+
1352
def step_once(self) -> tuple[dict[str, Any], str, bool]:
    """Advance the simulation by one step.

    Returns:
        ``(row, step_log, finished)`` where ``row`` is the trace row produced
        by the active backend, ``step_log`` is its formatted log line, and
        ``finished`` tells whether the session was finalized on this step.

    Raises:
        RuntimeError: if the session is already done.
    """
    if self.done:
        raise RuntimeError("Simulation already finished.")

    self.step_idx += 1
    if self.agent_mode == SimulationAgentMode.TRAINED_RL:
        row = self._step_trained()
    else:
        row = self._step_core()

    self.trace.append(row)
    self.total_reward += float(row["reward"])
    # The log line is rendered before "done" may be forced to True below.
    step_log = _log_step_line(row)

    if not (row["done"] or self.step_idx >= self.max_steps):
        return row, step_log, False

    self._finalize()
    row["done"] = True
    return row, step_log, True
1367
+
1368
def end_line(self) -> str:
    """Render the final ``[END] ...`` log line for this session.

    When no score has been computed yet, a fixed "not successful" line is
    returned regardless of the trace contents.
    """
    score = self.score
    if score is None:
        return "[END] success=false steps=0 score=0.00 rewards="

    formatted_rewards = [format(float(step.get("reward", 0.0)), ".2f") for step in self.trace]
    rewards = ",".join(formatted_rewards)
    if score >= 0.5:
        success = "true"
    else:
        success = "false"
    return f"[END] success={success} steps={len(self.trace)} score={score:.2f} rewards={rewards}"
1374
+
1375
def step_line(self, action: dict | ActionModel) -> dict[str, Any]:
    """Test helper: apply one action to the core env and report observation and reward.

    A dict action is coerced first; anything else is passed to the env as-is.
    The stored observation (``self.obs``) is updated with the env's result.
    """
    resolved = _coerce_action(action) if isinstance(action, dict) else action
    outcome = self.env.step(resolved)
    self.obs, reward, _terminated, _truncated, _info = outcome
    return dict(observation=self.obs, reward=reward)
1381
+
1382
def snapshot(self) -> dict[str, Any]:
    """Return a plain-dict view of the session's current progress and results."""
    view: dict[str, Any] = dict(
        task_id=self.task_id,
        agent_mode=self.agent_mode.value,
        seed=self.seed,
        max_steps=self.max_steps,
        step_idx=self.step_idx,
        done=self.done,
    )
    view["total_reward"] = float(self.total_reward)
    view["score"] = self.score
    view["grader_name"] = self.grader_name
    view["summary"] = self.summary
    # Only the trace length is exposed, not the rows themselves.
    view["trace_len"] = len(self.trace)
    # Copy so callers cannot mutate the session's route in place.
    view["llm_route"] = list(self.llm_route)
    return view
1397
+
1398
def close(self) -> None:
    """Best-effort shutdown of the core env and the RL env.

    Each env is closed independently; any exception raised while looking up
    or closing one of them is deliberately ignored so the other still closes.
    """
    for attr_name in ("env", "rl_env"):
        try:
            handle = getattr(self, attr_name)
            if handle is not None and hasattr(handle, "close"):
                handle.close()
        except Exception:
            pass
1409
+
1410
def _step_core(self) -> dict[str, Any]:
    """Run one step against the core env using the baseline or LLM policy.

    The policy's decision is normalised to an ``ActionModel``, checked against
    the current action mask, passed through the observation-based repair
    helper, and then applied to ``self.env``. The result is returned as a
    trace row (step metrics merged with decision metadata).

    Raises:
        RuntimeError: if ``self.env`` has not been initialised.
    """
    if self.env is None:
        raise RuntimeError("Core simulation env not initialized.")

    if self.agent_mode == SimulationAgentMode.BASELINE_POLICY:
        action = self.policy(self.obs)
        meta = {
            "decision_source": "baseline_policy",
            "provider": "local_policy",
            "model_used": self.policy_name,
            "llm_attempts": 0,
            "llm_error": None,
            "llm_key_label": None,
        }
    else:
        # Non-baseline policies may return either (action, meta) or a bare action.
        raw_decision = self.policy(self.obs)
        if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
            action, meta = raw_decision
        else:
            action, meta = raw_decision, {}
        if not isinstance(meta, dict):
            meta = {}

    # Normalise whatever the policy produced into an ActionModel.
    if not isinstance(action, ActionModel):
        if isinstance(action, dict):
            action = _coerce_action(action)
        else:
            action = ActionModel(action_type=ActionType.ADVANCE_TIME)
            meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"

    # Action types missing from the mask are treated as allowed (default True).
    allowed_mask = _compute_action_mask(self.obs)
    if not bool(allowed_mask.get(action.action_type, True)):
        masked_fallback, why = _best_high_impact_action(self.obs)
        action = masked_fallback
        if meta.get("decision_source") == "llm":
            meta["decision_source"] = "llm_repaired"
        meta["repair_note"] = f"action masked at runtime; {why}"

    # A non-empty repair note means the helper changed the action; this note
    # overwrites any note written by the mask step above.
    repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
    if repair_note:
        action = repaired_action
        if meta.get("decision_source") == "llm":
            meta["decision_source"] = "llm_repaired"
        meta["repair_note"] = repair_note

    self.obs, reward, terminated, truncated, info = self.env.step(action)
    done = bool(terminated or truncated)
    # Fall back to the action explanation when no explicit error is present.
    last_action_error = getattr(info, "last_action_error", None)
    if last_action_error is None:
        last_action_error = getattr(info, "action_explanation", None)

    row = {
        "step": self.step_idx,
        "day": self.obs.day,
        "action_type": action.action_type.value,
        "action_payload": action.model_dump(exclude_none=True, mode="json"),
        "reward": float(reward),
        "done": done,
        "backlog": getattr(self.obs, "total_backlog", 0),
        "completed": getattr(self.obs, "total_completed", 0),
        "sla_breaches": getattr(self.obs, "total_sla_breaches", 0),
        "fairness_gap": float(
            getattr(self.obs, "fairness_gap", getattr(self.obs, "fairness_index", 0.0)) or 0.0
        ),
        "escalation_budget_remaining": getattr(self.obs, "escalation_budget_remaining", 0),
        "invalid_action": bool(getattr(info, "invalid_action", False)),
        "last_action_error": last_action_error,
        "queue_rows": _queue_rows(self.obs),
    }
    # Metadata keys are merged last, so they win on any key collision.
    row.update(meta)

    if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
        is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
        is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
        model_used = str(row.get("model_used") or "")
        provider = str(row.get("provider") or "")
        stat_key = (provider, model_used)
        # Stats are only updated for (provider, model) pairs already tracked.
        stat = self.llm_model_stats.get(stat_key)
        if stat is not None:
            if is_repaired:
                stat["repaired"] += 1
            if is_invalid:
                stat["invalid"] += 1
                stat["failures"] += 1
            else:
                # A valid step decays the failure counter, never below zero.
                stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)

        is_failure_pattern = is_invalid or is_repaired
        self.consecutive_failure_steps = self.consecutive_failure_steps + 1 if is_failure_pattern else 0

        # After 4 consecutive invalid/repaired steps: cool the model down,
        # request at least 3 recovery steps, and restart the streak counter.
        if self.consecutive_failure_steps >= 4:
            if stat is not None:
                stat["cooldown_until_step"] = self.step_idx + 6
            self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
            self.auto_switch_count += 1
            self.last_switch_reason = "repeated invalid/repaired pattern detected"
            row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
            self.consecutive_failure_steps = 0

    return row
1510
+
1511
def _step_trained(self) -> dict[str, Any]:
    """Run one step with the trained RL model and return the trace row.

    Recurrent models are queried with the stored LSTM state and episode-start
    flag; other models are queried with the env's action masks. The chosen
    action index is applied to ``self.rl_env`` and the row is built from the
    wrapped core env's observation.
    """
    import numpy as np

    masks = self.rl_env.action_masks()
    if self.model_type == "recurrent":
        action, self.rl_lstm_state = self.rl_model.predict(
            self.obs,
            state=self.rl_lstm_state,
            episode_start=self.rl_episode_start,
            deterministic=True,
        )
        action_idx = int(action.item() if hasattr(action, "item") else action)
        # The recurrent predict call receives no mask, so an out-of-range or
        # masked-out choice is replaced by the first valid index.
        if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
            valid = np.flatnonzero(masks)
            # NOTE(review): 18 is the hard-coded fallback when nothing is valid;
            # presumably the advance-time index — confirm against the decode table.
            action_idx = int(valid[0]) if valid.size > 0 else 18
    else:
        from sb3_contrib.common.maskable.utils import get_action_masks

        action, _ = self.rl_model.predict(
            self.obs,
            action_masks=get_action_masks(self.rl_env),
            deterministic=True,
        )
        action_idx = int(action.item() if hasattr(action, "item") else action)

    self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
    done = bool(terminated or truncated)
    if self.model_type == "recurrent":
        # The next predict call is flagged as an episode start only if this step ended it.
        self.rl_episode_start = np.array([done], dtype=bool)

    # Metrics are read from the core env's own observation, not from self.obs.
    core_env = self.rl_env.core_env
    core_obs = core_env._build_observation()
    action_model, action_label = _decode_action_idx(action_idx)

    return {
        "step": self.step_idx,
        "day": core_obs.day,
        "action_type": action_label,
        "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
        "action_index": action_idx,
        "reward": float(reward),
        "done": done,
        "backlog": core_obs.total_backlog,
        "completed": core_obs.total_completed,
        "sla_breaches": core_obs.total_sla_breaches,
        "fairness_gap": float(
            getattr(core_obs, "fairness_gap", getattr(core_obs, "fairness_index", 0.0)) or 0.0
        ),
        "escalation_budget_remaining": core_obs.escalation_budget_remaining,
        # Here ``info`` is accessed as a mapping (``.get``).
        "invalid_action": bool(info.get("invalid_action", False)),
        "last_action_error": info.get("last_action_error") or info.get("action_explanation"),
        "queue_rows": _queue_rows(core_obs),
        "decision_source": "trained_rl",
        "provider": "rl",
        "model_used": self.model_path or "trained_rl",
        "llm_attempts": 0,
        "llm_error": None,
        "llm_key_label": None,
    }
1570
+
1571
def _finalize(self) -> None:
    """Mark the session done, grade the episode, and build ``self.summary``.

    Idempotent: returns immediately if the session is already done.
    """
    if self.done:
        return
    self.done = True

    from app.graders import grade_episode

    # The trained-RL backend keeps its core env inside the Gym wrapper.
    if self.agent_mode == SimulationAgentMode.TRAINED_RL:
        final_state = self.rl_env.core_env.state()
    else:
        final_state = self.env.state()

    gr = grade_episode(final_state)
    self.score = float(gr.score)
    self.grader_name = gr.grader_name

    # Step counts by decision source; "llm_repaired" counts toward both the
    # LLM and repaired totals, "auto_recovery_policy" toward both the
    # fallback and repaired totals.
    llm_steps = sum(1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"})
    fallback_steps = sum(
        1 for row in self.trace if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
    )
    repaired_steps = sum(
        1 for row in self.trace if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
    )
    # Clamp to 1 so the rates below never divide by zero on an empty trace.
    total_steps = max(1, len(self.trace))
    invalid_actions = _safe_invalid_action_count(final_state)
    invalid_rate = float(invalid_actions) / float(total_steps)
    repaired_rate = float(repaired_steps) / float(total_steps)

    ranked_models: list[dict[str, Any]] = []
    if self.llm_model_stats:
        for (provider, model), stat in self.llm_model_stats.items():
            calls = int(stat.get("calls", 0))
            # Models that were never called are left out of the ranking.
            if calls <= 0:
                continue
            ranked_models.append(
                {
                    "provider": provider,
                    "model": model,
                    "calls": calls,
                    "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
                    "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
                }
            )
        # Best first: lowest invalid rate, then lowest repaired rate, then most calls.
        ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))

    self.summary = {
        "total_steps": getattr(final_state, "total_steps", len(self.trace)),
        "total_completed": getattr(final_state, "total_completed", 0),
        "total_backlog": getattr(final_state, "total_backlog", 0),
        "total_sla_breaches": getattr(final_state, "total_sla_breaches", 0),
        "fairness_gap": float(getattr(final_state, "fairness_gap", 0.0) or 0.0),
        "total_invalid_actions": invalid_actions,
        "invalid_action_rate": invalid_rate,
        "llm_steps": llm_steps,
        "heuristic_fallback_steps": fallback_steps,
        "llm_repaired_steps": repaired_steps,
        "repaired_action_rate": repaired_rate,
        "auto_switch_count": self.auto_switch_count,
        "last_switch_reason": self.last_switch_reason,
        "effective_max_steps": self.max_steps,
        "recommended_min_steps": _recommended_min_steps(self.task_id),
    }
    # Mode-specific extras.
    if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
        self.summary["llm_route"] = list(self.llm_route)
        self.summary["llm_model_performance"] = ranked_models
    if self.agent_mode == SimulationAgentMode.TRAINED_RL:
        self.summary["model_path"] = self.model_path
        self.summary["model_type"] = self.model_type
1639
+
1640
+
1641
def run_simulation(
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> SimulationRun:
    """Run a whole simulation to completion and return its result record.

    A ``LiveSimulationSession`` is created from the keyword arguments, stepped
    until it reports done, and always closed afterwards, even on error.
    """
    session_kwargs = {
        "task_id": task_id,
        "agent_mode": agent_mode,
        "max_steps": max_steps,
        "seed": seed,
        "policy_name": policy_name,
        "model_path": model_path,
        "model_type": model_type,
    }
    session = LiveSimulationSession(**session_kwargs)
    try:
        while True:
            if session.done:
                break
            session.step_once()

        # Missing score / grader / summary fall back to neutral defaults.
        final_score = float(session.score or 0.0)
        grader = str(session.grader_name or "unknown")
        summary_copy = dict(session.summary or {})
        return SimulationRun(
            task_id=session.task_id,
            agent_mode=session.agent_mode,
            seed=session.seed,
            total_reward=float(session.total_reward),
            score=final_score,
            grader_name=grader,
            summary=summary_copy,
            trace=list(session.trace),
        )
    finally:
        session.close()
1675
+
1676
+
1677
def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
    """Translate a discrete RL action index into ``(ActionModel, label)``.

    If the decode table cannot be imported, the index is unknown, or the
    table's action type is not a valid ``ActionType``, an advance-time action
    labelled ``action_<idx>`` is returned instead.
    """

    def _unknown() -> tuple[ActionModel, str]:
        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"

    try:
        from rl.feature_builder import ACTION_DECODE_TABLE
    except Exception:
        return _unknown()

    entry = ACTION_DECODE_TABLE.get(int(action_idx))
    if entry is None:
        return _unknown()

    raw_type, service, priority_mode, delta = entry

    try:
        kind = ActionType(str(raw_type))
    except Exception:
        return _unknown()

    if kind == ActionType.SET_PRIORITY_MODE:
        decoded = _action_model_from_kwargs(kind, priority_mode=priority_mode)
    elif kind == ActionType.ASSIGN_CAPACITY:
        # A missing or zero delta defaults to one officer.
        decoded = _action_model_from_kwargs(kind, service=service, officer_delta=delta or 1)
    elif kind in (ActionType.REQUEST_MISSING_DOCUMENTS, ActionType.ESCALATE_SERVICE):
        decoded = _action_model_from_kwargs(kind, service=service)
    elif kind == ActionType.REALLOCATE_OFFICERS:
        source_service = _enum_service(service)
        if source_service is None:
            decoded = ActionModel(action_type=ActionType.ADVANCE_TIME)
        else:
            # The table row carries a single service, so it is used as both
            # the source and the target of the reallocation.
            decoded = _action_model_from_kwargs(
                kind,
                service=source_service,
                target_service=source_service,
                officer_delta=delta or 1,
            )
    else:
        decoded = ActionModel(action_type=ActionType.ADVANCE_TIME)

    return decoded, kind.value
app/env.py ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ env.py — Gov Workflow OpenEnv
3
+ Gymnasium/OpenEnv-compatible environment aligned with Phase 1 schemas.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import random
9
+ from uuid import uuid4
10
+
11
+ from app.event_engine import EventEngine
12
+ from app.models import (
13
+ ActionModel,
14
+ ActionType,
15
+ ApplicationCase,
16
+ EpisodeStateModel,
17
+ InternalSubstate,
18
+ ObservationModel,
19
+ OfficerPool,
20
+ PriorityMode,
21
+ QueueSnapshot,
22
+ RewardModel,
23
+ ScenarioMode,
24
+ ServiceType,
25
+ StepInfoModel,
26
+ TaskConfig,
27
+ )
28
+ from app.reward import compute_reward
29
+ from app.signal_computer import SignalComputer
30
+ from app.engine import DayResult, DaySimulator
31
+ from app.tasks import get_task
32
+
33
+
34
def completion_fairness_gap(
    arrived_by_service: dict[ServiceType, int],
    completed_by_service: dict[ServiceType, int],
) -> float:
    """Spread between the best and worst per-service completion rates.

    Only services present in ``arrived_by_service`` are considered. A
    service's rate is ``completed / max(1, arrived)``, so services with no
    arrivals do not divide by zero. With fewer than two services the gap is 0.0.
    """
    if len(arrived_by_service) < 2:
        return 0.0

    completion_rates = [
        completed_by_service.get(service, 0) / max(1, arrived_by_service.get(service, 0))
        for service in arrived_by_service
    ]
    return max(completion_rates) - min(completion_rates)
49
+
50
+
51
class EpisodeMetrics:
    """Mutable per-episode counters, all starting at zero."""

    def __init__(self) -> None:
        # Case flow.
        self.total_arrived = self.total_completed = 0
        self.total_sla_breaches = self.total_rejected = 0
        # Agent behaviour.
        self.total_invalid_actions = 0
        self.total_escalations_used = self.total_wasted_escalations = 0
        # Missing-document handling.
        self.total_docs_requested = self.total_docs_cleared = 0
        # Officer capacity usage.
        self.total_idle_officer_days = self.total_capacity_days = 0
        # Urgent cases.
        self.total_urgent_arrived = self.total_urgent_completed = 0
        # Running sum of step rewards.
        self.cumulative_reward = 0.0

    def to_reward_model(self) -> RewardModel:
        """Wrap the cumulative reward in a ``RewardModel``."""
        return RewardModel(total_reward=self.cumulative_reward)
70
+
71
+
72
+ class GovWorkflowEnv:
73
def __init__(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> None:
    """Bind the env to a task and set up initial episode state.

    The default per-episode step cap is ten steps per task day, at least 1.
    """
    self.task_id = task_id
    self.task = get_task(task_id)
    self.seed = seed
    horizon_days = int(self.task.max_days)
    self.max_steps_per_episode = max(1, horizon_days * 10)
    self._init_episode_state()
79
+
80
def reset(
    self,
    seed: int | None = None,
    options: dict | None = None,
) -> tuple[ObservationModel, dict]:
    """Start a fresh episode and return ``(observation, info)``.

    Args:
        seed: RNG seed; when ``None`` the task's own seed is used.
        options: optional overrides. ``"task_id"`` selects a different task,
            ``"max_steps_per_episode"`` overrides the default step cap
            (ten steps per task day). Either way the cap is at least 1.
    """
    opts = options or {}

    # Task selection.
    self.task = get_task(opts.get("task_id", self.task_id))
    self.task_id = self.task.task_id

    # Seeding.
    if seed is None:
        self.seed = self.task.seed
    else:
        self.seed = int(seed)
    self.rng = random.Random(self.seed)

    # Step cap.
    step_cap = opts.get("max_steps_per_episode")
    if step_cap is not None:
        self.max_steps_per_episode = max(1, int(step_cap))
    else:
        self.max_steps_per_episode = max(1, int(self.task.max_days) * 10)

    # Episode bookkeeping; the id carries a short random suffix.
    self.episode_id = f"{self.task_id}-s{self.seed}-{uuid4().hex[:6]}"
    self.day = 0
    self.total_steps = 0
    self.terminated = False
    self.truncated = False
    self.priority_mode = PriorityMode.BALANCED

    # Copy the task's initial pool so the episode never mutates the task config's dicts.
    template_pool = self.task.initial_officer_pool
    self.officer_pool = OfficerPool(
        total_officers=template_pool.total_officers,
        available_officers=template_pool.available_officers,
        allocated=dict(template_pool.allocated),
        pending_reallocation=dict(getattr(template_pool, "pending_reallocation", {})),
    )

    self.active_cases: list[ApplicationCase] = []
    self.completed_cases: list[ApplicationCase] = []
    self.escalation_budget_remaining = self.task.escalation_budget

    enabled = self.task.enabled_services
    self.arrived_by_service = {service: 0 for service in enabled}
    self.completed_by_service = {service: 0 for service in enabled}

    self.metrics = EpisodeMetrics()
    self.action_history: list[dict] = []
    self.last_action_valid = True
    self.last_action_message = "reset"
    self.last_action_explanation = ""

    # Simulation components share this episode's seed / RNG.
    self.event_engine = EventEngine(seed=self.seed, scenario_mode=self.task.scenario_mode)
    self.simulator = DaySimulator(
        task_config=self.task,
        rng=self.rng,
        event_engine=self.event_engine,
    )
    self.signal_computer = SignalComputer()

    first_observation = self._build_observation(active_events=[])
    reset_info = dict(
        task_id=self.task_id,
        seed=self.seed,
        episode_id=self.episode_id,
        max_days=self.task.max_days,
    )
    return first_observation, reset_info
144
+
145
def step(
    self,
    action: ActionModel | dict,
) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
    """Apply one action and return ``(obs, reward, terminated, truncated, info)``.

    A ``ValueError`` raised while applying the action does not propagate: the
    step is recorded as an invalid action, counted in the metrics, and still
    rewarded (with the invalid flag passed to the reward function).

    Args:
        action: an ``ActionModel`` or a dict of its fields.

    Raises:
        RuntimeError: if the episode has already terminated or been truncated.
    """
    if isinstance(action, dict):
        # ActionModel is imported at module level; the former function-local
        # re-import was redundant and made the name local to this function.
        action = ActionModel(**action)

    if self.terminated or self.truncated:
        raise RuntimeError("Episode ended — call reset() before stepping.")

    self.total_steps += 1
    invalid_action = False
    # Empty result used for any action that does not advance the day.
    day_result = DayResult()

    try:
        notes, day_result = self._apply_action(action, day_result)
        self.last_action_valid = True
        self.last_action_message = notes[-1] if notes else "ok"
        self.last_action_explanation = self.last_action_message
    except ValueError as exc:
        invalid_action = True
        self.metrics.total_invalid_actions += 1
        self.last_action_valid = False
        self.last_action_message = str(exc)
        self.last_action_explanation = f"Invalid: {exc}"

    fairness_gap = completion_fairness_gap(
        self.arrived_by_service,
        self.completed_by_service,
    )

    reward: RewardModel = compute_reward(
        stage_advances=day_result.stage_advances,
        completions=day_result.new_completions,
        active_backlog=len(self.active_cases),
        new_sla_breaches=day_result.new_sla_breaches,
        fairness_gap=fairness_gap,
        fairness_threshold=self.task.fairness_threshold or 0.0,
        invalid_action=invalid_action,
        idle_capacity=day_result.idle_officer_days,
        award_stability_bonus=(action.action_type == ActionType.ADVANCE_TIME),
    )
    self.metrics.cumulative_reward += reward.total_reward

    # Terminate only when the backlog is empty after at least one simulated
    # day and the current action was valid.
    self.terminated = (
        len(self.active_cases) == 0
        and self.day > 0
        and not invalid_action
    )
    # Truncate on the day limit or the step cap, unless already terminated.
    self.truncated = (
        (self.day >= self.task.max_days or self.total_steps >= self.max_steps_per_episode)
        and not self.terminated
    )

    info = StepInfoModel(
        reward_breakdown=reward,
        newly_arrived_cases=day_result.new_arrivals,
        newly_completed_cases=day_result.new_completions,
        newly_sla_breached_cases=day_result.new_sla_breaches,
        newly_resolved_doc_cases=day_result.newly_unblocked_missing,
        invalid_action=invalid_action,
        action_explanation=self.last_action_explanation,
        active_events=day_result.active_events,
        grader_preview_score=0.0,
        effects_resolved_this_step=[],
    )

    self.action_history.append({
        "step": self.total_steps,
        "day": self.day,
        "action": action.model_dump(mode="json"),
        "invalid": invalid_action,
        "message": self.last_action_message,
        "reward": reward.total_reward,
    })

    obs = self._build_observation(active_events=day_result.active_events)
    return obs, reward.total_reward, self.terminated, self.truncated, info
224
+
225
def count_pending_effects(self) -> int:
    """Count delayed effects that are still waiting to resolve.

    Sources are tried in order: a non-empty ``self._pending_effects``, then
    ``self.simulator.pending_effects``, then ``self.pending_effects``. If
    none exists the count is 0.
    """
    absent = object()

    private_effects = getattr(self, "_pending_effects", None)
    if private_effects:
        return len(private_effects)

    simulator_effects = getattr(getattr(self, "simulator", absent), "pending_effects", absent)
    if simulator_effects is not absent:
        return len(simulator_effects)

    public_effects = getattr(self, "pending_effects", absent)
    if public_effects is not absent:
        return len(public_effects)

    return 0
234
+
235
+
236
def state(self) -> EpisodeStateModel:
    """Build a full ``EpisodeStateModel`` snapshot of the current episode."""
    metrics = self.metrics
    finished = self.completed_cases

    gap = completion_fairness_gap(self.arrived_by_service, self.completed_by_service)

    # Mean waiting time over completed cases; 0.0 when nothing has completed.
    if finished:
        mean_wait = sum(case.waiting_days for case in finished) / len(finished)
    else:
        mean_wait = 0.0

    return EpisodeStateModel(
        # Identity and progress.
        episode_id=self.episode_id,
        task_id=self.task_id,
        seed=self.seed,
        scenario_mode=self.task.scenario_mode,
        day=self.day,
        max_days=self.task.max_days,
        terminated=self.terminated,
        truncated=self.truncated,
        total_steps=self.total_steps,
        total_completed=len(finished),
        total_backlog=len(self.active_cases),
        total_sla_breaches=metrics.total_sla_breaches,
        total_rejected=metrics.total_rejected,
        action_history_count=len(self.action_history),
        cumulative_reward=metrics.cumulative_reward,
        officer_pool=self.officer_pool.model_copy(deep=True),
        pending_effects_count=self.count_pending_effects(),
        active_events_today=[],
        # Grader-facing fields.
        fairness_gap=round(gap, 4),
        total_arrived=metrics.total_arrived,
        total_docs_requested=metrics.total_docs_requested,
        total_docs_cleared=metrics.total_docs_cleared,
        total_idle_officer_days=metrics.total_idle_officer_days,
        total_capacity_days=metrics.total_capacity_days,
        total_urgent_arrived=metrics.total_urgent_arrived,
        total_urgent_completed=metrics.total_urgent_completed,
        total_escalations_used=metrics.total_escalations_used,
        total_wasted_escalations=metrics.total_wasted_escalations,
        total_invalid_actions=metrics.total_invalid_actions,
        avg_waiting_days=round(mean_wait, 2),
        # Full action log (shallow copy of the list).
        action_history=list(self.action_history),
    )
285
+
286
def _apply_action(
    self,
    action: ActionModel,
    day_result: DayResult,
) -> tuple[list[str], DayResult]:
    """Validate and apply one action to the episode state.

    Args:
        action: the action to apply.
        day_result: result object returned unchanged by every action except
            ADVANCE_TIME, which replaces it with the simulated day's result.

    Returns:
        ``(notes, day_result)`` where ``notes`` are human-readable messages.

    Raises:
        ValueError: if the action is malformed or not applicable.
    """
    notes: list[str] = []

    if action.action_type == ActionType.SET_PRIORITY_MODE:
        if action.priority_mode is None:
            raise ValueError("priority_mode required for set_priority_mode")
        old_mode = self.priority_mode
        self.priority_mode = action.priority_mode
        notes.append(f"Priority mode changed: {old_mode.value} -> {action.priority_mode.value}")
        return notes, day_result

    if action.action_type == ActionType.ASSIGN_CAPACITY:
        cap = action.capacity_assignment
        if not cap:
            raise ValueError("capacity_assignment dict required for assign_capacity")

        # Entries are validated and applied one at a time so the idle check
        # sees earlier assignments. If a later entry fails, the allocation is
        # restored so a rejected action leaves no partial assignment behind.
        allocated = self.officer_pool.allocated
        allocation_before = dict(allocated)
        try:
            for svc_key, delta in cap.items():
                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
                if svc not in self.task.enabled_services:
                    raise ValueError(f"{svc.value} is not enabled in this task")
                if delta <= 0:
                    raise ValueError("capacity delta must be positive")
                idle = self.officer_pool.idle_officers
                if delta > idle:
                    raise ValueError(f"Only {idle} idle officers available; requested {delta}")
                allocated[svc] = allocated.get(svc, 0) + delta
                notes.append(f"Assigned {delta} officer(s) to {svc.value}")
        except Exception:
            allocated.clear()
            allocated.update(allocation_before)
            raise
        return notes, day_result

    if action.action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
        svc = action.service_target
        if svc is None:
            raise ValueError("service_target required for request_missing_documents")

        candidates = [
            c for c in self.active_cases
            if c.service_type == svc
            and c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
        ]
        if not candidates:
            raise ValueError(f"No BLOCKED_MISSING_DOCS cases for {svc.value}")

        # Highest SLA risk first, earliest arrival breaking ties; at most
        # three cases are handled per action.
        candidates.sort(key=lambda c: (-c.sla_risk, c.arrival_day))
        resolved = 0
        for case in candidates[:3]:
            case.doc_request_sent_day = self.day
            # Documents are scheduled to resolve 2-3 days after the request.
            case.doc_resolution_day = self.day + self.rng.randint(2, 3)
            self.metrics.total_docs_requested += 1
            resolved += 1

        notes.append(f"Sent missing-doc requests for {resolved} case(s) in {svc.value}")
        return notes, day_result

    if action.action_type == ActionType.ESCALATE_SERVICE:
        if self.escalation_budget_remaining <= 0:
            self.metrics.total_wasted_escalations += 1
            raise ValueError("Escalation budget exhausted")

        # With no target service, every non-urgent active case is eligible.
        svc = action.escalation_target or action.service_target
        candidates = [
            c for c in self.active_cases
            if (svc is None or c.service_type == svc) and not c.is_urgent
        ]
        if not candidates:
            self.metrics.total_wasted_escalations += 1
            raise ValueError("No eligible non-urgent cases to escalate")

        # Escalate the highest-risk case, preferring the earliest arrival on ties.
        best = max(candidates, key=lambda c: (c.sla_risk, -c.arrival_day))
        best.is_urgent = True
        self.escalation_budget_remaining -= 1
        self.metrics.total_escalations_used += 1
        notes.append(f"Escalated case {best.case_id} ({best.service_type.value})")
        return notes, day_result

    if action.action_type == ActionType.ADVANCE_TIME:
        day_result = self._advance_one_day()
        notes.append(f"Day {self.day} simulated")
        return notes, day_result

    if action.action_type == ActionType.REALLOCATE_OFFICERS:
        delta = action.reallocation_delta
        if not delta or len(delta) < 2:
            raise ValueError("reallocation_delta must have at least 2 entries")

        # Officers may only move between services, so the deltas must net to zero.
        total = sum(delta.values())
        if total != 0:
            raise ValueError(f"reallocation_delta must sum to 0 (got {total})")

        # Validate every entry before mutating anything.
        for svc_key, change in delta.items():
            svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
            if svc not in self.task.enabled_services:
                raise ValueError(f"{svc.value} not in enabled services")
            current = self.officer_pool.allocated.get(svc, 0)
            if current + change < 0:
                raise ValueError(
                    f"Cannot reduce {svc.value} below 0 (current={current}, change={change})"
                )

        for svc_key, change in delta.items():
            svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
            self.officer_pool.allocated[svc] = self.officer_pool.allocated.get(svc, 0) + change

        changes = ", ".join(f"{k}:{'+' if v > 0 else ''}{v}" for k, v in delta.items())
        notes.append(f"Officers reallocated: {changes}")
        return notes, day_result

    raise ValueError(f"Unsupported action_type: {action.action_type.value}")
397
+
398
def _advance_one_day(self) -> DayResult:
    """Simulate the next day and fold its results into the episode metrics."""
    self.day += 1

    # The simulator receives a copy of the current allocation.
    allocation_snapshot = dict(self.officer_pool.allocated)
    outcome = self.simulator.simulate_day(
        day=self.day,
        active_cases=self.active_cases,
        completed_cases=self.completed_cases,
        priority_mode=self.priority_mode,
        officer_allocations=allocation_snapshot,
    )

    # Count each completed case once per service; the marker attribute on the
    # case prevents double counting on later days.
    for done_case in self.completed_cases:
        if not getattr(done_case, "_counted", False):
            done_case._counted = True
            service = done_case.service_type
            self.completed_by_service[service] = self.completed_by_service.get(service, 0) + 1

    metrics = self.metrics

    # Likewise, count each active case's arrival once.
    for open_case in self.active_cases:
        if not getattr(open_case, "_arrival_counted", False):
            open_case._arrival_counted = True
            service = open_case.service_type
            self.arrived_by_service[service] = self.arrived_by_service.get(service, 0) + 1
            metrics.total_arrived += 1
            if open_case.is_urgent:
                metrics.total_urgent_arrived += 1

    metrics.total_completed = len(self.completed_cases)
    metrics.total_sla_breaches += outcome.new_sla_breaches
    metrics.total_idle_officer_days += outcome.idle_officer_days
    metrics.total_capacity_days += outcome.total_capacity_days
    metrics.total_urgent_completed += outcome.urgent_completed
    metrics.total_docs_cleared += outcome.newly_unblocked_missing

    return outcome
435
+
436
def _build_observation(self, active_events: list = None) -> ObservationModel:
    """
    Assemble the ObservationModel describing the current episode state.

    Args:
        active_events: Events to report on the observation. ``None`` (or any
            empty value) is reported as an empty list.

    Returns:
        An ObservationModel built from per-service queue snapshots, the
        signals returned by ``self.signal_computer.compute`` and the episode
        counters held on ``self``.
    """
    if not active_events:
        active_events = []

    # NOTE(review): this tallies every case in completed_cases, not only the
    # ones finished on the current day, yet it feeds `total_completed_today`
    # below — confirm whether a per-day count is intended.
    completed_tally: dict[ServiceType, int] = {}
    for finished in self.completed_cases:
        key = finished.service_type
        completed_tally[key] = completed_tally.get(key, 0) + 1

    # One queue snapshot per enabled service, keyed by the service's value.
    snapshots: dict[str, QueueSnapshot] = {}
    for service in self.task.enabled_services:
        snapshot = self.simulator.build_queue_snapshot(service, self.active_cases, self.day)
        snapshot.total_completed_today = completed_tally.get(service, 0)
        snapshots[service.value] = snapshot

    # Arrivals are the active cases whose arrival_day equals the current day.
    arrived_today = [case for case in self.active_cases if case.arrival_day == self.day]
    todays_arrivals = len(arrived_today)
    todays_digital = sum(
        1 for case in arrived_today if case.intake_channel.value == "digital"
    )

    sigs = self.signal_computer.compute(
        queue_snapshots=snapshots,
        officer_pool=self.officer_pool,
        todays_arrivals=todays_arrivals,
        digital_arrivals=todays_digital,
        # Floor of 1.0 keeps the capacity argument strictly positive.
        capacity_per_day=max(1.0, float(self.officer_pool.available_officers)),
    )

    # Blocked-on-documents cases that already have a resolution day scheduled.
    pending_doc = 0
    for case in self.active_cases:
        if (
            case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
            and case.doc_resolution_day is not None
        ):
            pending_doc += 1

    pending_officer = len(getattr(self.officer_pool, "pending_reallocation", {}))

    return ObservationModel(
        task_id=self.task_id,
        episode_id=self.episode_id,
        day=self.day,
        max_days=self.task.max_days,
        scenario_mode=self.task.scenario_mode,
        # Deep copy so later pool changes do not alter this observation.
        officer_pool=self.officer_pool.model_copy(deep=True),
        queue_snapshots=snapshots,
        total_backlog=len(self.active_cases),
        total_completed=len(self.completed_cases),
        total_sla_breaches=self.metrics.total_sla_breaches,
        total_rejected=self.metrics.total_rejected,
        escalation_budget_remaining=self.escalation_budget_remaining,
        backlog_pressure=sigs.backlog_pressure,
        sla_risk_score=sigs.sla_risk_score,
        fairness_index=sigs.fairness_index,
        resource_utilization=sigs.resource_utilization,
        digital_intake_ratio=sigs.digital_intake_ratio,
        blocked_cases_missing_docs=sigs.blocked_cases_missing_docs,
        field_verification_load=sigs.field_verification_load,
        active_events=active_events,
        last_action_valid=self.last_action_valid,
        last_action_message=self.last_action_message,
        last_action_explanation=self.last_action_explanation,
        pending_doc_resolutions=pending_doc,
        pending_officer_reallocations=pending_officer,
    )
500
+
501
def _init_episode_state(self) -> None:
    """
    Reset every per-episode attribute to its starting value.

    Seeds the RNG from ``self.task.seed``, clears counters, case lists and
    action bookkeeping, and creates fresh EventEngine, DaySimulator and
    SignalComputer instances.
    """
    # --- identity and randomness -------------------------------------------
    self.seed = self.task.seed
    self.rng = random.Random(self.seed)
    self.episode_id = f"{self.task_id}-s{self.seed}-init"

    # --- clock and termination flags ---------------------------------------
    self.day = 0
    self.total_steps = 0
    self.terminated = False
    self.truncated = False

    # --- policy and staffing -----------------------------------------------
    self.priority_mode = PriorityMode.BALANCED
    # Placeholder single-officer pool with no allocations.
    self.officer_pool = OfficerPool(
        total_officers=1,
        available_officers=1,
        allocated={},
        pending_reallocation={},
    )

    # --- case tracking -------------------------------------------------------
    self.active_cases: list[ApplicationCase] = []
    self.completed_cases: list[ApplicationCase] = []
    self.escalation_budget_remaining = 0
    self.arrived_by_service: dict[ServiceType, int] = {}
    self.completed_by_service: dict[ServiceType, int] = {}
    self.metrics = EpisodeMetrics()

    # --- last-action bookkeeping ---------------------------------------------
    self.action_history: list[dict] = []
    self.last_action_valid = True
    self.last_action_message = ""
    self.last_action_explanation = ""

    # --- simulation components -----------------------------------------------
    # NOTE(review): the event engine is created with ScenarioMode.NORMAL rather
    # than the task's scenario_mode — confirm this is replaced elsewhere.
    self.event_engine = EventEngine(seed=self.seed, scenario_mode=ScenarioMode.NORMAL)
    self.simulator = DaySimulator(self.task, self.rng, self.event_engine)
    self.signal_computer = SignalComputer()
529
+
530
+ def _count_pending_effects(self) -> int:
531
+ doc_pending = sum(
532
+ 1 for c in self.active_cases
533
+ if c.doc_resolution_day is not None
534
+ and c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
535
+ )
536
+ fv_pending = sum(
537
+ 1 for c in self.active_cases
538
+ if c.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
539
+ and c.field_verification_completion_day is not None
540
+ )
541
+ return doc_pending + fv_pending
542
+
543
@property
def fairness_gap(self) -> float:
    """Result of ``completion_fairness_gap`` for the per-service arrival and completion tallies."""
    arrived = self.arrived_by_service
    completed = self.completed_by_service
    return completion_fairness_gap(arrived, completed)
546
+
547
@property
def total_completed(self) -> int:
    """Number of cases currently held in ``completed_cases``."""
    finished = self.completed_cases
    return len(finished)
550
+
551
@property
def total_backlog(self) -> int:
    """Number of cases currently held in ``active_cases``."""
    waiting = self.active_cases
    return len(waiting)
app/event_engine.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ event_engine.py — Gov Workflow OpenEnv v2.0
3
+ Deterministic daily event system. Same seed + day + scenario = same events always.
4
+ """
5
+ import random
6
+ from typing import List
7
+ from app.models import EventType, ScenarioMode, TaskConfig
8
+
9
# Scenario-wide factor applied to every base event probability.
SCENARIO_MULTIPLIER = {
    ScenarioMode.NORMAL: 1.0,
    ScenarioMode.CRISIS: 2.0,
    ScenarioMode.EXTREME_OVERLOAD: 3.5,
}

# Per-day probability of each event before the scenario multiplier is applied.
BASE_PROBS = {
    EventType.SURGE_APPLICATIONS: 0.08,
    EventType.OFFICER_UNAVAILABLE: 0.07,
    EventType.DOCUMENT_REJECTION_SPIKE: 0.10,
    EventType.REVENUE_DB_DELAY: 0.06,
    EventType.SLA_ESCALATION_ORDER: 0.05,
}

# Effect magnitude of each event, looked up by scenario mode.
EVENT_EFFECTS = {
    EventType.SURGE_APPLICATIONS: {
        ScenarioMode.NORMAL: 1.3,
        ScenarioMode.CRISIS: 1.5,
        ScenarioMode.EXTREME_OVERLOAD: 2.0,
    },
    EventType.OFFICER_UNAVAILABLE: {
        ScenarioMode.NORMAL: 1,
        ScenarioMode.CRISIS: 1,
        ScenarioMode.EXTREME_OVERLOAD: 2,
    },
    EventType.DOCUMENT_REJECTION_SPIKE: {
        ScenarioMode.NORMAL: 0.15,
        ScenarioMode.CRISIS: 0.20,
        ScenarioMode.EXTREME_OVERLOAD: 0.35,
    },
    EventType.REVENUE_DB_DELAY: {
        ScenarioMode.NORMAL: 0.30,
        ScenarioMode.CRISIS: 0.40,
        ScenarioMode.EXTREME_OVERLOAD: 0.60,
    },
    EventType.SLA_ESCALATION_ORDER: {
        ScenarioMode.NORMAL: 0.50,
        ScenarioMode.CRISIS: 0.50,
        ScenarioMode.EXTREME_OVERLOAD: 0.40,
    },
}
35
+
36
+
37
class DayEventParams:
    """Mutable bundle of one day's event modifiers; starts with neutral values."""

    def __init__(self):
        # Multipliers start at 1.0 and additive terms at 0.
        self.arrival_multiplier: float = 1.0
        self.officer_reduction: int = 0
        self.doc_defect_rate_boost: float = 0.0
        self.system_dependency_boost: float = 0.0
        self.sla_window_multiplier: float = 1.0
        self.active_events: List[EventType] = []

    def has_events(self) -> bool:
        """Return True when ``active_events`` is non-empty."""
        if self.active_events:
            return True
        return False
48
+
49
+
50
class EventEngine:
    """
    Daily event generator driven by a seed and a scenario mode.

    Each day's draw uses a ``random.Random`` seeded from ``seed`` and ``day``
    only, so repeated calls with the same inputs yield the same events.
    """

    def __init__(self, seed: int, scenario_mode: ScenarioMode):
        """
        Args:
            seed: Base seed combined with the day number for each draw.
            scenario_mode: Key into SCENARIO_MULTIPLIER and EVENT_EFFECTS.

        Raises:
            KeyError: If ``scenario_mode`` has no entry in SCENARIO_MULTIPLIER.
        """
        self.seed = seed
        self.scenario_mode = scenario_mode
        self._multiplier = SCENARIO_MULTIPLIER[scenario_mode]

    def get_events_for_day(self, day: int, task_config: "TaskConfig") -> List[EventType]:
        """
        Draw the events that fire on ``day``.

        Args:
            day: Day number mixed into the RNG seed.
            task_config: Supplies ``allowed_events``, the candidates to draw.

        Returns:
            The events that fired, in ``allowed_events`` order, or
            ``[EventType.NO_EVENT]`` when none fired.
        """
        day_rng = random.Random(self.seed + day * 31337)
        active = []
        for event_type in task_config.allowed_events:
            if event_type == EventType.NO_EVENT:
                continue
            base_prob = BASE_PROBS.get(event_type, 0.0)
            # Scaled probability is capped at 0.80.
            effective_prob = min(0.80, base_prob * self._multiplier)
            # One draw per candidate, even at probability 0, so the random
            # stream consumed does not depend on the probability table.
            if day_rng.random() < effective_prob:
                active.append(event_type)
        return active if active else [EventType.NO_EVENT]

    def apply_events(self, events: List[EventType], task_config: "TaskConfig") -> DayEventParams:
        """
        Translate a list of events into the day's modifier bundle.

        Args:
            events: Events to apply; ``EventType.NO_EVENT`` entries are ignored.
            task_config: Accepted for interface compatibility; not read here.

        Returns:
            DayEventParams with the combined effects. ``active_events`` is
            ``[EventType.NO_EVENT]`` when nothing was applied.
        """
        params = DayEventParams()
        for event in events:
            if event == EventType.NO_EVENT:
                continue
            params.active_events.append(event)
            magnitude = EVENT_EFFECTS.get(event, {}).get(self.scenario_mode)
            if magnitude is None:
                # No magnitude configured for this event/scenario pair: record
                # the event but leave the modifiers neutral. Defaulting to 0
                # would zero the arrival multiplier and the SLA window.
                continue
            if event == EventType.SURGE_APPLICATIONS:
                params.arrival_multiplier *= magnitude
            elif event == EventType.OFFICER_UNAVAILABLE:
                params.officer_reduction += int(magnitude)
            elif event == EventType.DOCUMENT_REJECTION_SPIKE:
                params.doc_defect_rate_boost += magnitude
            elif event == EventType.REVENUE_DB_DELAY:
                params.system_dependency_boost += magnitude
            elif event == EventType.SLA_ESCALATION_ORDER:
                # The smallest window multiplier seen so far wins.
                params.sla_window_multiplier = min(params.sla_window_multiplier, magnitude)
        if not params.active_events:
            params.active_events = [EventType.NO_EVENT]
        return params

    def describe_events(self, events: List[EventType]) -> str:
        """
        Build a human-readable summary of ``events``.

        Returns:
            Descriptions joined by ``"; "``, or ``"No active events today"``
            when the list holds nothing but ``EventType.NO_EVENT``.
        """
        descriptions = {
            EventType.SURGE_APPLICATIONS: "Digital surge: arrivals increased",
            EventType.OFFICER_UNAVAILABLE: "Officer absent: reduced capacity",
            EventType.DOCUMENT_REJECTION_SPIKE: "Doc rejection spike: higher defect rate",
            EventType.REVENUE_DB_DELAY: "Revenue DB delay: land records slower",
            EventType.SLA_ESCALATION_ORDER: "SLA escalation order: deadlines tightened",
            EventType.NO_EVENT: "No active events today",
        }
        active = [e for e in events if e != EventType.NO_EVENT]
        if not active:
            return "No active events today"
        # Events without a description fall back to their str() form.
        return "; ".join(descriptions.get(e, str(e)) for e in active)
app/graders.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ graders.py — Gov Workflow OpenEnv: Deterministic Episode Graders
3
+
4
+ Rules:
5
+ - All graders read ONLY from EpisodeStateModel flat fields.
6
+ - No access to env internals, EpisodeMetrics, or reward breakdown proxies.
7
+ - GraderResult uses the aligned schema (score, grader_name, named metric fields).
8
+ - grade_episode() dispatches by task_id.
9
+
10
+ Grader weights:
11
+ Easy — completion(0.45) + SLA(0.35) + idle_efficiency(0.20) = 1.00
12
+ Medium — completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15) = 1.00
13
+ Hard — completion(0.28) + SLA(0.24) + doc_rework(0.16)
14
+ + fairness(0.16) + escalation_discipline(0.16) = 1.00
15
+ """
16
+ from __future__ import annotations
17
+ from app.models import EpisodeStateModel, GraderResult
18
+
19
+
20
+ # ─────────────────────────────────────────────────────────────────────────────
21
+ # INTERNAL HELPERS
22
+ # ─────────────────────────────────────────────────────────────────────────────
23
+
24
+ def _safe_ratio(num: float, den: float, default: float = 1.0) -> float:
25
+ """Safe division, clamped to [0.0, 1.0]. Returns `default` when den ≤ 0."""
26
+ if den <= 0:
27
+ return max(0.0, min(1.0, default))
28
+ return max(0.0, min(1.0, num / den))
29
+
30
+
31
+ def _b(value: float) -> float:
32
+ """Clamp any float to [0.0, 1.0]."""
33
+ return max(0.0, min(1.0, float(value)))
34
+
35
+
36
def _extract(state: EpisodeStateModel) -> dict[str, float]:
    """
    Derive every grader input metric from the flat fields of ``state``.

    Returns:
        A dict with the keys ``completion_rate``, ``sla_compliance``,
        ``document_rework_quality``, ``urgent_served_rate``, ``fairness_score``,
        ``escalation_discipline`` and ``idle_efficiency`` (each in [0.0, 1.0]),
        plus ``fairness_gap`` rounded to four decimals.
    """
    # Floor of 1 keeps the arrival-based ratios defined for an empty episode.
    arrived = max(1, state.total_arrived)
    completed = float(state.total_completed)
    breaches = float(state.total_sla_breaches)
    docs_requested = float(state.total_docs_requested)
    docs_cleared = float(state.total_docs_cleared)
    urgent_arrived = float(state.total_urgent_arrived)
    urgent_completed = float(state.total_urgent_completed)
    idle_days = float(state.total_idle_officer_days)
    capacity_days = float(state.total_capacity_days)
    escalations_used = float(state.total_escalations_used)
    escalations_wasted = float(state.total_wasted_escalations)
    gap = float(state.fairness_gap)

    # Shares that count against the score are inverted further down.
    breach_share = _safe_ratio(breaches, arrived, 0.0)
    wasted_share = _safe_ratio(escalations_wasted, max(1.0, escalations_used), 0.0)
    idle_share = _safe_ratio(idle_days, max(1.0, capacity_days), 0.0)

    metrics = {}
    metrics["completion_rate"] = _b(_safe_ratio(completed, arrived, 0.0))
    metrics["sla_compliance"] = _b(1.0 - breach_share)
    # With nothing requested / no urgent arrivals the ratio defaults to 1.0.
    metrics["document_rework_quality"] = _b(_safe_ratio(docs_cleared, docs_requested, 1.0))
    metrics["urgent_served_rate"] = _b(_safe_ratio(urgent_completed, urgent_arrived, 1.0))
    metrics["fairness_score"] = _b(1.0 - gap)
    metrics["escalation_discipline"] = _b(1.0 - wasted_share)
    metrics["idle_efficiency"] = _b(1.0 - idle_share)
    metrics["fairness_gap"] = round(gap, 4)
    return metrics
68
+
69
+
70
def _build_result(
    state: EpisodeStateModel,
    score: float,
    grader_name: str,
    m: dict[str, float],
) -> GraderResult:
    """
    Assemble a fully-populated GraderResult.

    Args:
        state: Episode state supplying identifiers and raw totals.
        score: Weighted score; clamped to [0.0, 1.0] before being stored.
        grader_name: Label of the grader that produced ``score``.
        m: Metric dict with the keys produced by ``_extract``.
    """
    fields = {
        "task_id": state.task_id,
        "episode_id": state.episode_id,
        "grader_name": grader_name,
        "score": _b(score),
        # Named metric components (note the sla_compliance -> *_rate rename).
        "completion_rate": m["completion_rate"],
        "sla_compliance_rate": m["sla_compliance"],
        "idle_efficiency": m["idle_efficiency"],
        "document_rework_quality": m["document_rework_quality"],
        "urgent_served_rate": m["urgent_served_rate"],
        "fairness_score": m["fairness_score"],
        "escalation_discipline": m["escalation_discipline"],
        "fairness_gap": m["fairness_gap"],
        # Raw totals copied from the state.
        "total_cases_arrived": max(0, state.total_arrived),
        "total_completed": state.total_completed,
        "total_sla_breached": state.total_sla_breaches,
        "total_rejected": state.total_rejected,
        "avg_waiting_days": state.avg_waiting_days,
    }
    return GraderResult(**fields)
99
+
100
+
101
+ # ─────────────────────────────────────────────────────────────────────────────
102
+ # TASK GRADERS
103
+ # ─────────────────────────────────────────────────────────────────────────────
104
+
105
def grade_easy(state: EpisodeStateModel) -> GraderResult:
    """
    Grader for district_backlog_easy.

    Rewards raw throughput and SLA hygiene, plus keeping officers busy.

    Weights: completion(0.45) + SLA(0.35) + idle_efficiency(0.20)
    """
    m = _extract(state)
    throughput_term = 0.45 * m["completion_rate"]
    sla_term = 0.35 * m["sla_compliance"]
    idle_term = 0.20 * m["idle_efficiency"]
    # Terms are added left to right, in the order the weights are listed.
    score = throughput_term + sla_term + idle_term
    return _build_result(state, score, "easy", m)
119
+
120
+
121
def grade_medium(state: EpisodeStateModel) -> GraderResult:
    """
    Grader for mixed_urgency_medium.

    Rewards throughput, SLA compliance, document rework quality and serving
    urgent cases.

    Weights: completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15)
    """
    m = _extract(state)
    throughput_term = 0.35 * m["completion_rate"]
    sla_term = 0.30 * m["sla_compliance"]
    docs_term = 0.20 * m["document_rework_quality"]
    urgent_term = 0.15 * m["urgent_served_rate"]
    # Terms are added left to right, in the order the weights are listed.
    score = throughput_term + sla_term + docs_term + urgent_term
    return _build_result(state, score, "medium", m)
136
+
137
+
138
def grade_hard(state: EpisodeStateModel) -> GraderResult:
    """
    Grader for cross_department_hard.

    Rewards all-round performance, including cross-service fairness and
    restrained use of escalations.

    Weights: completion(0.28) + SLA(0.24) + doc_rework(0.16)
             + fairness(0.16) + escalation_discipline(0.16)
    """
    m = _extract(state)
    throughput_term = 0.28 * m["completion_rate"]
    sla_term = 0.24 * m["sla_compliance"]
    docs_term = 0.16 * m["document_rework_quality"]
    fairness_term = 0.16 * m["fairness_score"]
    escalation_term = 0.16 * m["escalation_discipline"]
    # Terms are added left to right, in the order the weights are listed.
    score = throughput_term + sla_term + docs_term + fairness_term + escalation_term
    return _build_result(state, score, "hard", m)
156
+
157
+
158
+ # ─────────────────────────────────────────────────────────────────────────────
159
+ # DISPATCHER
160
+ # ─────────────────────────────────────────────────────────────────────────────
161
+
162
# task_id -> grader function; both easy-tier task IDs share grade_easy.
_GRADER_MAP = dict(
    district_backlog_easy=grade_easy,
    district_backlog_easy_extreme=grade_easy,
    mixed_urgency_medium=grade_medium,
    cross_department_hard=grade_hard,
)
168
+
169
+
170
def grade_episode(state: EpisodeStateModel) -> GraderResult:
    """
    Grade ``state`` with the grader registered for its ``task_id``.

    Task IDs missing from ``_GRADER_MAP`` are graded with ``grade_hard``.
    """
    task_id = state.task_id
    if task_id in _GRADER_MAP:
        grader_fn = _GRADER_MAP[task_id]
    else:
        grader_fn = grade_hard
    return grader_fn(state)
app/main.py ADDED
The diff for this file is too large to render. See raw diff
 
app/models.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ models.py — Gov Workflow OpenEnv v2.0 — Phase 2 FULL FILE
3
+ Adds: DocEnrichmentType, doc_enrichment fields on ApplicationCase,
4
+ blocked_cases_enrichment / pending_enrichment_lookups on observation,
5
+ INTERNAL_TO_PUBLIC_STAGE mapping,
6
+ SectorProfile enrichment fields.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from enum import Enum
11
+ from typing import Dict, List, Optional
12
+ from pydantic import BaseModel, Field
13
+ import uuid
14
+
15
+
16
+ # ─────────────────────────────────────────────
17
+ # ENUMS
18
+ # ─────────────────────────────────────────────
19
+
20
class ServiceType(str, Enum):
    # String-valued identifiers of the supported services.
    PASSPORT = "passport"
    DRIVING_LICENSE = "driving_license"
    AADHAAR_CARD = "aadhaar_card"
    GST_REGISTRATION = "gst_registration"
    INCOME_CERTIFICATE = "income_certificate"
    CASTE_CERTIFICATE = "caste_certificate"
    BIRTH_CERTIFICATE = "birth_certificate"
    LAND_REGISTRATION = "land_registration"
29
+
30
+
31
class StageType(str, Enum):
    # Publicly visible workflow stages.
    SUBMISSION = "submission"
    DOCUMENT_VERIFICATION = "document_verification"
    FIELD_VERIFICATION = "field_verification"
    APPROVAL = "approval"
    ISSUANCE = "issuance"
37
+
38
+
39
class InternalSubstate(str, Enum):
    # Fine-grained internal case states.

    # In-progress processing states
    PRE_SCRUTINY = "pre_scrutiny"
    DOC_VALIDATION = "doc_validation"
    SERVICE_SPECIFIC_VALIDATION = "service_specific_validation"
    FIELD_VERIFICATION_PENDING = "field_verification_pending"
    DECISION_PENDING = "decision_pending"
    ISSUANCE_READY = "issuance_ready"

    # Blocked states
    BLOCKED_MISSING_DOCS = "blocked_missing_docs"
    BLOCKED_ENRICHMENT = "blocked_enrichment"

    # Terminal states
    COMPLETED = "completed"
    REJECTED = "rejected"
50
+
51
+
52
+ # ── Phase 2 addition ──────────────────────────────────────────────────────────
53
class DocEnrichmentType(str, Enum):
    """External lookup needed for document verification."""
    # Trailing comments name the service and the external source involved.
    NONE = "none"
    PAST_LAND_RECORDS = "past_land_records"            # Land Registration — Revenue DB
    FAMILY_CASTE_HISTORY = "family_caste_history"      # Caste Certificate — Caste Registry
    POLICE_VERIFICATION = "police_verification"        # Passport — Police Station
    TAX_RECORD_CROSS_CHECK = "tax_record_cross_check"  # GST Registration — Tax DB
60
+
61
+
62
# Public stage mapping — used by state_machine.build_public_stage.
# Keys are internal substate values; values are public stage values.
INTERNAL_TO_PUBLIC_STAGE: dict = {
    # submission
    "pre_scrutiny": "submission",
    # document verification (validation states)
    "doc_validation": "document_verification",
    "service_specific_validation": "document_verification",
    # field verification
    "field_verification_pending": "field_verification",
    # approval / issuance
    "decision_pending": "approval",
    "issuance_ready": "issuance",
    # blocked states are shown as document verification
    "blocked_missing_docs": "document_verification",
    "blocked_enrichment": "document_verification",
    # terminal states
    "completed": "issuance",
    "rejected": "approval",
}
75
+
76
+
77
class PriorityMode(str, Enum):
    # Queue prioritisation modes.
    URGENT_FIRST = "urgent_first"
    OLDEST_FIRST = "oldest_first"
    BALANCED = "balanced"
    BACKLOG_CLEARANCE = "backlog_clearance"
82
+
83
+
84
class ActionType(str, Enum):
    # Kinds of action an agent can submit.
    SET_PRIORITY_MODE = "set_priority_mode"
    ASSIGN_CAPACITY = "assign_capacity"
    REQUEST_MISSING_DOCUMENTS = "request_missing_documents"
    ESCALATE_SERVICE = "escalate_service"
    ADVANCE_TIME = "advance_time"
    REALLOCATE_OFFICERS = "reallocate_officers"
91
+
92
+
93
class EventType(str, Enum):
    # Daily events; NO_EVENT marks a day on which nothing fired.
    SURGE_APPLICATIONS = "surge_applications"
    OFFICER_UNAVAILABLE = "officer_unavailable"
    DOCUMENT_REJECTION_SPIKE = "document_rejection_spike"
    REVENUE_DB_DELAY = "revenue_db_delay"
    SLA_ESCALATION_ORDER = "sla_escalation_order"
    NO_EVENT = "no_event"
100
+
101
+
102
class ScenarioMode(str, Enum):
    # Scenario modes a task can run under.
    NORMAL = "normal"
    CRISIS = "crisis"
    EXTREME_OVERLOAD = "extreme_overload"
106
+
107
+
108
class UrgencyProfile(str, Enum):
    # Urgency profile labels assigned to a service sector.
    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    LOW_BUT_STICKY = "low_but_sticky"
113
+
114
+
115
class IntakeChannel(str, Enum):
    # Channel through which an application was received.
    DIGITAL = "digital"
    PAPER = "paper"
    HYBRID = "hybrid"
119
+
120
+
121
class DelayedEffectType(str, Enum):
    # Kinds of effect that resolve on a later day.
    DOC_REQUEST_RESOLUTION = "doc_request_resolution"
    OFFICER_REALLOCATION = "officer_reallocation"
    ESCALATION_RELIEF = "escalation_relief"
125
+
126
+
127
+ # ─────────────────────────────────────────────
128
+ # SECTOR / SERVICE CONFIGURATION
129
+ # ─────────────────────────────────────────────
130
+
131
class SectorProfile(BaseModel):
    # Per-service configuration: probabilities, SLA and processing parameters.
    service_type: ServiceType
    sector_name: str

    # Probabilities and intensities, each constrained to [0.0, 1.0].
    missing_docs_probability: float = Field(ge=0.0, le=1.0)
    doc_defect_rate_digital: float = Field(ge=0.0, le=1.0)
    doc_defect_rate_paper: float = Field(ge=0.0, le=1.0)
    field_verification_probability: float = Field(ge=0.0, le=1.0)
    manual_scrutiny_intensity: float = Field(ge=0.0, le=1.0)
    decision_backlog_sensitivity: float = Field(ge=0.0, le=1.0)
    system_dependency_risk: float = Field(ge=0.0, le=1.0)

    # Timing and throughput.
    sla_days: int = Field(ge=1)
    urgency_profile: UrgencyProfile
    base_processing_rate: float = Field(ge=0.1)
    field_verification_days: int = Field(ge=1)

    # ── Phase 2: enrichment ─────────────────────────────────────────
    # NOTE(review): nothing here enforces delay_days_min <= delay_days_max.
    doc_enrichment_type: DocEnrichmentType = DocEnrichmentType.NONE
    doc_enrichment_probability: float = Field(default=0.0, ge=0.0, le=1.0)
    doc_enrichment_delay_days_min: int = Field(default=1, ge=1)
    doc_enrichment_delay_days_max: int = Field(default=3, ge=1)
150
+
151
+
152
class OfficerPool(BaseModel):
    # Officer headcount plus the current and pending per-service allocations.
    total_officers: int = Field(ge=1)
    available_officers: int = Field(ge=0)
    allocated: Dict[str, int] = Field(default_factory=dict)
    pending_reallocation: Dict[str, int] = Field(default_factory=dict)

    @property
    def idle_officers(self) -> int:
        """Available officers not covered by any allocation, never below 0."""
        committed = sum(self.allocated.values())
        # Allocations can exceed availability; a negative idle headcount is
        # meaningless, so the result is floored at 0.
        return max(0, self.available_officers - committed)
161
+
162
+
163
+ # ─────────────────────────────────────────────
164
+ # CASE MODEL (Phase 2: enrichment fields added)
165
+ # ─────────────────────────────────────────────
166
+
167
class ApplicationCase(BaseModel):
    # One application moving through the workflow.

    # Identity and stage. case_id defaults to the first 8 chars of a UUID4.
    case_id: str = Field(default_factory=lambda: str(uuid.uuid4())[:8])
    service_type: ServiceType
    internal_substate: InternalSubstate = InternalSubstate.PRE_SCRUTINY
    public_stage: StageType = StageType.SUBMISSION

    # Timeline, in day numbers.
    arrival_day: int = Field(ge=0)
    current_day: int = Field(ge=0)
    sla_deadline_day: int = Field(ge=0)
    days_in_current_stage: int = Field(default=0, ge=0)
    waiting_days: int = Field(default=0, ge=0)

    # Intake, document and field-verification attributes.
    is_urgent: bool = False
    intake_channel: IntakeChannel = IntakeChannel.DIGITAL
    has_missing_docs: bool = False
    doc_request_sent_day: Optional[int] = None
    doc_resolution_day: Optional[int] = None
    field_verification_required: bool = False
    field_verification_completion_day: Optional[int] = None

    # Outcome flags.
    sla_breached: bool = False
    completed: bool = False
    rejected: bool = False

    # ── Phase 2: enrichment ─────────────────────────────────────────
    doc_enrichment_type: DocEnrichmentType = DocEnrichmentType.NONE
    doc_enrichment_triggered: bool = False
    enrichment_resolution_day: Optional[int] = None
    doc_enrichment_reason: Optional[str] = None

    @property
    def days_until_sla(self) -> int:
        """Days left before ``sla_deadline_day``; 0 once the deadline has passed."""
        return max(0, self.sla_deadline_day - self.current_day)

    @property
    def sla_risk(self) -> float:
        """
        Fraction of the SLA window already elapsed, clamped to [0.0, 1.0].

        Returns 1.0 when the window (deadline minus arrival) is not positive.
        """
        total_window = self.sla_deadline_day - self.arrival_day
        if total_window <= 0:
            return 1.0
        elapsed = self.current_day - self.arrival_day
        # The lower clamp keeps the risk non-negative if current_day is ever
        # earlier than arrival_day, matching the floor in days_until_sla.
        return max(0.0, min(1.0, elapsed / total_window))
208
+
209
+
210
class QueueSnapshot(BaseModel):
    # Per-service queue summary; every counter defaults to zero.
    service_type: ServiceType
    public_stage_counts: Dict[str, int] = Field(default_factory=dict)

    # Case counts.
    total_pending: int = Field(default=0, ge=0)
    total_completed_today: int = Field(default=0, ge=0)
    total_sla_breached: int = Field(default=0, ge=0)
    urgent_pending: int = Field(default=0, ge=0)
    blocked_missing_docs: int = Field(default=0, ge=0)
    blocked_enrichment: int = Field(default=0, ge=0)  # Phase 2
    field_verification_pending: int = Field(default=0, ge=0)

    # Age and risk aggregates.
    oldest_case_age_days: int = Field(default=0, ge=0)
    avg_waiting_days: float = Field(default=0.0, ge=0.0)
    current_sla_risk: float = Field(default=0.0, ge=0.0, le=1.0)
223
+
224
+
225
+ # ─────────────────────────────────────────────
226
+ # DELAYED EFFECT MODEL
227
+ # ─────────────────────────────────────────────
228
+
229
class DelayedEffect(BaseModel):
    # An effect scheduled to resolve on `resolution_day`.
    # effect_id defaults to the first 8 chars of a UUID4.
    effect_id: str = Field(default_factory=lambda: str(uuid.uuid4())[:8])
    effect_type: DelayedEffectType
    # Optional targets: a whole service and/or a single case.
    target_service: Optional[ServiceType] = None
    target_case_id: Optional[str] = None
    resolution_day: int = Field(ge=0)
    magnitude: float = Field(default=1.0)
    description: str = Field(default="")
237
+
238
+
239
+ # ─────────────────────────────────────────────
240
+ # OBSERVATION MODEL (Phase 2: enrichment signals added)
241
+ # ─────────────────────────────────────────────
242
+
243
class ObservationModel(BaseModel):
    # Observation of the episode state.

    # Episode identity and clock.
    task_id: str
    episode_id: str
    day: int = Field(ge=0)
    max_days: int = Field(ge=1)
    scenario_mode: ScenarioMode = ScenarioMode.NORMAL
    officer_pool: OfficerPool
    queue_snapshots: Dict[str, QueueSnapshot] = Field(default_factory=dict)

    # Episode-level totals.
    total_backlog: int = Field(default=0, ge=0)
    total_completed: int = Field(default=0, ge=0)
    total_sla_breaches: int = Field(default=0, ge=0)
    total_rejected: int = Field(default=0, ge=0)
    escalation_budget_remaining: int = Field(default=0, ge=0)

    # Compressed signals
    backlog_pressure: float = Field(default=0.0, ge=0.0, le=1.0)
    sla_risk_score: float = Field(default=0.0, ge=0.0, le=1.0)
    fairness_index: float = Field(default=1.0, ge=0.0, le=1.0)
    resource_utilization: float = Field(default=0.0, ge=0.0, le=1.0)
    digital_intake_ratio: float = Field(default=0.5, ge=0.0, le=1.0)
    blocked_cases_missing_docs: int = Field(default=0, ge=0)
    blocked_cases_enrichment: int = Field(default=0, ge=0)  # Phase 2
    field_verification_load: float = Field(default=0.0, ge=0.0, le=1.0)

    active_events: List[EventType] = Field(default_factory=list)

    # Feedback on the most recent action.
    last_action_valid: bool = True
    last_action_message: str = ""
    last_action_explanation: str = Field(default="")

    # Counts of effects still pending resolution.
    pending_doc_resolutions: int = Field(default=0, ge=0)
    pending_enrichment_lookups: int = Field(default=0, ge=0)  # Phase 2
    pending_officer_reallocations: int = Field(default=0, ge=0)
277
+
278
+
279
+ # ─────────────────────────────────────────────
280
+ # ACTION / REWARD / STATE MODELS (unchanged)
281
+ # ─────────────────────────────────────────────
282
+
283
class ActionModel(BaseModel):
    # One action: `action_type` is required, every other field is optional.
    action_type: ActionType
    service_target: Optional[ServiceType] = None
    priority_mode: Optional[PriorityMode] = None
    reallocation_delta: Optional[Dict[str, int]] = None
    escalation_target: Optional[ServiceType] = None
    capacity_assignment: Optional[Dict[str, int]] = None
    notes: Optional[str] = None
291
+
292
+
293
class RewardModel(BaseModel):
    # Reward total plus its named components; every field defaults to 0.0.
    total_reward: float = 0.0

    # Reward and bonus components.
    progress_reward: float = 0.0
    completion_reward: float = 0.0
    recovery_reward: float = 0.0
    stability_bonus: float = 0.0

    # Penalty components.
    waiting_penalty: float = 0.0
    sla_penalty: float = 0.0
    fairness_penalty: float = 0.0
    invalid_action_penalty: float = 0.0
    idle_capacity_penalty: float = 0.0
    oscillation_penalty: float = 0.0
305
+
306
+
307
class EpisodeStateModel(BaseModel):
    """Internal episode state exposed via GET /state and POST /state endpoints."""
    # Episode identity and configuration.
    episode_id: str
    task_id: str
    seed: int
    scenario_mode: ScenarioMode
    day: int = Field(ge=0)
    max_days: int = Field(ge=1)

    # Progress flags and running totals.
    terminated: bool = False
    truncated: bool = False
    total_steps: int = Field(default=0, ge=0)
    total_completed: int = Field(default=0, ge=0)
    total_backlog: int = Field(default=0, ge=0)
    total_sla_breaches: int = Field(default=0, ge=0)
    total_rejected: int = Field(default=0, ge=0)
    action_history_count: int = Field(default=0, ge=0)

    # Reward accounting.
    cumulative_reward: float = 0.0
    cumulative_reward_breakdown: RewardModel = Field(default_factory=RewardModel)

    officer_pool: Optional[OfficerPool] = None
    pending_effects_count: int = Field(default=0, ge=0)
    active_events_today: List[EventType] = Field(default_factory=list)

    # ── Grader-facing fields ──────────────────────────────────────
    # These are populated by env.state() so graders never need to
    # reach into private EpisodeMetrics.
    fairness_gap: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Cross-service completion fairness gap at episode end",
    )
    total_arrived: int = Field(
        default=0,
        ge=0,
        description="Total cases that arrived across all services",
    )
    total_docs_requested: int = Field(
        default=0,
        ge=0,
        description="Total missing-doc requests sent",
    )
    total_docs_cleared: int = Field(
        default=0,
        ge=0,
        description="Total missing-doc cases subsequently resolved",
    )
    total_idle_officer_days: int = Field(
        default=0,
        ge=0,
        description="Cumulative officer-days wasted idle",
    )
    total_capacity_days: int = Field(
        default=0,
        ge=0,
        description="Cumulative total officer-days available",
    )
    total_urgent_arrived: int = Field(
        default=0,
        ge=0,
        description="Total urgent cases that arrived",
    )
    total_urgent_completed: int = Field(
        default=0,
        ge=0,
        description="Total urgent cases completed",
    )
    total_escalations_used: int = Field(
        default=0,
        ge=0,
        description="Total escalation actions consumed",
    )
    total_wasted_escalations: int = Field(
        default=0,
        ge=0,
        description="Escalations used on already-urgent or ineligible cases",
    )
    total_invalid_actions: int = Field(
        default=0,
        ge=0,
        description="Total invalid actions submitted by agent",
    )
    avg_waiting_days: float = Field(
        default=0.0,
        ge=0.0,
        description="Mean waiting days across all completed cases",
    )

    # ── Full action log (optional, stripped by default) ──────────
    action_history: Optional[List[dict]] = Field(
        default=None,
        description="Step-by-step action log. Stripped in normal API responses.",
    )
386
+
387
+
388
class StepInfoModel(BaseModel):
    # Per-step info record.
    reward_breakdown: RewardModel = Field(default_factory=RewardModel)

    # Case movement counts for the step.
    newly_arrived_cases: int = Field(default=0, ge=0)
    newly_completed_cases: int = Field(default=0, ge=0)
    newly_sla_breached_cases: int = Field(default=0, ge=0)
    newly_resolved_doc_cases: int = Field(default=0, ge=0)

    # Action feedback.
    invalid_action: bool = False
    action_explanation: str = ""

    active_events: List[EventType] = Field(default_factory=list)
    grader_preview_score: float = Field(default=0.0, ge=0.0, le=1.0)
    effects_resolved_this_step: List[str] = Field(default_factory=list)
399
+
400
+
401
class TaskConfig(BaseModel):
    """Static configuration of one task: identity, horizon, services and knobs.

    Fields without a default are required. Ratios and probabilities are
    validated to lie in [0.0, 1.0]; ``max_days`` must be at least 1 and
    ``escalation_budget`` must be non-negative.
    """

    # Identity
    task_id: str
    display_name: str
    difficulty: str
    scenario_mode: ScenarioMode
    seed: int

    # Episode horizon (required, >= 1)
    max_days: int = Field(..., ge=1)

    # Workload
    enabled_services: List[ServiceType]
    arrival_rate_per_day: Dict[str, float]
    digital_intake_ratio: float = Field(0.6, ge=0.0, le=1.0)

    # Staffing
    initial_officer_pool: OfficerPool

    # Optional per-key probability overrides; None means "no override".
    # NOTE(review): keys are presumably service names — confirm against callers.
    missing_docs_probability_override: Optional[Dict[str, float]] = None
    field_verification_probability_override: Optional[Dict[str, float]] = None

    # Escalation budget (required, >= 0)
    escalation_budget: int = Field(..., ge=0)

    # Optional fairness threshold in [0.0, 1.0]; None leaves it unset.
    fairness_threshold: Optional[float] = Field(None, ge=0.0, le=1.0)

    # Random events
    event_probability: float = Field(0.1, ge=0.0, le=1.0)
    allowed_events: List[EventType] = Field(default_factory=list)
418
+
419
+
420
class GraderResult(BaseModel):
    """
    Deterministic score for a completed or in-progress episode, in [0.0, 1.0].

    The model carries the primary scalar (``score``), the grader identity
    (``grader_name``), the individual metric components as typed fields, and
    a ``metrics`` property that flattens the components and counters into a
    plain dict for JSON responses.
    """

    task_id: str = ""
    episode_id: str = ""
    grader_name: str = ""  # "easy" | "medium" | "hard"

    # Primary scalar — use result.score everywhere
    score: float = Field(0.0, ge=0.0, le=1.0)

    # Named metric components, each bounded to [0.0, 1.0]
    completion_rate: float = Field(0.0, ge=0.0, le=1.0)
    sla_compliance_rate: float = Field(0.0, ge=0.0, le=1.0)
    idle_efficiency: float = Field(1.0, ge=0.0, le=1.0)
    document_rework_quality: float = Field(1.0, ge=0.0, le=1.0)
    urgent_served_rate: float = Field(1.0, ge=0.0, le=1.0)
    fairness_score: float = Field(1.0, ge=0.0, le=1.0)
    escalation_discipline: float = Field(1.0, ge=0.0, le=1.0)
    fairness_gap: float = Field(0.0, ge=0.0, le=1.0)

    # Episode counters — populated from EpisodeStateModel
    total_cases_arrived: int = 0
    total_completed: int = 0
    total_sla_breached: int = 0
    total_rejected: int = 0
    avg_waiting_days: float = 0.0

    @property
    def metrics(self) -> dict:
        """
        Flatten metric components and counters into a JSON-friendly dict.

        Rate-style components are rounded to 4 decimals, counters are passed
        through unchanged, and ``avg_waiting_days`` is rounded to 2 decimals.
        """
        rounded_components = (
            "completion_rate",
            "sla_compliance_rate",
            "idle_efficiency",
            "document_rework_quality",
            "urgent_served_rate",
            "fairness_score",
            "escalation_discipline",
            "fairness_gap",
        )
        raw_counters = (
            "total_cases_arrived",
            "total_completed",
            "total_sla_breached",
            "total_rejected",
        )
        flattened = {name: round(getattr(self, name), 4) for name in rounded_components}
        for name in raw_counters:
            flattened[name] = getattr(self, name)
        flattened["avg_waiting_days"] = round(self.avg_waiting_days, 2)
        return flattened
475
+
476
+
477
class ResetRequest(BaseModel):
    """Request body for starting a new episode of a given task."""

    # Task to reset (required).
    task_id: str
    # Optional overrides; None leaves the value unspecified by the client.
    seed: Optional[int] = None
    scenario_mode: Optional[ScenarioMode] = None
481
+
482
+
483
class ResetResponse(BaseModel):
    """Response to a reset: first observation, free-form info and episode id."""

    observation: ObservationModel
    info: dict
    episode_id: str
487
+
488
+
489
class StepRequest(BaseModel):
    """Request body for one step: the target episode and the action to apply."""

    episode_id: str
    action: ActionModel
492
+
493
+
494
class StepResponse(BaseModel):
    """Result of one step: observation, scalar reward, end flags and step info."""

    observation: ObservationModel
    reward: float
    # Two separate end-of-episode flags, reported independently.
    terminated: bool
    truncated: bool
    # Structured per-step diagnostics.
    info: StepInfoModel
500
+
501
+
502
class StateResponse(BaseModel):
    """Envelope carrying the full episode state."""

    state: EpisodeStateModel
504
+
505
+
506
class HealthResponse(BaseModel):
    """Health-check payload: status string, version string and episode count."""

    status: str = "ok"
    version: str = "2.0.0"
    active_episodes: int = 0
app/persistence.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sqlite3
6
+ import time
7
+ from pathlib import Path
8
+ from threading import Lock
9
+ from typing import Any
10
+ from uuid import uuid4
11
+
12
+
13
+ def _now() -> float:
14
+ return time.time()
15
+
16
+
17
+ def _as_json(payload: dict[str, Any]) -> str:
18
+ return json.dumps(payload, separators=(",", ":"), ensure_ascii=True)
19
+
20
+
21
+ def _from_json(payload: str) -> dict[str, Any]:
22
+ data = json.loads(payload)
23
+ return data if isinstance(data, dict) else {}
24
+
25
+
26
+ def _resolve_data_dir(repo_root: Path) -> Path:
27
+ configured = os.getenv("OPENENV_DATA_DIR") or os.getenv("STORAGE_DATA_DIR")
28
+ if configured:
29
+ return Path(configured).expanduser().resolve()
30
+ if Path("/data").exists():
31
+ return Path("/data/openenv_rl").resolve()
32
+ return (repo_root / "outputs" / "persist").resolve()
33
+
34
+
35
+ def _storage_enabled() -> bool:
36
+ raw = str(os.getenv("STORAGE_ENABLED", "true")).strip().lower()
37
+ return raw not in {"0", "false", "no", "off"}
38
+
39
+
40
class _ConnectionScope:
    """Context manager wrapping one unit of database work for a store.

    On entry it takes the store's lock and opens a fresh connection. On exit
    it commits (or rolls back if the body raised), closes the connection and
    only then releases the lock. A ``sqlite3.Connection`` used directly as a
    context manager ends the transaction but leaves the connection open, so
    the explicit close here is what prevents a leaked handle per call.
    """

    def __init__(self, store: "PersistenceStore") -> None:
        self._store = store
        self._conn = None

    def __enter__(self) -> sqlite3.Connection:
        self._store._lock.acquire()
        try:
            self._conn = self._store._connect()
        except BaseException:
            # Never keep the lock if the connection could not be opened.
            self._store._lock.release()
            raise
        return self._conn

    def __exit__(self, exc_type, exc, tb) -> bool:
        conn, self._conn = self._conn, None
        try:
            try:
                if exc_type is None:
                    conn.commit()
                else:
                    conn.rollback()
            finally:
                conn.close()
        finally:
            self._store._lock.release()
        return False  # never suppress the caller's exception


class PersistenceStore:
    """SQLite-backed store for training jobs, simulation runs and comparisons.

    Every record is kept as a JSON payload next to a few indexed columns.
    When storage is disabled every method is a no-op that returns an empty
    value (``None``, ``[]`` or ``0``). All database access goes through
    ``_session()``, which serialises callers with a lock and closes the
    connection after each operation.
    """

    def __init__(self, repo_root: Path) -> None:
        """Resolve storage locations and, if enabled, create dirs and schema.

        Args:
            repo_root: Repository root used as the fallback base directory.
        """
        self.repo_root = repo_root.resolve()
        self.enabled = _storage_enabled()
        self.data_dir = _resolve_data_dir(self.repo_root)
        self.db_path = self.data_dir / "openenv_state.sqlite3"
        self.training_runs_dir = self.data_dir / "training_runs"
        self._lock = Lock()

        if not self.enabled:
            return

        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.training_runs_dir.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection with name-addressable rows."""
        conn = sqlite3.connect(self.db_path, timeout=30)
        conn.row_factory = sqlite3.Row
        return conn

    def _session(self) -> _ConnectionScope:
        """Return a locked, auto-committing, auto-closing connection scope."""
        return _ConnectionScope(self)

    def _fetch_payloads(self, sql: str, params: tuple = ()) -> list[str]:
        """Run a SELECT and return the ``payload_json`` text of every row."""
        with self._session() as conn:
            return [str(row["payload_json"]) for row in conn.execute(sql, params).fetchall()]

    def _delete(self, sql: str, params: tuple = ()) -> int:
        """Run a DELETE and return the number of rows removed."""
        with self._session() as conn:
            cur = conn.execute(sql, params)
            return int(cur.rowcount or 0)

    def _init_schema(self) -> None:
        """Create the three tables if they do not exist yet."""
        with self._session() as conn:
            conn.executescript(
                """
                CREATE TABLE IF NOT EXISTS training_jobs (
                    job_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    payload_json TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS simulation_runs (
                    run_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    task_id TEXT,
                    agent_mode TEXT,
                    status TEXT,
                    payload_json TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS comparison_runs (
                    comparison_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    task_id TEXT,
                    payload_json TEXT NOT NULL
                );
                """
            )

    # Training jobs ---------------------------------------------------------
    def upsert_training_job(self, snapshot: dict[str, Any]) -> None:
        """Insert or update a training job keyed by ``snapshot["job_id"]``.

        Snapshots without a job id are ignored. On update only ``updated_at``
        and the payload change; the stored ``created_at`` column is kept.
        """
        if not self.enabled:
            return
        job_id = str(snapshot.get("job_id") or "")
        if not job_id:
            return
        created_at = float(snapshot.get("created_at") or _now())
        updated_at = float(snapshot.get("updated_at") or _now())
        with self._session() as conn:
            conn.execute(
                """
                INSERT INTO training_jobs (job_id, created_at, updated_at, payload_json)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(job_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    payload_json = excluded.payload_json
                """,
                (job_id, created_at, updated_at, _as_json(snapshot)),
            )

    def list_training_jobs(self, limit: int = 500) -> list[dict[str, Any]]:
        """Return up to ``limit`` (at least 1) job payloads, newest update first."""
        if not self.enabled:
            return []
        payloads = self._fetch_payloads(
            """
            SELECT payload_json FROM training_jobs
            ORDER BY updated_at DESC
            LIMIT ?
            """,
            (max(1, int(limit)),),
        )
        return [_from_json(text) for text in payloads]

    def clear_training_jobs(self) -> int:
        """Delete every training job and return how many rows were removed."""
        if not self.enabled:
            return 0
        return self._delete("DELETE FROM training_jobs")

    def delete_training_job(self, job_id: str) -> int:
        """Delete one training job by id and return the number of rows removed."""
        if not self.enabled:
            return 0
        return self._delete("DELETE FROM training_jobs WHERE job_id = ?", (str(job_id),))

    # Simulation runs -------------------------------------------------------
    def upsert_simulation_run(
        self,
        *,
        run_id: str,
        task_id: str,
        agent_mode: str,
        status: str,
        payload: dict[str, Any],
    ) -> None:
        """Insert or update a simulation run.

        The caller's ``payload`` is copied, then stamped with the identifying
        fields and timestamps before being stored; the original dict is not
        modified.
        """
        if not self.enabled:
            return
        now = _now()
        created_at = float(payload.get("created_at") or now)
        payload = dict(payload)
        payload["run_id"] = run_id
        payload["created_at"] = created_at
        payload["updated_at"] = now
        payload["task_id"] = task_id
        payload["agent_mode"] = agent_mode
        payload["status"] = status
        with self._session() as conn:
            conn.execute(
                """
                INSERT INTO simulation_runs (run_id, created_at, updated_at, task_id, agent_mode, status, payload_json)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(run_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    task_id = excluded.task_id,
                    agent_mode = excluded.agent_mode,
                    status = excluded.status,
                    payload_json = excluded.payload_json
                """,
                (
                    run_id,
                    created_at,
                    now,
                    task_id,
                    agent_mode,
                    status,
                    _as_json(payload),
                ),
            )

    def list_simulation_runs(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return up to ``limit`` run summaries, newest update first.

        The ``trace`` entry is removed from each summary; when it was a list,
        ``trace_len`` and ``has_trace`` are added in its place.
        """
        if not self.enabled:
            return []
        payloads = self._fetch_payloads(
            """
            SELECT payload_json FROM simulation_runs
            ORDER BY updated_at DESC
            LIMIT ?
            """,
            (max(1, int(limit)),),
        )
        out: list[dict[str, Any]] = []
        for text in payloads:
            data = _from_json(text)
            if isinstance(data.get("trace"), list):
                data["trace_len"] = len(data["trace"])
                data["has_trace"] = bool(data["trace"])
            data.pop("trace", None)
            out.append(data)
        return out

    def get_simulation_run(self, run_id: str) -> dict[str, Any] | None:
        """Return the full payload of one run (including trace), or None."""
        if not self.enabled:
            return None
        payloads = self._fetch_payloads(
            "SELECT payload_json FROM simulation_runs WHERE run_id = ?",
            (run_id,),
        )
        return _from_json(payloads[0]) if payloads else None

    def clear_simulation_runs(self) -> int:
        """Delete every simulation run and return how many rows were removed."""
        if not self.enabled:
            return 0
        return self._delete("DELETE FROM simulation_runs")

    # Comparison runs -------------------------------------------------------
    def create_comparison_run(self, payload: dict[str, Any]) -> str | None:
        """Store a comparison run and return its id (None when disabled).

        A ``comparison_id`` in the payload is reused; otherwise a new UUID is
        generated. Storing an existing id again updates that row.
        """
        if not self.enabled:
            return None
        comparison_id = str(payload.get("comparison_id") or uuid4())
        now = _now()
        body = dict(payload)
        body["comparison_id"] = comparison_id
        body["created_at"] = float(body.get("created_at") or now)
        body["updated_at"] = now
        task_id = str(body.get("task_id") or "")
        with self._session() as conn:
            conn.execute(
                """
                INSERT INTO comparison_runs (comparison_id, created_at, updated_at, task_id, payload_json)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(comparison_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    task_id = excluded.task_id,
                    payload_json = excluded.payload_json
                """,
                (
                    comparison_id,
                    float(body["created_at"]),
                    now,
                    task_id,
                    _as_json(body),
                ),
            )
        return comparison_id

    def list_comparison_runs(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return up to ``limit`` comparison payloads, newest update first."""
        if not self.enabled:
            return []
        payloads = self._fetch_payloads(
            """
            SELECT payload_json FROM comparison_runs
            ORDER BY updated_at DESC
            LIMIT ?
            """,
            (max(1, int(limit)),),
        )
        return [_from_json(text) for text in payloads]

    def get_comparison_run(self, comparison_id: str) -> dict[str, Any] | None:
        """Return the payload of one comparison run, or None if not found."""
        if not self.enabled:
            return None
        payloads = self._fetch_payloads(
            "SELECT payload_json FROM comparison_runs WHERE comparison_id = ?",
            (comparison_id,),
        )
        return _from_json(payloads[0]) if payloads else None

    def clear_comparison_runs(self) -> int:
        """Delete every comparison run and return how many rows were removed."""
        if not self.enabled:
            return 0
        return self._delete("DELETE FROM comparison_runs")
app/reward.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ reward.py — Gov Workflow OpenEnv Phase 4: Dense Reward Shaping
3
+
4
+ Formula (per step):
5
+ R_t = progress_reward + completion_reward + recovery_reward + stability_bonus
6
+ - waiting_penalty - sla_penalty - fairness_penalty
7
+ - invalid_action_penalty - idle_capacity_penalty - oscillation_penalty
8
+
9
+ All coefficients are named constants — never magic numbers inline.
10
+ """
11
+ from __future__ import annotations
12
+ from app.models import RewardModel
13
+
14
+ # ── Positive coefficients ─────────────────────────────────────────
15
# Reward gained per unit of positive signal.
COEFF_PROGRESS = 0.7  # each stage advance
COEFF_COMPLETION = 4.0  # each completed case
COEFF_RECOVERY = 1.5  # each missing-doc case that became unblocked
COEFF_STABILITY = 0.1  # a step with zero SLA breaches and no invalid action

# ── Negative coefficients ─────────────────────────────────────────
# Reward lost per unit of negative signal.
COEFF_WAITING = 0.04  # each case in the backlog, per day
COEFF_SLA = 1.5  # each new SLA breach
COEFF_FAIRNESS = 2.0  # each unit of fairness gap above the tolerance
COEFF_INVALID = 1.5  # flat charge for an invalid action
COEFF_IDLE = 0.05  # each idle officer-day
COEFF_OSCILLATION = 0.15  # each oscillation event (repeated contradictory actions)

# ── Fairness default tolerance (when no threshold set by task) ────
DEFAULT_FAIRNESS_TOLERANCE = 0.40
30
+
31
+
32
def compute_reward(
    *,
    stage_advances: int,
    completions: int,
    active_backlog: int,
    new_sla_breaches: int,
    fairness_gap: float,
    fairness_threshold: float | None,
    invalid_action: bool,
    idle_capacity: int,
    newly_unblocked_docs: int = 0,
    oscillation_detected: bool = False,
    award_stability_bonus: bool = True,
) -> RewardModel:
    """
    Build the dense reward for a single step.

    Args:
        stage_advances: Count of stage advances this step.
        completions: Count of cases completed this step.
        active_backlog: Count of cases still pending; drives the waiting penalty.
        new_sla_breaches: Count of SLA breaches that occurred this step.
        fairness_gap: Observed fairness gap; only the part above the
            tolerance is penalised.
        fairness_threshold: Tolerated fairness gap, or None to fall back to
            DEFAULT_FAIRNESS_TOLERANCE.
        invalid_action: True when the submitted action was invalid.
        idle_capacity: Count of idle officer-days this step.
        newly_unblocked_docs: Count of missing-doc cases unblocked this step.
        oscillation_detected: True when an oscillation event was detected.
        award_stability_bonus: When False the stability bonus is withheld
            even if the step had no SLA breach and no invalid action.

    Returns:
        RewardModel whose positive components are stored as positive numbers,
        whose penalties are stored as negative numbers, and whose
        ``total_reward`` is their combined scalar. Every value is rounded to
        4 decimals.
    """
    # ── Positive components ───────────────────────────────────────
    progress_reward = COEFF_PROGRESS * stage_advances
    completion_reward = COEFF_COMPLETION * completions
    recovery_reward = COEFF_RECOVERY * newly_unblocked_docs

    stability_bonus = 0.0
    if award_stability_bonus and new_sla_breaches == 0 and not invalid_action:
        stability_bonus = COEFF_STABILITY

    # ── Negative components (kept as positive magnitudes here) ────
    waiting_penalty = COEFF_WAITING * active_backlog
    sla_penalty = COEFF_SLA * new_sla_breaches

    if fairness_threshold is not None:
        tolerance = fairness_threshold
    else:
        tolerance = DEFAULT_FAIRNESS_TOLERANCE
    unfairness_excess = max(0.0, fairness_gap - tolerance)
    fairness_penalty = COEFF_FAIRNESS * unfairness_excess

    invalid_action_penalty = 0.0
    if invalid_action:
        invalid_action_penalty = COEFF_INVALID

    idle_capacity_penalty = COEFF_IDLE * idle_capacity

    oscillation_penalty = 0.0
    if oscillation_detected:
        oscillation_penalty = COEFF_OSCILLATION

    # ── Total ─────────────────────────────────────────────────────
    total_reward = (
        progress_reward + completion_reward + recovery_reward + stability_bonus
        - waiting_penalty - sla_penalty - fairness_penalty
        - invalid_action_penalty - idle_capacity_penalty - oscillation_penalty
    )

    # Penalties are reported with a negative sign.
    components = {
        "total_reward": total_reward,
        "progress_reward": progress_reward,
        "completion_reward": completion_reward,
        "recovery_reward": recovery_reward,
        "stability_bonus": stability_bonus,
        "waiting_penalty": -waiting_penalty,
        "sla_penalty": -sla_penalty,
        "fairness_penalty": -fairness_penalty,
        "invalid_action_penalty": -invalid_action_penalty,
        "idle_capacity_penalty": -idle_capacity_penalty,
        "oscillation_penalty": -oscillation_penalty,
    }
    return RewardModel(**{name: round(value, 4) for name, value in components.items()})
app/sector_profiles.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ sector_profiles.py — Phase 2 update: enrichment type, probability, delay range per service.
3
+ """
4
+
5
+ from app.models import (
6
+ DocEnrichmentType, SectorProfile, ServiceType, UrgencyProfile
7
+ )
8
+
9
# Each profile lists its keyword arguments in the same grouping:
#   identity -> SLA / urgency / throughput -> document quality
#   -> verification and scrutiny -> document enrichment.

# Income certificate: enrichment disabled (type NONE, probability 0.0).
INCOME_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.INCOME_CERTIFICATE,
    sector_name="Revenue Sector — Income Certificate",
    sla_days=21,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=8.0,
    missing_docs_probability=0.45,
    doc_defect_rate_digital=0.30,
    doc_defect_rate_paper=0.65,
    field_verification_probability=0.30,
    field_verification_days=3,
    manual_scrutiny_intensity=0.60,
    decision_backlog_sensitivity=0.70,
    system_dependency_risk=0.20,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=2,
)

# Land registration: enriched with past land records.
LAND_REGISTRATION_PROFILE = SectorProfile(
    service_type=ServiceType.LAND_REGISTRATION,
    sector_name="Land Sector — 7/12 Mutation",
    sla_days=30,
    urgency_profile=UrgencyProfile.LOW_BUT_STICKY,
    base_processing_rate=4.0,
    missing_docs_probability=0.35,
    doc_defect_rate_digital=0.25,
    doc_defect_rate_paper=0.55,
    field_verification_probability=0.65,
    field_verification_days=5,
    manual_scrutiny_intensity=0.75,
    decision_backlog_sensitivity=0.85,
    system_dependency_risk=0.55,
    doc_enrichment_type=DocEnrichmentType.PAST_LAND_RECORDS,
    doc_enrichment_probability=0.70,
    doc_enrichment_delay_days_min=2,
    doc_enrichment_delay_days_max=5,  # REVENUE_DB_DELAY event adds 1-2 more
)

# Caste certificate: enriched with family caste history.
CASTE_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.CASTE_CERTIFICATE,
    sector_name="Revenue Sector — Caste Certificate",
    sla_days=21,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=7.0,
    missing_docs_probability=0.40,
    doc_defect_rate_digital=0.25,
    doc_defect_rate_paper=0.60,
    field_verification_probability=0.35,
    field_verification_days=3,
    manual_scrutiny_intensity=0.65,
    decision_backlog_sensitivity=0.65,
    system_dependency_risk=0.25,
    doc_enrichment_type=DocEnrichmentType.FAMILY_CASTE_HISTORY,
    doc_enrichment_probability=0.55,
    doc_enrichment_delay_days_min=2,
    doc_enrichment_delay_days_max=4,
)

# Birth certificate: enrichment disabled.
BIRTH_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.BIRTH_CERTIFICATE,
    sector_name="Municipal Sector — Birth Certificate",
    sla_days=7,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=15.0,
    missing_docs_probability=0.20,
    doc_defect_rate_digital=0.15,
    doc_defect_rate_paper=0.35,
    field_verification_probability=0.05,
    field_verification_days=1,
    manual_scrutiny_intensity=0.30,
    decision_backlog_sensitivity=0.40,
    system_dependency_risk=0.30,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=1,
)

# Passport: enriched with police verification.
PASSPORT_PROFILE = SectorProfile(
    service_type=ServiceType.PASSPORT,
    sector_name="National Sector — Passport",
    sla_days=30,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=5.0,
    missing_docs_probability=0.25,
    doc_defect_rate_digital=0.20,
    doc_defect_rate_paper=0.50,
    field_verification_probability=0.90,
    field_verification_days=14,
    manual_scrutiny_intensity=0.80,
    decision_backlog_sensitivity=0.75,
    system_dependency_risk=0.35,
    doc_enrichment_type=DocEnrichmentType.POLICE_VERIFICATION,
    doc_enrichment_probability=0.85,
    doc_enrichment_delay_days_min=7,
    doc_enrichment_delay_days_max=14,
)

# GST registration: enriched with a tax record cross-check.
GST_REGISTRATION_PROFILE = SectorProfile(
    service_type=ServiceType.GST_REGISTRATION,
    sector_name="Tax Sector — GST Registration",
    sla_days=7,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=10.0,
    missing_docs_probability=0.30,
    doc_defect_rate_digital=0.20,
    doc_defect_rate_paper=0.50,
    field_verification_probability=0.20,
    field_verification_days=2,
    manual_scrutiny_intensity=0.55,
    decision_backlog_sensitivity=0.60,
    system_dependency_risk=0.45,
    doc_enrichment_type=DocEnrichmentType.TAX_RECORD_CROSS_CHECK,
    doc_enrichment_probability=0.50,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=3,
)

# Driving license: enrichment disabled.
DRIVING_LICENSE_PROFILE = SectorProfile(
    service_type=ServiceType.DRIVING_LICENSE,
    sector_name="Transport Sector — Driving License",
    sla_days=14,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=12.0,
    missing_docs_probability=0.28,
    doc_defect_rate_digital=0.18,
    doc_defect_rate_paper=0.45,
    field_verification_probability=0.40,
    field_verification_days=2,
    manual_scrutiny_intensity=0.50,
    decision_backlog_sensitivity=0.55,
    system_dependency_risk=0.30,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=1,
)

# Aadhaar card: enrichment disabled.
AADHAAR_CARD_PROFILE = SectorProfile(
    service_type=ServiceType.AADHAAR_CARD,
    sector_name="National Identity Sector - Aadhaar Card",
    sla_days=10,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=13.0,
    missing_docs_probability=0.22,
    doc_defect_rate_digital=0.12,
    doc_defect_rate_paper=0.30,
    field_verification_probability=0.18,
    field_verification_days=2,
    manual_scrutiny_intensity=0.42,
    decision_backlog_sensitivity=0.50,
    system_dependency_risk=0.38,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=2,
)

# Lookup table from service type to its profile.
SECTOR_REGISTRY: dict = {
    ServiceType.INCOME_CERTIFICATE: INCOME_CERTIFICATE_PROFILE,
    ServiceType.LAND_REGISTRATION: LAND_REGISTRATION_PROFILE,
    ServiceType.CASTE_CERTIFICATE: CASTE_CERTIFICATE_PROFILE,
    ServiceType.BIRTH_CERTIFICATE: BIRTH_CERTIFICATE_PROFILE,
    ServiceType.PASSPORT: PASSPORT_PROFILE,
    ServiceType.GST_REGISTRATION: GST_REGISTRATION_PROFILE,
    ServiceType.DRIVING_LICENSE: DRIVING_LICENSE_PROFILE,
    ServiceType.AADHAAR_CARD: AADHAAR_CARD_PROFILE,
}
179
+
180
def get_sector_profile(service_type: ServiceType) -> SectorProfile:
    """Look up the profile registered for ``service_type``.

    Raises:
        KeyError: If ``SECTOR_REGISTRY`` has no entry for the service type.
    """
    if service_type in SECTOR_REGISTRY:
        return SECTOR_REGISTRY[service_type]
    raise KeyError(f"No SectorProfile for {service_type}")
app/signal_computer.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ signal_computer.py — Gov Workflow OpenEnv v2.0
3
+ Computes normalized compressed state signals for observations.
4
+ All signals are deterministic and normalized to [0.0, 1.0].
5
+ """
6
+ from typing import Dict
7
+ from app.models import QueueSnapshot, OfficerPool
8
+
9
+
10
class ComputedSignals:
    """Plain mutable container for the signals produced by SignalComputer.

    A new instance starts with neutral defaults; the computer overwrites each
    attribute it is able to derive.
    """

    def __init__(self):
        initial_values = (
            ("backlog_pressure", 0.0),
            ("sla_risk_score", 0.0),
            ("fairness_index", 1.0),
            ("resource_utilization", 0.0),
            ("digital_intake_ratio", 0.5),
            ("blocked_cases_missing_docs", 0),
            ("blocked_cases_enrichment", 0),
            ("field_verification_load", 0.0),
        )
        # Assigned in this order so the instance attributes keep their order.
        for attribute, value in initial_values:
            setattr(self, attribute, value)
20
+
21
+
22
class SignalComputer:
    """Derives compressed observation signals from per-service queue state."""

    def compute(
        self,
        queue_snapshots: Dict[str, QueueSnapshot],
        officer_pool: OfficerPool,
        todays_arrivals: int = 0,
        digital_arrivals: int = 0,
        capacity_per_day: float = 1.0,
    ) -> ComputedSignals:
        """Compute all signals for the current state.

        Args:
            queue_snapshots: Mapping to one snapshot per service queue; only
                the values are used.
            officer_pool: Source of ``allocated`` (a mapping whose values are
                summed) and ``available_officers``.
            todays_arrivals: Number of cases that arrived today.
            digital_arrivals: Number of today's arrivals that were digital.
            capacity_per_day: Daily processing capacity; five days of it are
                treated as a full backlog.

        Returns:
            ComputedSignals. With no snapshots the defaults are returned
            untouched. Every ratio-type signal is clamped to [0.0, 1.0]; the
            two blocked-case signals are plain counts.
        """
        signals = ComputedSignals()
        snapshots = list(queue_snapshots.values())
        if not snapshots:
            return signals

        total_pending = sum(s.total_pending for s in snapshots)
        # Denominator guard: never divide by zero when nothing is pending.
        total_nonzero = max(1, total_pending)

        # Backlog pressure: pending cases relative to five days of capacity.
        capacity_ceiling = max(1.0, capacity_per_day * 5.0)
        signals.backlog_pressure = min(1.0, total_pending / capacity_ceiling)

        # SLA risk score: per-queue risk weighted by that queue's pending count.
        weighted_risk = sum(s.current_sla_risk * s.total_pending for s in snapshots)
        signals.sla_risk_score = min(1.0, max(0.0, weighted_risk / total_nonzero))

        # Fairness index: 1 - coefficient of variation of completion rates.
        if len(snapshots) < 2:
            signals.fairness_index = 1.0
        else:
            rates = []
            for s in snapshots:
                total = s.total_pending + s.total_completed_today
                rates.append(s.total_completed_today / max(1, total) if total > 0 else 0.0)
            mean = sum(rates) / len(rates)
            if mean > 0:
                variance = sum((r - mean) ** 2 for r in rates) / len(rates)
                cv = (variance ** 0.5) / mean
                signals.fairness_index = max(0.0, 1.0 - min(1.0, cv))
            else:
                # Nothing completed anywhere: treated as perfectly even.
                signals.fairness_index = 1.0

        # Resource utilization: allocated officers over available officers.
        allocated = sum(officer_pool.allocated.values())
        signals.resource_utilization = min(1.0, allocated / max(1, officer_pool.available_officers))

        # Digital intake ratio: neutral 0.5 when there were no arrivals today.
        signals.digital_intake_ratio = (
            min(1.0, digital_arrivals / todays_arrivals) if todays_arrivals > 0 else 0.5
        )

        # Blocked cases (raw counts).
        signals.blocked_cases_missing_docs = sum(s.blocked_missing_docs for s in snapshots)
        signals.blocked_cases_enrichment = sum(s.blocked_enrichment for s in snapshots)

        # Field verification load: clamped like the other ratios, because the
        # field count can exceed the guarded denominator (for example when
        # nothing is counted as pending).
        total_in_field = sum(s.field_verification_pending for s in snapshots)
        signals.field_verification_load = min(1.0, max(0.0, total_in_field / total_nonzero))

        return signals
app/simulator.py ADDED
@@ -0,0 +1,1106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import random
6
+ import re
7
+ from dataclasses import dataclass
8
+ from typing import Any, Literal
9
+
10
+ from openai import OpenAI
11
+
12
+ from app.baselines import POLICIES, backlog_clearance_policy
13
+ from app.env import GovWorkflowEnv
14
+ from app.graders import grade_episode
15
+ from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
16
+ from app.engine import DayResult, DaySimulator
17
+
18
+ from enum import Enum
19
class SimulationAgentModeEnum(str, Enum):
    """Agent back-ends a simulation session can be driven by.

    Members subclass ``str`` so they compare equal to their plain string
    values (``SimulationAgentModeEnum.trained_rl == "trained_rl"``), which is
    how the rest of this module checks the mode.
    """

    baseline_policy = "baseline_policy"
    llm_inference = "llm_inference"
    trained_rl = "trained_rl"


# Public alias used in annotations throughout this module. An earlier
# ``Literal[...]`` alias of the same name was rebound to the enum right after
# being defined, so it never had any effect and has been removed.
SimulationAgentMode = SimulationAgentModeEnum
27
+
28
+
29
# Built-in NVIDIA model identifiers. They are appended after the values of
# NVIDIA_MODEL and NVIDIA_MODEL_FALLBACKS when the NVIDIA model list is built,
# so environment-provided models come first in the initial ordering.
LEGACY_NVIDIA_MODEL_POOL = [
    "meta/llama-3.3-70b-instruct",
    "qwen/qwen3-next-80b-a3b-instruct",
    "moonshotai/kimi-k2-instruct-0905",
    "meta/llama-3.1-405b-instruct",
    "deepseek-ai/deepseek-v3.2",
    "qwen/qwq-32b",
    "mistralai/mixtral-8x22b-instruct-v0.1",
    "google/gemma-3-27b-it",
    "microsoft/phi-4-mini-instruct",
    "meta/llama-3.1-8b-instruct",
]
41
+
42
+
43
@dataclass
class SimulationRun:
    """Plain record holding the outcome of one simulation run."""

    # Which task was run and by which kind of agent.
    task_id: str
    agent_mode: SimulationAgentMode
    # Seed the run was started with.
    seed: int
    # Accumulated reward and the grader's verdict.
    total_reward: float
    score: float
    grader_name: str
    # Aggregate summary plus the per-step rows.
    summary: dict[str, Any]
    trace: list[dict[str, Any]]
53
+
54
+
55
+ def _dedupe(values: list[str | None]) -> list[str]:
56
+ out: list[str] = []
57
+ for value in values:
58
+ if value is None:
59
+ continue
60
+ v = value.strip()
61
+ if v and v not in out:
62
+ out.append(v)
63
+ return out
64
+
65
+
66
+ def _env_csv_list(name: str) -> list[str]:
67
+ raw = os.getenv(name, "").strip()
68
+ if not raw:
69
+ return []
70
+ return [x.strip() for x in raw.split(",") if x.strip()]
71
+
72
+
73
+ def _extract_json_object(text: str) -> dict[str, Any] | None:
74
+ text = (text or "").strip()
75
+ if not text:
76
+ return None
77
+ try:
78
+ parsed = json.loads(text)
79
+ if isinstance(parsed, dict):
80
+ return parsed
81
+ except json.JSONDecodeError:
82
+ pass
83
+
84
+ match = re.search(r"\{.*\}", text, flags=re.DOTALL)
85
+ if not match:
86
+ return None
87
+ try:
88
+ parsed = json.loads(match.group(0))
89
+ except json.JSONDecodeError:
90
+ return None
91
+ return parsed if isinstance(parsed, dict) else None
92
+
93
+
94
def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
    """Turn a raw (typically LLM-produced) payload into an ``ActionModel``.

    Legacy field names are remapped before construction:

    * ``service`` -> ``service_target`` (when the latter is absent)
    * ``service`` + ``target_service`` + ``officer_delta`` ->
      ``reallocation_delta`` ({source: -delta, target: +delta})
    * ``service_target`` + ``officer_delta`` -> ``capacity_assignment``
    * ``case_id`` is dropped

    An empty payload, or any failure while remapping or constructing the
    model, yields an ``advance_time`` action (deliberate best-effort fallback).
    """
    if not payload:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)

    def _service_key(svc: Any) -> str:
        # Enum members contribute their value; anything else is stringified.
        return svc.value if hasattr(svc, "value") else str(svc)

    try:
        remapped = dict(payload)
        if "service" in remapped and "service_target" not in remapped:
            remapped["service_target"] = remapped.pop("service")
        if "target_service" in remapped:
            src = remapped.pop("service_target", None)
            tgt = remapped.pop("target_service", None)
            delta = remapped.pop("officer_delta", 1)
            if src:
                # Keep the requested source service on the action. It used to
                # be discarded here, so the later repair step could not see
                # which service the caller wanted to move officers from.
                remapped["service_target"] = src
            remapped["reallocation_delta"] = (
                {
                    _service_key(src): -int(delta),
                    _service_key(tgt): int(delta),
                }
                if src and tgt
                else None
            )
        if "officer_delta" in remapped and "capacity_assignment" not in remapped:
            svc = remapped.get("service_target")
            if svc:
                remapped["capacity_assignment"] = {
                    _service_key(svc): int(remapped.pop("officer_delta"))
                }
            else:
                remapped.pop("officer_delta", None)
        remapped.pop("case_id", None)
        return ActionModel(**remapped)
    except Exception:
        # Best-effort: an unusable payload degrades to a no-op time advance.
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
122
+
123
+
124
+ def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
125
+ return [
126
+ {
127
+ "service": q.service_type.value,
128
+ "active_cases": q.total_pending,
129
+ "missing_docs_cases": q.blocked_missing_docs,
130
+ "urgent_cases": q.urgent_pending,
131
+ "breached_cases": q.total_sla_breached,
132
+ "avg_age_days": q.avg_waiting_days,
133
+ }
134
+ for q in obs.queue_snapshots.values()
135
+ ]
136
+
137
+
138
+ def _recommended_min_steps(task_id: str) -> int:
139
+ if task_id == "cross_department_hard":
140
+ return 70
141
+ if task_id == "mixed_urgency_medium":
142
+ return 60
143
+ return 40
144
+
145
+
146
+ def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
147
+ pool = obs.officer_pool
148
+ # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
149
+ alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
150
+ raw = alloc_dict.get(service)
151
+ if raw is None:
152
+ raw = alloc_dict.get(service.value if hasattr(service, 'value') else str(service), 0)
153
+ return int(raw or 0)
154
+
155
+
156
+ def _top_backlog_service(
157
+ obs: ObservationModel,
158
+ *,
159
+ exclude: ServiceType | None = None,
160
+ ) -> ServiceType | None:
161
+ qs = obs.queue_snapshots
162
+ snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
163
+ ranked = [q for q in snapshots if getattr(q, 'service_type', getattr(q, 'service', None)) != exclude]
164
+ if not ranked:
165
+ return None
166
+ ranked.sort(
167
+ key=lambda q: (
168
+ getattr(q, 'total_pending', getattr(q, 'active_cases', 0))
169
+ + 2 * getattr(q, 'total_sla_breached', getattr(q, 'breached_cases', 0))
170
+ + getattr(q, 'urgent_pending', getattr(q, 'urgent_cases', 0)),
171
+ getattr(q, 'avg_waiting_days', getattr(q, 'avg_age_days', 0)),
172
+ ),
173
+ reverse=True,
174
+ )
175
+ return getattr(ranked[0], 'service_type', getattr(ranked[0], 'service', None))
176
+
177
+
178
+ def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
179
+ qs = obs.queue_snapshots
180
+ snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
181
+ candidates = [
182
+ q for q in snapshots
183
+ if getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)) > 0
184
+ ]
185
+ if not candidates:
186
+ return None
187
+ candidates.sort(
188
+ key=lambda q: (
189
+ getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)),
190
+ getattr(q, 'total_pending', getattr(q, 'active_cases', 0)),
191
+ ),
192
+ reverse=True,
193
+ )
194
+ return getattr(candidates[0], 'service_type', getattr(candidates[0], 'service', None))
195
+
196
+
197
def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
    """Return the queue service with the largest positive officer allocation.

    Snapshots without a (truthy) service are skipped. When several services
    share the largest allocation the earliest snapshot wins. Returns
    ``None`` when no service has any officers allocated.
    """
    raw = obs.queue_snapshots
    snapshots = list(raw.values()) if isinstance(raw, dict) else list(raw)

    best_service = None
    best_alloc = 0
    for snap in snapshots:
        candidate = getattr(snap, 'service_type', getattr(snap, 'service', None))
        if not candidate:
            continue
        alloc = _alloc_for(obs, candidate)
        # Strict comparison keeps the first service seen among equal allocations.
        if alloc > best_alloc:
            best_service, best_alloc = candidate, alloc
    return best_service
209
+
210
+
211
def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
    """Report, per action type, whether the observation currently permits it.

    * set_priority_mode / advance_time: always allowed
    * assign_capacity: idle (reserve) officers exist and some queue has pending cases
    * request_missing_documents: some queue has cases blocked on documents
    * escalate_service: escalation budget remains and some queue has pending cases
    * reallocate_officers: at least one queue is staffed and there are two or more queues
    """

    def _field(snap: Any, current: str, legacy: str, default: Any = 0) -> Any:
        return getattr(snap, current, getattr(snap, legacy, default))

    officer_pool = obs.officer_pool
    idle = int(getattr(officer_pool, 'idle_officers', getattr(officer_pool, 'reserve_officers', 0)))
    raw = obs.queue_snapshots
    snapshots = list(raw.values()) if isinstance(raw, dict) else list(raw)

    reserve_available = idle > 0
    docs_blocked = any(_field(s, 'blocked_missing_docs', 'missing_docs_cases') > 0 for s in snapshots)
    backlog_present = any(_field(s, 'total_pending', 'active_cases') > 0 for s in snapshots)
    budget_left = int(obs.escalation_budget_remaining) > 0
    any_staffed = any(
        _alloc_for(obs, _field(s, 'service_type', 'service', None)) > 0 for s in snapshots
    )

    # Insertion order matters to callers that iterate the mask.
    mask: dict[ActionType, bool] = {}
    mask[ActionType.SET_PRIORITY_MODE] = True
    mask[ActionType.ADVANCE_TIME] = True
    mask[ActionType.ASSIGN_CAPACITY] = reserve_available and backlog_present
    mask[ActionType.REQUEST_MISSING_DOCUMENTS] = docs_blocked
    mask[ActionType.ESCALATE_SERVICE] = budget_left and backlog_present
    mask[ActionType.REALLOCATE_OFFICERS] = any_staffed and len(snapshots) >= 2
    return mask
239
+
240
+
241
def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
    """Split the action mask into ``(allowed, blocked)`` lists of action-type values."""
    permitted: list[str] = []
    denied: list[str] = []
    for action_type, is_open in _compute_action_mask(obs).items():
        bucket = permitted if is_open else denied
        bucket.append(action_type.value)
    return permitted, denied
246
+
247
+
248
def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
    """Pick a heuristic action for ``obs`` and explain the choice.

    Candidates are tried in this order, the first applicable one wins:

    1. assign one idle officer to the top-backlog service
    2. request missing documents for the most blocked service
    3. escalate the queue with the most SLA breaches / pending cases
       (only while escalation budget remains)
    4. move one officer from the best-staffed service to the top-backlog one
    5. advance time

    Returns:
        ``(action, reason)`` where ``reason`` is a short human-readable note.

    Actions are built with the same field names that ``_coerce_action`` and
    ``_repair_action_for_observation`` produce (``service_target``,
    ``capacity_assignment``, ``escalation_target``, ``reallocation_delta``).
    The legacy ``service`` / ``officer_delta`` names are not used because
    this function's result is frequently returned to the caller without a
    further repair pass.
    """

    def _field(snap: Any, current: str, legacy: str, default: Any = 0) -> Any:
        return getattr(snap, current, getattr(snap, legacy, default))

    def _service_key(svc: Any) -> str:
        return svc.value if hasattr(svc, "value") else str(svc)

    top_backlog = _top_backlog_service(obs)
    top_missing = _service_with_missing_docs(obs)

    # Same idle/reserve fallback the mask and repair code use.
    pool = obs.officer_pool
    idle = int(getattr(pool, 'idle_officers', getattr(pool, 'reserve_officers', 0)))

    if idle > 0 and top_backlog is not None:
        return (
            ActionModel(
                action_type=ActionType.ASSIGN_CAPACITY,
                service_target=top_backlog,
                capacity_assignment={_service_key(top_backlog): 1},
            ),
            "high-impact: assign reserve capacity to top backlog service",
        )

    if top_missing is not None:
        return (
            ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=top_missing),
            "high-impact: clear missing-document bottleneck",
        )

    if int(obs.escalation_budget_remaining) > 0:
        qs = obs.queue_snapshots
        snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
        hot = sorted(
            snapshots,
            key=lambda q: (
                _field(q, 'total_sla_breached', 'breached_cases'),
                _field(q, 'total_pending', 'active_cases'),
                _field(q, 'urgent_pending', 'urgent_cases'),
            ),
            reverse=True,
        )
        # Escalating an idle queue would waste budget, so require some load.
        if hot and (
            _field(hot[0], 'total_sla_breached', 'breached_cases') > 0
            or _field(hot[0], 'total_pending', 'active_cases') > 0
        ):
            svc = _field(hot[0], 'service_type', 'service', None)
            return (
                ActionModel(action_type=ActionType.ESCALATE_SERVICE, escalation_target=svc),
                "high-impact: escalate highest SLA-risk service",
            )

    source = _service_with_officers(obs)
    if source is not None and _alloc_for(obs, source) > 0:
        target = _top_backlog_service(obs, exclude=source)
        if target is not None and target != source:
            return (
                ActionModel(
                    action_type=ActionType.REALLOCATE_OFFICERS,
                    service_target=source,
                    reallocation_delta={_service_key(source): -1, _service_key(target): 1},
                ),
                "high-impact: reallocate one officer toward highest backlog",
            )

    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
300
+
301
+
302
def _repair_action_for_observation(
    action: ActionModel,
    obs: ObservationModel,
) -> tuple[ActionModel, str | None]:
    """Make ``action`` executable against ``obs``, rewriting it if needed.

    Returns ``(action_to_run, note)``. ``note`` is ``None`` when the action is
    returned unchanged (or its rebuilt form dumps identically to the input);
    otherwise it is a short description of what was repaired or why a
    heuristic fallback from ``_best_high_impact_action`` was substituted.
    """
    mask = _compute_action_mask(obs)
    at = action.action_type

    # Action type not permitted right now: replace it outright.
    if not bool(mask.get(at, True)):
        fallback, why = _best_high_impact_action(obs)
        return fallback, f"masked {at.value}; {why}"

    if at == ActionType.ADVANCE_TIME:
        return action, None

    if at == ActionType.SET_PRIORITY_MODE:
        if action.priority_mode is None:
            return (
                ActionModel(action_type=ActionType.SET_PRIORITY_MODE, priority_mode=PriorityMode.BACKLOG_CLEARANCE),
                "missing priority_mode, defaulted to backlog_clearance",
            )
        return action, None

    if at == ActionType.ASSIGN_CAPACITY:
        pool = obs.officer_pool
        reserve = int(getattr(pool, 'idle_officers', getattr(pool, 'reserve_officers', 0)))
        if reserve <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"reserve officers exhausted; {why}"
        # Target service: explicit field first, then legacy field, then heuristic.
        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _top_backlog_service(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no service available for assign_capacity; {why}"
        cap = action.capacity_assignment or {}
        delta = cap.get(service.value, cap.get(str(service), 1))
        # Clamp the requested officer count into [1, reserve].
        delta = max(1, min(int(delta), reserve))
        repaired = ActionModel(
            action_type=ActionType.ASSIGN_CAPACITY,
            service_target=service,
            capacity_assignment={service.value: delta},
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired assign_capacity payload"
        return repaired, note

    if at == ActionType.REQUEST_MISSING_DOCUMENTS:
        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _service_with_missing_docs(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no missing-doc queue available; {why}"
        repaired = ActionModel(
            action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
            service_target=service,
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired request_missing_documents payload"
        return repaired, note

    if at == ActionType.ESCALATE_SERVICE:
        if int(obs.escalation_budget_remaining) <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"escalation budget exhausted; {why}"
        service = (
            getattr(action, 'escalation_target', None)
            or getattr(action, 'service_target', None)
            or getattr(action, 'service', None)
            or _top_backlog_service(obs)
        )
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no escalation target available; {why}"
        repaired = ActionModel(
            action_type=ActionType.ESCALATE_SERVICE,
            escalation_target=service,
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired escalate_service payload"
        return repaired, note

    if at == ActionType.REALLOCATE_OFFICERS:
        source = (
            getattr(action, 'service_target', None)
            or getattr(action, 'service', None)
            or _service_with_officers(obs)
        )
        if source is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no staffed source service; {why}"
        source_alloc = _alloc_for(obs, source)
        # Requested source has no officers: fall back to a staffed service.
        if source_alloc <= 0:
            source = _service_with_officers(obs)
            source_alloc = _alloc_for(obs, source) if source is not None else 0
        if source is None or source_alloc <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"insufficient source officers; {why}"

        # Phase 2: target comes from reallocation_delta; Phase 1 from target_service
        rdelta = action.reallocation_delta or {}
        target = None
        # Only the first positive entry is considered; an unknown key leaves
        # target as None so the fallbacks below take over.
        for k, v in rdelta.items():
            if v > 0:
                try:
                    target = ServiceType(k)
                except Exception:
                    pass
                break
        if target is None:
            target = getattr(action, 'target_service', None)
        if target is None or target == source:
            target = _top_backlog_service(obs, exclude=source)
        if target is None or target == source:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"missing distinct target_service; {why}"

        # Clamp the number of officers moved into [1, source_alloc].
        delta = max(1, min(abs(rdelta.get(source.value, 1)), source_alloc))
        repaired = ActionModel(
            action_type=ActionType.REALLOCATE_OFFICERS,
            service_target=source,
            reallocation_delta={source.value: -delta, target.value: delta},
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired reallocate_officers payload"
        return repaired, note

    # Unrecognised action types pass through untouched.
    return action, None
422
+
423
+ """
424
+ The high-level simulation orchestration now lives in app.engine.
425
+ This module re-exports the public runtime API so existing imports
426
+ from app.simulator continue to work unchanged.
427
+ """
428
+
429
+ def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
430
+ if agent_mode == "baseline_policy":
431
+ return "baseline_policy"
432
+ if agent_mode == "trained_rl":
433
+ return "trained_rl"
434
+ return os.getenv("MODEL_NAME", "llm_inference")
435
+
436
+
437
+ def _log_step_line(step_row: dict[str, Any]) -> str:
438
+ done = "true" if bool(step_row.get("done")) else "false"
439
+ error = step_row.get("last_action_error") or "null"
440
+ action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
441
+ source = step_row.get("decision_source") or "unknown"
442
+ model = step_row.get("model_used") or "null"
443
+ repair = step_row.get("repair_note") or "null"
444
+ switch_note = step_row.get("switch_note") or "null"
445
+ return (
446
+ f"[STEP] step={step_row.get('step', 0)} action={action} "
447
+ f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
448
+ f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
449
+ )
450
+
451
+
452
+ class LiveSimulationSession:
453
def __init__(
    self,
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> None:
    """Set up a step-by-step simulation session and initialise its backend.

    Args:
        task_id: Task to simulate.
        agent_mode: Which agent drives the session.
        max_steps: Step cap; for ``llm_inference`` it is raised to the
            task's recommended minimum when lower.
        seed: Environment seed; a random one in [1, 999999] is drawn when ``None``.
        policy_name: Baseline policy name (defaults to ``"backlog_clearance"``).
        model_path: Path of the trained model (used by ``trained_rl``).
        model_type: Kind of trained model, ``"maskable"`` or ``"recurrent"``.
    """
    self.task_id = task_id
    self.agent_mode = agent_mode

    step_cap = int(max_steps)
    if agent_mode == "llm_inference":
        step_cap = max(step_cap, int(_recommended_min_steps(task_id)))
    self.max_steps = step_cap

    if seed is None:
        seed = random.randint(1, 999999)
    self.seed = int(seed)

    self.policy_name = policy_name or "backlog_clearance"
    self.model_path = model_path
    self.model_type = model_type

    # Episode bookkeeping.
    self.trace: list[dict[str, Any]] = []
    self.total_reward = 0.0
    self.step_idx = 0
    self.done = False
    self.summary: dict[str, Any] | None = None
    self.score: float | None = None
    self.grader_name: str | None = None

    # Core (baseline / LLM) backend state.
    self.env: GovWorkflowEnv | None = None
    self.obs: ObservationModel | Any = None
    self.policy = None

    # Trained-RL backend state.
    self.rl_env: Any = None
    self.rl_model: Any = None
    self.rl_lstm_state: Any = None
    self.rl_episode_start: Any = None

    # LLM routing and failure-recovery state.
    self.llm_runtimes: list[dict[str, Any]] = []
    self.llm_route: list[str] = []
    self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
    self.consecutive_failure_steps = 0
    self.recovery_steps_remaining = 0
    self.auto_switch_count = 0
    self.last_switch_reason: str | None = None

    initialise = self._init_trained if self.agent_mode == "trained_rl" else self._init_core
    initialise()
504
+
505
def start_line(self) -> str:
    """Return the ``[START]`` log line naming the task, environment and model label."""
    model_label = _model_label_for_mode(self.agent_mode)
    return f"[START] task={self.task_id} env=gov-workflow-openenv model={model_label}"
510
+
511
def _init_core(self) -> None:
    """Create and reset the core environment, then bind the acting policy.

    Baseline sessions look their policy up in ``POLICIES`` (falling back to
    ``backlog_clearance_policy``); every other mode handled here uses the
    LLM decision method and prepares the LLM runtimes.
    """
    core_env = GovWorkflowEnv(task_id=self.task_id)
    self.env = core_env
    first_obs, _ = core_env.reset(seed=self.seed)
    self.obs = first_obs

    if self.agent_mode != "baseline_policy":
        self.policy = self._llm_action_with_meta
        self._init_llm_runtimes()
        return
    self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
519
+
520
def _init_llm_runtimes(self) -> None:
    """Build the LLM provider runtimes, per-model stats and route description.

    Two providers are considered, in this order: an OpenAI-compatible
    endpoint and NVIDIA. Base URLs, API keys and model names come from
    environment variables (the NVIDIA model list is extended with the
    built-in pool). A provider becomes a runtime only when it has at least
    one key, one model and one client that could be constructed.
    """
    openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
    nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")

    # (provider name, key-label prefix, base url, keys, models)
    provider_specs = [
        (
            "openai-compatible",
            "openai",
            openai_base,
            _dedupe([os.getenv("HF_TOKEN"), os.getenv("OPENAI_API_KEY"), os.getenv("API_KEY")]),
            _dedupe(
                [
                    os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct"),
                    *_env_csv_list("MODEL_FALLBACKS"),
                ]
            ),
        ),
        (
            "nvidia",
            "nvidia",
            nvidia_base,
            _dedupe([os.getenv("NVIDIA_API_KEY"), os.getenv("NVIDIA_API_KEY_2")]),
            _dedupe(
                [
                    os.getenv("NVIDIA_MODEL"),
                    *_env_csv_list("NVIDIA_MODEL_FALLBACKS"),
                    *LEGACY_NVIDIA_MODEL_POOL,
                ]
            ),
        ),
    ]

    runtimes: list[dict[str, Any]] = []
    for provider_name, label_prefix, base_url, api_keys, model_names in provider_specs:
        if not (api_keys and model_names):
            continue
        clients = []
        for position, api_key in enumerate(api_keys, start=1):
            try:
                handle = OpenAI(base_url=base_url, api_key=api_key, timeout=8.0, max_retries=0)
            except Exception:
                # A key whose client cannot be built is skipped; its number is not reused.
                continue
            clients.append((handle, f"{label_prefix}_key_{position}"))
        if clients:
            runtimes.append(
                {
                    "provider": provider_name,
                    "base_url": base_url,
                    "clients": clients,
                    "models": model_names,
                }
            )

    self.llm_runtimes = runtimes
    self.llm_model_stats = {
        (str(runtime.get("provider")), str(model_name)): {
            "calls": 0,
            "invalid": 0,
            "repaired": 0,
            "failures": 0,
            "cooldown_until_step": 0,
        }
        for runtime in runtimes
        for model_name in runtime.get("models", [])
    }

    def _describe(provider_name: str) -> str:
        active = next((rt for rt in runtimes if rt.get("provider") == provider_name), None)
        if active is None:
            return f"{provider_name} (unavailable: missing API key/model)"
        return f"{provider_name} ({len(active['clients'])} keys, {len(active['models'])} models)"

    self.llm_route = [
        _describe("openai-compatible"),
        _describe("nvidia"),
        "adaptive ranking: prefer models with lower invalid/repaired rates",
        "heuristic fallback (backlog_clearance_policy)",
    ]
626
+
627
+ def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
628
+ def _score(model_name: str) -> tuple[float, int]:
629
+ stat = self.llm_model_stats.get((provider, model_name), {})
630
+ calls = max(1, int(stat.get("calls", 0)))
631
+ invalid_rate = float(stat.get("invalid", 0)) / calls
632
+ repaired_rate = float(stat.get("repaired", 0)) / calls
633
+ fail_rate = float(stat.get("failures", 0)) / calls
634
+ cooldown = int(stat.get("cooldown_until_step", 0))
635
+ cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
636
+ return (invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty, -calls)
637
+
638
+ return sorted([str(m) for m in models], key=_score)
639
+
640
def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
    """Ask the configured LLMs for the next action and report how it was chosen.

    Returns ``(action, meta)``. ``meta`` records the decision source,
    provider, model, number of LLM attempts, last error and key label.
    While recovery steps remain, or when every LLM call fails (or none is
    configured), the action comes from ``_best_high_impact_action`` instead.
    """
    # Recovery window: bypass the LLM entirely and consume one recovery step.
    if self.recovery_steps_remaining > 0:
        self.recovery_steps_remaining -= 1
        action, why = _best_high_impact_action(obs)
        return action, {
            "decision_source": "auto_recovery_policy",
            "provider": "heuristic",
            "model_used": "backlog_clearance_policy",
            "llm_attempts": 0,
            "llm_error": None,
            "llm_key_label": None,
            "repair_note": why,
        }

    attempts = 0
    last_error = ""
    allowed_actions, blocked_actions = _masked_action_type_hints(obs)
    # The hints use the legacy field names; _coerce_action remaps them.
    schema_hint = {
        "required_fields": {
            "set_priority_mode": ["action_type", "priority_mode"],
            "assign_capacity": ["action_type", "service", "officer_delta"],
            "request_missing_documents": ["action_type", "service"],
            "escalate_service": ["action_type", "service"],
            "advance_time": ["action_type"],
            "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
        },
        "allowed_priority_mode": [m.value for m in PriorityMode],
        "allowed_services": [s.value for s in ServiceType],
    }
    system_prompt = (
        "You are controlling a government workflow simulator. "
        "Return exactly one JSON object only. No markdown. No explanation. "
        "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents, "
        "escalate_service, advance_time, reallocate_officers. "
        "Rules: "
        "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target. "
        "2) assign_capacity requires service + officer_delta>0. "
        "3) request_missing_documents requires service with missing_docs_cases>0. "
        "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance]. "
        "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops. "
        "Use lowercase enum values."
    )
    user_prompt = (
        "Observation:\n"
        f"{obs.model_dump_json()}\n"
        f"Allowed action types now: {allowed_actions}\n"
        f"Blocked action types now: {blocked_actions}\n"
        f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}\n"
        f"Last action validity: {obs.last_action_valid}\n"
        f"Last action message: {obs.last_action_message}\n"
        "Return action JSON."
    )

    # Try providers in order; for each client (API key) walk the ranked
    # models and return on the first call that completes.
    for runtime in self.llm_runtimes:
        provider = str(runtime["provider"])
        ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
        for client, key_label in runtime["clients"]:
            for model in ranked_models:
                attempts += 1
                stat_key = (provider, model)
                try:
                    out = client.chat.completions.create(
                        model=model,
                        messages=[
                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": user_prompt},
                        ],
                        temperature=0.0,
                        max_tokens=200,
                        stream=False,
                    )
                    content = (out.choices[0].message.content or "").strip()
                    # Unparseable content is coerced to advance_time by _coerce_action.
                    action = _coerce_action(_extract_json_object(content))
                    if stat_key in self.llm_model_stats:
                        self.llm_model_stats[stat_key]["calls"] += 1
                    return action, {
                        "decision_source": "llm",
                        "provider": provider,
                        "model_used": model,
                        "llm_attempts": attempts,
                        "llm_error": None,
                        "llm_key_label": key_label,
                    }
                except Exception as exc:
                    last_error = str(exc)
                    stat = self.llm_model_stats.get(stat_key)
                    if stat is not None:
                        stat["calls"] += 1
                        stat["failures"] += 1
                        # Two or more recorded failures start a cooldown of five steps.
                        if stat["failures"] >= 2:
                            stat["cooldown_until_step"] = self.step_idx + 5
                    continue

    # Every call failed, or no runtime is configured: use the heuristic.
    action, why = _best_high_impact_action(obs)
    if not self.llm_runtimes:
        last_error = "No LLM credentials configured."
    return action, {
        "decision_source": "heuristic_fallback",
        "provider": "heuristic",
        "model_used": "backlog_clearance_policy",
        "llm_attempts": attempts,
        "llm_error": last_error or None,
        "llm_key_label": None,
        "repair_note": why,
    }
745
+
746
def _init_trained(self) -> None:
    """Load the trained model and create the gym environment for ``trained_rl`` mode.

    Raises:
        ValueError: if no ``model_path`` was supplied.
    """
    # Imported here rather than at module top.
    import numpy as np
    from app.main import _load_model_cached_or_503, _resolve_model_path_or_422
    from rl.gym_wrapper import GovWorkflowGymEnv

    if not self.model_path:
        raise ValueError("model_path is required for trained_rl simulation.")

    resolved_path = _resolve_model_path_or_422(self.model_path)
    self.rl_model = _load_model_cached_or_503(resolved_path, self.model_type)

    gym_env = GovWorkflowGymEnv(task_id=self.task_id, seed=self.seed, hard_action_mask=True)
    self.rl_env = gym_env
    first_obs, _ = gym_env.reset(seed=self.seed)
    self.obs = first_obs

    # Recurrent-policy inputs: no LSTM state yet, and the episode is starting.
    self.rl_lstm_state = None
    self.rl_episode_start = np.array([True], dtype=bool)
759
+
760
def step_once(self) -> tuple[dict[str, Any], str, bool]:
    """Advance the session by one step.

    Returns:
        ``(row, step_log, finished)``: the trace row for the step, its
        ``[STEP]`` log line, and whether the session has ended (the
        environment reported done or the step cap was reached).

    Raises:
        RuntimeError: if the session has already finished.
    """
    if self.done:
        raise RuntimeError("Simulation already finished.")

    self.step_idx += 1
    if self.agent_mode == "trained_rl":
        row = self._step_trained()
    else:
        row = self._step_core()
    self.trace.append(row)
    self.total_reward += float(row["reward"])

    finished = bool(row["done"]) or self.step_idx >= self.max_steps
    # Log the final done state. Previously the line was rendered before the
    # row was marked done at the step cap, so the last [STEP] line could say
    # done=false while the returned row and flag said the session was over.
    step_log = _log_step_line({**row, "done": finished})

    if finished:
        self._finalize()
        row["done"] = True
    return row, step_log, finished
778
+
779
+ def end_line(self) -> str:
780
+ if self.score is None:
781
+ return "[END] success=false steps=0 score=0.00 rewards="
782
+ rewards = ",".join(f"{float(x.get('reward', 0.0)):.2f}" for x in self.trace)
783
+ success = "true" if self.score >= 0.5 else "false"
784
+ return (
785
+ f"[END] success={success} steps={len(self.trace)} "
786
+ f"score={self.score:.2f} rewards={rewards}"
787
+ )
788
+
789
+ def snapshot(self) -> dict[str, Any]:
790
+ return {
791
+ "task_id": self.task_id,
792
+ "agent_mode": self.agent_mode,
793
+ "seed": self.seed,
794
+ "max_steps": self.max_steps,
795
+ "step_idx": self.step_idx,
796
+ "done": self.done,
797
+ "total_reward": float(self.total_reward),
798
+ "score": self.score,
799
+ "grader_name": self.grader_name,
800
+ "summary": self.summary,
801
+ "trace_len": len(self.trace),
802
+ "llm_route": list(self.llm_route),
803
+ }
804
+
805
+ def close(self) -> None:
806
+ try:
807
+ if self.env is not None and hasattr(self.env, "close"):
808
+ self.env.close()
809
+ except Exception:
810
+ pass
811
+ try:
812
+ if self.rl_env is not None and hasattr(self.rl_env, "close"):
813
+ self.rl_env.close()
814
+ except Exception:
815
+ pass
816
+
817
def _step_core(self) -> dict[str, Any]:
    """Run one step of the core environment (baseline or LLM mode).

    Chooses an action, steps ``self.env`` and returns the trace row for the
    step (action, reward, done flag, queue figures and decision metadata).
    In ``llm_inference`` mode it also updates per-model statistics and may
    switch to the recovery policy after repeated invalid/repaired steps.

    Raises:
        RuntimeError: if the core environment has not been initialised.
    """
    if self.env is None:
        raise RuntimeError("Core simulation env not initialized.")
    if self.agent_mode == "baseline_policy":
        action = self.policy(self.obs)
        meta = {
            "decision_source": "baseline_policy",
            "provider": "local_policy",
            "model_used": self.policy_name,
            "llm_attempts": 0,
            "llm_error": None,
            "llm_key_label": None,
        }
    else:
        # NOTE(review): the nesting of the mask/repair section below is
        # assumed to belong to this LLM branch only -- TODO confirm that
        # baseline actions are meant to bypass masking and repair.
        raw_decision = self.policy(self.obs)
        if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
            action, meta = raw_decision
        else:
            action, meta = raw_decision, {}
        if not isinstance(meta, dict):
            meta = {}
        if not isinstance(action, ActionModel):
            if isinstance(action, dict):
                action = _coerce_action(action)
            else:
                action = ActionModel(action_type=ActionType.ADVANCE_TIME)
                meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"
        # First gate: replace action types the observation does not allow.
        allowed_mask = _compute_action_mask(self.obs)
        if not bool(allowed_mask.get(action.action_type, True)):
            masked_fallback, why = _best_high_impact_action(self.obs)
            action = masked_fallback
            if meta.get("decision_source") == "llm":
                meta["decision_source"] = "llm_repaired"
            meta["repair_note"] = f"action masked at runtime; {why}"
        # Second gate: fix up the payload (targets, deltas) for this observation.
        repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
        if repair_note:
            action = repaired_action
            if meta.get("decision_source") == "llm":
                meta["decision_source"] = "llm_repaired"
            meta["repair_note"] = repair_note

    self.obs, reward, terminated, truncated, info = self.env.step(action)
    done = bool(terminated or truncated)
    # Read observation fields safely for both Phase 1 and Phase 2 model shapes
    fairness_gap = float(
        getattr(self.obs, 'fairness_gap',
                1.0 - getattr(self.obs, 'fairness_index', 1.0))
    )
    row = {
        "step": self.step_idx,
        "day": self.obs.day,
        "action_type": action.action_type.value,
        "action_payload": action.model_dump(exclude_none=True, mode="json"),
        "reward": float(reward),
        "done": done,
        "backlog": self.obs.total_backlog,
        "completed": self.obs.total_completed,
        "sla_breaches": self.obs.total_sla_breaches,
        "fairness_gap": fairness_gap,
        "escalation_budget_remaining": self.obs.escalation_budget_remaining,
        "invalid_action": bool(getattr(info, 'invalid_action', False)),
        "last_action_error": getattr(info, 'last_action_error', None),
        "queue_rows": _queue_rows(self.obs),
    }
    # Decision metadata is merged last, so its keys win over the row's own.
    row.update(meta)

    if self.agent_mode == "llm_inference":
        is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
        is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
        model_used = str(row.get("model_used") or "")
        provider = str(row.get("provider") or "")
        stat_key = (provider, model_used)
        # Heuristic/recovery steps have no stats entry, so stat is None for them.
        stat = self.llm_model_stats.get(stat_key)
        if stat is not None:
            if is_repaired:
                stat["repaired"] += 1
            if is_invalid:
                stat["invalid"] += 1
                stat["failures"] += 1
            else:
                # A valid step pays back one earlier failure (never below zero).
                stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)

        is_failure_pattern = is_invalid or is_repaired
        if is_failure_pattern:
            self.consecutive_failure_steps += 1
        else:
            self.consecutive_failure_steps = 0

        # Four consecutive bad steps: cool the model down and hand the next
        # steps to the recovery policy.
        if self.consecutive_failure_steps >= 4:
            if stat is not None:
                stat["cooldown_until_step"] = self.step_idx + 6
            self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
            self.auto_switch_count += 1
            self.last_switch_reason = "repeated invalid/repaired pattern detected"
            row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
            self.consecutive_failure_steps = 0

    return row
915
+
916
+ def _step_trained(self) -> dict[str, Any]:
917
+ import numpy as np
918
+
919
+ masks = self.rl_env.action_masks()
920
+ if self.model_type == "recurrent":
921
+ action, self.rl_lstm_state = self.rl_model.predict(
922
+ self.obs,
923
+ state=self.rl_lstm_state,
924
+ episode_start=self.rl_episode_start,
925
+ deterministic=True,
926
+ )
927
+ action_idx = int(action.item() if hasattr(action, "item") else action)
928
+ if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
929
+ valid = np.flatnonzero(masks)
930
+ action_idx = int(valid[0]) if valid.size > 0 else 18
931
+ else:
932
+ from sb3_contrib.common.maskable.utils import get_action_masks
933
+
934
+ action, _ = self.rl_model.predict(
935
+ self.obs,
936
+ action_masks=get_action_masks(self.rl_env),
937
+ deterministic=True,
938
+ )
939
+ action_idx = int(action.item() if hasattr(action, "item") else action)
940
+
941
+ self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
942
+ done = bool(terminated or truncated)
943
+ if self.model_type == "recurrent":
944
+ self.rl_episode_start = np.array([done], dtype=bool)
945
+ core_obs = self.rl_env._core_env._build_observation()
946
+ action_model, action_label = _decode_action_idx(action_idx)
947
+ return {
948
+ "step": self.step_idx,
949
+ "day": core_obs.day,
950
+ "action_type": action_label,
951
+ "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
952
+ "action_index": action_idx,
953
+ "reward": float(reward),
954
+ "done": done,
955
+ "backlog": core_obs.total_backlog,
956
+ "completed": core_obs.total_completed,
957
+ "sla_breaches": core_obs.total_sla_breaches,
958
+ "fairness_gap": float(core_obs.fairness_gap),
959
+ "escalation_budget_remaining": core_obs.escalation_budget_remaining,
960
+ "invalid_action": bool(info.get("invalid_action", False)),
961
+ "last_action_error": info.get("last_action_error"),
962
+ "queue_rows": _queue_rows(core_obs),
963
+ "decision_source": "trained_rl",
964
+ "provider": "rl",
965
+ "model_used": self.model_path or "trained_rl",
966
+ "llm_attempts": 0,
967
+ "llm_error": None,
968
+ "llm_key_label": None,
969
+ }
970
+
971
+ def _finalize(self) -> None:
972
+ if self.done:
973
+ return
974
+ self.done = True
975
+ if self.agent_mode == "trained_rl":
976
+ final_state = self.rl_env._core_env.state()
977
+ else:
978
+ final_state = self.env.state()
979
+ gr = grade_episode(final_state)
980
+ self.score = float(gr.score)
981
+ self.grader_name = gr.grader_name
982
+
983
+ llm_steps = sum(
984
+ 1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"}
985
+ )
986
+ fallback_steps = sum(
987
+ 1
988
+ for row in self.trace
989
+ if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
990
+ )
991
+ repaired_steps = sum(
992
+ 1
993
+ for row in self.trace
994
+ if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
995
+ )
996
+ total_steps = max(1, len(self.trace))
997
+ invalid_actions = int(final_state.metrics.total_invalid_actions)
998
+ invalid_rate = float(invalid_actions) / float(total_steps)
999
+ repaired_rate = float(repaired_steps) / float(total_steps)
1000
+
1001
+ ranked_models: list[dict[str, Any]] = []
1002
+ if self.llm_model_stats:
1003
+ for (provider, model), stat in self.llm_model_stats.items():
1004
+ calls = int(stat.get("calls", 0))
1005
+ if calls <= 0:
1006
+ continue
1007
+ ranked_models.append(
1008
+ {
1009
+ "provider": provider,
1010
+ "model": model,
1011
+ "calls": calls,
1012
+ "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
1013
+ "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
1014
+ }
1015
+ )
1016
+ ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))
1017
+
1018
+ self.summary = {
1019
+ "total_steps": final_state.total_steps,
1020
+ "total_completed": final_state.total_completed,
1021
+ "total_backlog": final_state.total_backlog,
1022
+ "total_sla_breaches": final_state.total_sla_breaches,
1023
+ "fairness_gap": float(final_state.fairness_gap),
1024
+ "total_invalid_actions": final_state.metrics.total_invalid_actions,
1025
+ "invalid_action_rate": invalid_rate,
1026
+ "llm_steps": llm_steps,
1027
+ "heuristic_fallback_steps": fallback_steps,
1028
+ "llm_repaired_steps": repaired_steps,
1029
+ "repaired_action_rate": repaired_rate,
1030
+ "auto_switch_count": self.auto_switch_count,
1031
+ "last_switch_reason": self.last_switch_reason,
1032
+ "effective_max_steps": self.max_steps,
1033
+ "recommended_min_steps": _recommended_min_steps(self.task_id),
1034
+ }
1035
+ if self.agent_mode == "llm_inference":
1036
+ self.summary["llm_route"] = list(self.llm_route)
1037
+ self.summary["llm_model_performance"] = ranked_models
1038
+ if self.agent_mode == "trained_rl":
1039
+ self.summary["model_path"] = self.model_path
1040
+ self.summary["model_type"] = self.model_type
1041
+
1042
+
1043
def run_simulation(
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> SimulationRun:
    """Run a complete simulation in one call and return its result record.

    A LiveSimulationSession is created from the keyword arguments, stepped
    until it reports done, and snapshotted into a SimulationRun. The session is
    closed in all cases, including when a step raises.
    """
    session = LiveSimulationSession(
        task_id=task_id,
        agent_mode=agent_mode,
        max_steps=max_steps,
        seed=seed,
        policy_name=policy_name,
        model_path=model_path,
        model_type=model_type,
    )
    try:
        while True:
            if session.done:
                break
            session.step_once()

        # Copy the mutable containers so the result does not alias session state.
        trace_copy = list(session.trace)
        summary_copy = dict(session.summary or {})
        return SimulationRun(
            task_id=session.task_id,
            agent_mode=session.agent_mode,
            seed=session.seed,
            total_reward=float(session.total_reward),
            score=float(session.score or 0.0),
            grader_name=str(session.grader_name or "unknown"),
            summary=summary_copy,
            trace=trace_copy,
        )
    finally:
        session.close()
1077
+
1078
+
1079
def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
    """Translate a discrete RL action index into an ActionModel and a label.

    Falls back to an ADVANCE_TIME action labelled ``action_<idx>`` when the
    decode table cannot be imported or has no entry for the index.
    """
    try:
        # Imported lazily; any import failure degrades to the fallback below.
        from rl.feature_builder import ACTION_DECODE_TABLE
        from app.models import PriorityMode, ServiceType
    except Exception:
        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"

    row = ACTION_DECODE_TABLE.get(int(action_idx))
    if row is None:
        return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
    # NOTE(review): the visible body ends here with no return for a found row, so
    # a known index would yield None and break tuple-unpacking callers. The
    # decoding of `row` (and the use of PriorityMode/ServiceType) appears to be
    # missing — confirm against the intended implementation.
1089
+
1090
+ from app.engine import (
1091
+ DayResult,
1092
+ DaySimulator,
1093
+ LiveSimulationSession,
1094
+ SimulationAgentMode,
1095
+ SimulationRun,
1096
+ run_simulation,
1097
+ )
1098
+
1099
+ __all__ = [
1100
+ "DayResult",
1101
+ "DaySimulator",
1102
+ "SimulationAgentMode",
1103
+ "SimulationRun",
1104
+ "LiveSimulationSession",
1105
+ "run_simulation",
1106
+ ]
app/state_machine.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ state_machine.py — Gov Workflow OpenEnv
3
+ Deterministic workflow transition engine aligned with Phase 1 schemas.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from app.models import ApplicationCase, InternalSubstate, StageType
9
+
10
+
11
# Maps each internal workflow substate to the public stage derived from it.
# Several substates share one public stage: both validation substates and the
# blocked-on-documents substate map to DOCUMENT_VERIFICATION; COMPLETED maps to
# ISSUANCE and REJECTED to APPROVAL.
INTERNAL_TO_PUBLIC_STAGE: dict[InternalSubstate, StageType] = {
    InternalSubstate.PRE_SCRUTINY: StageType.SUBMISSION,
    InternalSubstate.DOC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.SERVICE_SPECIFIC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.FIELD_VERIFICATION_PENDING: StageType.FIELD_VERIFICATION,
    InternalSubstate.DECISION_PENDING: StageType.APPROVAL,
    InternalSubstate.ISSUANCE_READY: StageType.ISSUANCE,
    InternalSubstate.BLOCKED_MISSING_DOCS: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.COMPLETED: StageType.ISSUANCE,
    InternalSubstate.REJECTED: StageType.APPROVAL,
}
22
+
23
+
24
def build_public_stage(substate: InternalSubstate) -> StageType:
    """Return the public stage for an internal substate (SUBMISSION if unmapped)."""
    try:
        return INTERNAL_TO_PUBLIC_STAGE[substate]
    except KeyError:
        return StageType.SUBMISSION
26
+
27
+
28
def transition_case(case: ApplicationCase, new_substate: InternalSubstate) -> None:
    """Move `case` into `new_substate`, refreshing its public stage and stage timer."""
    case.internal_substate = new_substate
    derived_stage = build_public_stage(new_substate)
    # A transition always restarts the days-in-stage counter.
    case.public_stage, case.days_in_current_stage = derived_stage, 0
32
+
33
+
34
def can_advance(case: ApplicationCase) -> bool:
    """Tell whether `case` may progress: not completed, not rejected, not blocked on documents."""
    return not (
        case.completed
        or case.rejected
        or case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
    )
40
+
41
+
42
def advance_case(case: ApplicationCase, rng: object = None) -> tuple[bool, bool]:
    """
    Attempt to move `case` one step along the workflow.

    Returns (progressed, completed). `rng` is accepted but not used here.
    """
    if not can_advance(case):
        return False, False

    stage = case.internal_substate

    # Missing documents discovered during the early stages park the case.
    if case.has_missing_docs and stage in (
        InternalSubstate.PRE_SCRUTINY,
        InternalSubstate.DOC_VALIDATION,
    ):
        transition_case(case, InternalSubstate.BLOCKED_MISSING_DOCS)
        return True, False

    if stage == InternalSubstate.PRE_SCRUTINY:
        target = InternalSubstate.DOC_VALIDATION
    elif stage in (
        InternalSubstate.DOC_VALIDATION,
        InternalSubstate.SERVICE_SPECIFIC_VALIDATION,
    ):
        # Both validation substates branch on whether a field visit is needed.
        if case.field_verification_required:
            target = InternalSubstate.FIELD_VERIFICATION_PENDING
        else:
            target = InternalSubstate.DECISION_PENDING
    elif stage == InternalSubstate.DECISION_PENDING:
        target = InternalSubstate.ISSUANCE_READY
    elif stage == InternalSubstate.ISSUANCE_READY:
        target = InternalSubstate.COMPLETED
    else:
        # FIELD_VERIFICATION_PENDING (left via complete_field_verification) and
        # any other substate make no progress here.
        return False, False

    transition_case(case, target)
    if target == InternalSubstate.COMPLETED:
        case.completed = True
        return True, True
    return True, False
91
+
92
+
93
def unblock_missing_docs(case: ApplicationCase) -> bool:
    """Release a case blocked on missing documents back to DOC_VALIDATION.

    Returns True when the case was blocked and has been released, else False.
    """
    was_blocked = case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
    if was_blocked:
        case.has_missing_docs = False
        case.doc_resolution_day = None
        transition_case(case, InternalSubstate.DOC_VALIDATION)
    return was_blocked
100
+
101
+
102
def complete_field_verification(case: ApplicationCase) -> bool:
    """Finish a pending field verification and move the case to DECISION_PENDING.

    Returns True when the case was awaiting field verification, else False.
    """
    was_pending = case.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
    if was_pending:
        case.field_verification_completion_day = None
        transition_case(case, InternalSubstate.DECISION_PENDING)
    return was_pending
app/story_router.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app/story_router.py
3
+
4
+ FastAPI router that serves LLM training story data.
5
+ All 7 endpoints are READ-ONLY - they serve pre-saved JSON files.
6
+ No frontend elements are invoked from backend.
7
+ No training runs happen here - only data serving.
8
+
9
+ Mount in main.py with:
10
+ from app.story_router import router as story_router
11
+ app.include_router(story_router)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import json
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+ from fastapi import APIRouter, HTTPException
22
+ from fastapi.responses import StreamingResponse
23
+
24
+ router = APIRouter(prefix="/training", tags=["Training Story"])
25
+
26
+ # --- Data directory --------------------------------------------------
27
+ DATA_DIR = Path("data/training_logs")
28
+
29
# Fixed "before" figures per task id, read by the comparison endpoint as the
# heuristic baseline. Task ids without an entry fall back to zeros there.
# NOTE(review): values are hard-coded; their provenance is not visible here.
HEURISTIC_BASELINES: dict[str, dict] = {
    "district_backlog_easy": {
        "score": 0.527, "completed": 41,
        "breaches": 184, "reward": -79.86, "avg_wait": 6.9,
    },
    "mixed_urgency_medium": {
        "score": 0.454, "completed": 58,
        "breaches": 34, "reward": -684.22, "avg_wait": 12.4,
    },
    "cross_department_hard": {
        "score": 0.606, "completed": 83,
        "breaches": 723, "reward": -2318.78, "avg_wait": 15.6,
    },
}
43
+
44
+
45
+ # --- Internal helpers ------------------------------------------------
46
+
47
def _load_log(task_id: str) -> dict:
    """Load the saved JSON training log for `task_id`.

    Raises:
        HTTPException(400): `task_id` is empty or contains path separators / "..".
        HTTPException(404): no log file exists for the task.
        HTTPException(500): the file exists but is not a valid JSON object.
    """
    # task_id comes straight from the URL; refuse anything that could make the
    # resulting path point outside DATA_DIR.
    if not task_id or "/" in task_id or "\\" in task_id or ".." in task_id:
        raise HTTPException(status_code=400, detail=f"Invalid task id '{task_id}'.")

    path = DATA_DIR / f"{task_id}_training_log.json"
    if not path.exists():
        raise HTTPException(
            status_code=404,
            detail=(
                f"Training log not found for task '{task_id}'. "
                f"Run: python scripts/convert_grpo_csv.py "
                f"--csv <your_csv> --task {task_id}"
            ),
        )
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as exc:
        # Report a corrupt file as a described server error instead of an
        # unhandled exception.
        raise HTTPException(
            status_code=500,
            detail=f"Training log for task '{task_id}' is not valid JSON: {exc}",
        ) from exc
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=500,
            detail=f"Training log for task '{task_id}' must be a JSON object.",
        )
    return data
61
+
62
+
63
+ def _dominant_action(episodes: list[dict]) -> str:
64
+ """Returns the action name with the highest total weight across episodes."""
65
+ totals: dict[str, float] = {}
66
+ for ep in episodes:
67
+ for action, val in ep.get("actions", {}).items():
68
+ totals[action] = totals.get(action, 0.0) + float(val)
69
+ return max(totals, key=totals.get) if totals else "advance_time"
70
+
71
+
72
+ def _phase_message(ep: dict) -> str:
73
+ """Returns a human-readable learning message for one episode."""
74
+ phase = ep.get("phase", "random")
75
+ reward = ep.get("total_reward", 0)
76
+ score = ep.get("score", 0)
77
+ fn1 = ep.get("fn1_valid", 1.0)
78
+ fn2 = ep.get("fn2_no_halluc", 1.0)
79
+ episode = ep.get("episode", 0)
80
+
81
+ validity_note = "" if fn1 >= 1.0 else f" WARNING: Invalid action at step {episode}."
82
+ halluc_note = "" if fn2 >= 1.0 else " WARNING: Hallucination detected."
83
+
84
+ messages = {
85
+ "random": (
86
+ f"Step {episode}: LLM is exploring. "
87
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
88
+ ),
89
+ "exploring": (
90
+ f"Step {episode}: LLM finding patterns. "
91
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
92
+ ),
93
+ "learning": (
94
+ f"Step {episode}: LLM reinforcing good actions. "
95
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
96
+ ),
97
+ "converged": (
98
+ f"Step {episode}: LLM converged. "
99
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
100
+ ),
101
+ }
102
+ return messages.get(phase, f"Step {episode}: reward={reward:.3f}")
103
+
104
+
105
+ # ================================================================
106
+ # ENDPOINT 1 - GET /training/tasks
107
+ # ================================================================
108
@router.get("/tasks")
async def list_trained_tasks() -> dict:
    """
    Returns all tasks that have a usable saved training log JSON file.
    Frontend calls this first to populate task selector.

    Logs that cannot be loaded or lack the expected summary fields are left
    out of the listing rather than failing the whole request.
    """
    suffix = "_training_log.json"
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    available = []
    for path in sorted(DATA_DIR.glob(f"*{suffix}")):
        # Strip only the trailing suffix; str.replace would also remove the
        # marker if it appeared inside the task id itself.
        task_id = path.name[: -len(suffix)]
        try:
            log = _load_log(task_id)
            available.append({
                "task_id": task_id,
                "total_episodes": log["total_episodes"],
                "final_score": log["summary"]["last_episode_score"],
                "reward_improvement": log["summary"]["reward_improvement_pct"],
                "base_model": log.get("base_model", ""),
                "training_method": log.get("training_method", "GRPO"),
            })
        except (HTTPException, KeyError, TypeError, ValueError):
            # Best-effort listing: one unreadable or incomplete log must not
            # hide every other task from the selector.
            continue
    return {"tasks": available}
131
+
132
+
133
+ # ================================================================
134
+ # ENDPOINT 2 - GET /training/summary/{task_id}
135
+ # ================================================================
136
@router.get("/summary/{task_id}")
async def training_summary(task_id: str) -> dict:
    """Returns overview stats plus a four-phase narrative for the summary header card."""
    log = _load_log(task_id)
    eps = log["episodes"]
    n = len(eps)

    # Quarter boundaries splitting the run into four consecutive phases.
    q1, q2, q3 = n // 4, n // 2, 3 * n // 4
    phases = (eps[:q1], eps[q1:q2], eps[q2:q3], eps[q3:])
    p1_dom, p2_dom, p3_dom, p4_dom = (_dominant_action(chunk) for chunk in phases)

    def _mean_reward(chunk: list) -> float:
        # An empty chunk divides by 1 and so averages to 0.
        return sum(e["total_reward"] for e in chunk) / max(len(chunk), 1)

    avg_p1_r = _mean_reward(phases[0])
    avg_p4_r = _mean_reward(phases[3])

    narrative = {
        "phase_1": (
            f"Steps 1-{q1}: LLM chose '{p1_dom}' most often. "
            f"Avg reward {avg_p1_r:.2f}. Still exploring randomly."
        ),
        "phase_2": (
            f"Steps {q1}-{q2}: LLM discovered '{p2_dom}'. "
            "Reward started improving as valid patterns emerged."
        ),
        "phase_3": (
            f"Steps {q2}-{q3}: LLM reinforced '{p3_dom}'. "
            "Action validity reaching near-perfect levels."
        ),
        "phase_4": (
            f"Steps {q3}-{n}: LLM converged on '{p4_dom}'. "
            f"Avg reward {avg_p4_r:.2f}. "
            f"Final score {log['summary']['last_episode_score']:.1%}."
        ),
    }
    return {
        "task_id": log["task_id"],
        "base_model": log.get("base_model", ""),
        "training_method": log.get("training_method", "GRPO"),
        "lora_rank": log.get("lora_rank", 16),
        "total_episodes": n,
        "reward_functions": log.get("reward_functions", {}),
        "summary": log["summary"],
        "narrative": narrative,
    }
181
+
182
+
183
+ # ================================================================
184
+ # ENDPOINT 3 - GET /training/curve/{task_id}
185
+ # ================================================================
186
@router.get("/curve/{task_id}")
async def training_curve(
    task_id: str,
    downsample: int = 1,
) -> dict:
    """
    Returns per-episode reward and score points for chart rendering.
    downsample=5 -> every 5th episode; values below 1 are treated as 1.
    """
    log = _load_log(task_id)
    stride = max(1, downsample)
    sampled = log["episodes"][::stride]

    points = []
    for e in sampled:
        points.append(
            {
                "episode": e["episode"],
                "reward": e["total_reward"],
                "score": e["score"],
                "fn1_valid": e.get("fn1_valid", 1.0),
                "fn2_no_halluc": e.get("fn2_no_halluc", 1.0),
                "fn3_env_score": e.get("fn3_env_score", 0.0),
                "phase": e["phase"],
            }
        )
    return {
        "task_id": task_id,
        "total_points": len(sampled),
        "curve": points,
    }
214
+
215
+
216
+ # ================================================================
217
+ # ENDPOINT 4 - GET /training/actions/{task_id}
218
+ # ================================================================
219
@router.get("/actions/{task_id}")
async def action_evolution(task_id: str) -> dict:
    """Returns action distribution at 5 checkpoints across training.

    Checkpoints are taken at the first episode, each quarter boundary and the
    last episode. A log with no episodes yields an empty checkpoint list
    instead of an error.
    """
    log = _load_log(task_id)
    eps = log["episodes"]
    n = len(eps)

    if n == 0:
        # Nothing to index into; eps[0] below would raise IndexError.
        return {
            "task_id": task_id,
            "checkpoints": [],
            "insight": "No training episodes recorded for this task.",
        }

    idxs = [0, n // 4, n // 2, 3 * n // 4, n - 1]
    result = []
    for idx in idxs:
        ep = eps[idx]
        result.append({
            "episode": ep["episode"],
            "phase": ep["phase"],
            "actions": ep.get("actions", {}),
            "reward": ep["total_reward"],
            "score": ep["score"],
        })

    # Compare validity over the first and last quarter. With fewer than four
    # episodes the first quarter would be empty and read as 0%, so use at
    # least one episode.
    early = eps[: max(n // 4, 1)]
    late = eps[3 * n // 4:]
    avg_fn1_start = sum(e.get("fn1_valid", 1.0) for e in early) / len(early)
    avg_fn1_end = sum(e.get("fn1_valid", 1.0) for e in late) / max(len(late), 1)

    insight = (
        f"Action validity improved from {avg_fn1_start:.1%} (early) "
        f"to {avg_fn1_end:.1%} (final). "
        "LLM learned to output valid government workflow JSON consistently."
    )

    return {
        "task_id": task_id,
        "checkpoints": result,
        "insight": insight,
    }
252
+
253
+
254
+ # ================================================================
255
+ # ENDPOINT 5 - GET /training/episode/{task_id}/{episode_num}
256
+ # ================================================================
257
@router.get("/episode/{task_id}/{episode_num}")
async def episode_detail(task_id: str, episode_num: int) -> dict:
    """Returns the record for one training episode plus running statistics up to it.

    `episode_num` is 1-based; values outside the log's range give HTTP 400.
    """
    log = _load_log(task_id)
    eps = log["episodes"]

    if not 1 <= episode_num <= len(eps):
        raise HTTPException(
            status_code=400,
            detail=f"episode_num must be 1-{len(eps)}. Got {episode_num}.",
        )

    # Everything up to and including the requested episode.
    history = eps[:episode_num]
    ep = history[-1]
    rewards_so_far = [item["total_reward"] for item in history]
    scores_so_far = [item["score"] for item in history]
    running_avg = round(sum(scores_so_far) / len(scores_so_far), 4)

    return {
        "task_id": task_id,
        "episode": ep["episode"],
        "total_episodes": len(eps),
        "reward": ep["total_reward"],
        "score": ep["score"],
        "fn1_valid": ep.get("fn1_valid", 1.0),
        "fn2_no_halluc": ep.get("fn2_no_halluc", 1.0),
        "fn3_env_score": ep.get("fn3_env_score", 0.0),
        "phase": ep["phase"],
        "actions": ep.get("actions", {}),
        "running_best_reward": max(rewards_so_far),
        "running_avg_score": running_avg,
        "message": _phase_message(ep),
    }
288
+
289
+
290
+ # ================================================================
291
+ # ENDPOINT 6 - GET /training/stream/{task_id} [SSE]
292
+ # ================================================================
293
@router.get("/stream/{task_id}")
async def stream_training_replay(
    task_id: str,
    delay_ms: int = 100,
    start_episode: int = 1,
    end_episode: Optional[int] = None,
) -> StreamingResponse:
    """Server-Sent Events endpoint for animated chart replay.

    Emits one "meta" event, then one "episode" event per episode in the
    requested 1-based inclusive window (pausing `delay_ms` between them), then
    a final "done" event. Out-of-range window bounds are clamped to the log.
    """
    log = _load_log(task_id)
    eps = log["episodes"]

    # Clamp the window: a start below 1 or a negative end would otherwise turn
    # into a negative slice index and replay the wrong episodes.
    first = max(start_episode, 1)
    end = max(0, min(end_episode or len(eps), len(eps)))
    subset = eps[first - 1: end]
    pause_seconds = max(delay_ms, 0) / 1000.0

    async def generate():
        meta_event = json.dumps({
            "type": "meta",
            "task_id": task_id,
            "total_episodes": len(eps),
            "summary": log["summary"],
            "reward_functions": log.get("reward_functions", {}),
        })
        yield f"data: {meta_event}\n\n"

        # Running statistics cover only the episodes streamed so far.
        rewards_so_far: list[float] = []
        scores_so_far: list[float] = []

        for ep in subset:
            rewards_so_far.append(ep["total_reward"])
            scores_so_far.append(ep["score"])

            event = json.dumps({
                "type": "episode",
                "episode": ep["episode"],
                "total_episodes": len(eps),
                "reward": ep["total_reward"],
                "score": ep["score"],
                "fn1_valid": ep.get("fn1_valid", 1.0),
                "fn2_no_halluc": ep.get("fn2_no_halluc", 1.0),
                "fn3_env_score": ep.get("fn3_env_score", 0.0),
                "phase": ep["phase"],
                "actions": ep.get("actions", {}),
                "running_best": max(rewards_so_far),
                "running_avg_score": round(
                    sum(scores_so_far) / len(scores_so_far), 4
                ),
                "message": _phase_message(ep),
            })
            yield f"data: {event}\n\n"
            await asyncio.sleep(pause_seconds)

        done_event = json.dumps({
            "type": "done",
            "final_score": scores_so_far[-1] if scores_so_far else 0.0,
            "best_reward": max(rewards_so_far) if rewards_so_far else 0.0,
            "total_steps": len(subset),
        })
        yield f"data: {done_event}\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        },
    )
360
+
361
+
362
+ # ================================================================
363
+ # ENDPOINT 7 - GET /training/comparison/{task_id}
364
+ # ================================================================
365
@router.get("/comparison/{task_id}")
async def before_after_comparison(task_id: str) -> dict:
    """Returns the heuristic baseline ("before") against the trained LLM ("after")."""
    log = _load_log(task_id)
    summary = log["summary"]
    # Tasks without a recorded baseline compare against zeros.
    baseline = HEURISTIC_BASELINES.get(task_id, {})

    bef_score = baseline.get("score", 0.0)
    after_score = summary["last_episode_score"]
    delta = round(after_score - bef_score, 4)
    # Relative change is only defined for a non-zero baseline score.
    if bef_score:
        pct = round((delta / bef_score) * 100, 1)
    else:
        pct = 0.0

    if delta > 0.10:
        verdict = "LLM significantly outperforms baseline"
    elif delta > 0.0:
        verdict = "LLM moderately outperforms baseline"
    else:
        verdict = "LLM needs more training"

    before = {
        "label": "Heuristic Baseline (no AI)",
        "score": bef_score,
        "reward": baseline.get("reward", 0.0),
        "completed": baseline.get("completed", 0),
        "breaches": baseline.get("breaches", 0),
        "avg_wait": baseline.get("avg_wait", 0.0),
    }
    after = {
        "label": f"GRPO Trained LLM ({log.get('base_model','')})",
        "score": after_score,
        "reward": summary["last_episode_reward"],
        "avg_fn1_valid": summary.get("avg_fn1_valid", 0.0),
        "avg_fn2_no_halluc": summary.get("avg_fn2_no_halluc", 0.0),
        "invalid_steps": summary.get("invalid_action_steps", 0),
        "hallucination_steps": summary.get("hallucination_steps", 0),
    }
    return {
        "task_id": task_id,
        "before": before,
        "after": after,
        "improvement": {
            "score_delta": delta,
            "score_pct": pct,
            "verdict": verdict,
        },
    }
app/tasks.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tasks.py — Gov Workflow OpenEnv v2.0
3
+ Three deterministic benchmark tasks: easy, medium, hard.
4
+ """
5
+ from app.models import (
6
+ TaskConfig, ServiceType, ScenarioMode, EventType, OfficerPool
7
+ )
8
+
9
# Easy benchmark: a single service (income certificate) over 30 days with all
# 8 officers allocated to it and explicit per-service probability overrides.
TASK_EASY = TaskConfig(
    task_id="district_backlog_easy",
    display_name="District Backlog Clearance — Revenue Office",
    difficulty="easy",
    scenario_mode=ScenarioMode.NORMAL,
    seed=42,
    max_days=30,
    enabled_services=[ServiceType.INCOME_CERTIFICATE],
    arrival_rate_per_day={ServiceType.INCOME_CERTIFICATE: 12.0},
    digital_intake_ratio=0.65,
    initial_officer_pool=OfficerPool(
        total_officers=8, available_officers=8,
        allocated={ServiceType.INCOME_CERTIFICATE: 8},
    ),
    missing_docs_probability_override={ServiceType.INCOME_CERTIFICATE: 0.20},
    field_verification_probability_override={ServiceType.INCOME_CERTIFICATE: 0.15},
    escalation_budget=5,
    fairness_threshold=None,
    event_probability=0.05,
    # Only NO_EVENT is allowed, so presumably a triggered event changes nothing
    # in this task — TODO confirm against the event engine.
    allowed_events=[EventType.NO_EVENT],
)
30
+
31
# Medium benchmark: five services over 45 days, 14 officers split 4/2/2/3/3,
# no probability overrides, and document-rejection spikes as the only event.
TASK_MEDIUM = TaskConfig(
    task_id="mixed_urgency_medium",
    display_name="Mixed Urgency Backlog — Taluka Office",
    difficulty="medium",
    scenario_mode=ScenarioMode.NORMAL,
    seed=123,
    max_days=45,
    enabled_services=[
        ServiceType.INCOME_CERTIFICATE,
        ServiceType.LAND_REGISTRATION,
        ServiceType.PASSPORT,
        ServiceType.DRIVING_LICENSE,
        ServiceType.AADHAAR_CARD,
    ],
    arrival_rate_per_day={
        ServiceType.INCOME_CERTIFICATE: 8.0,
        ServiceType.LAND_REGISTRATION: 4.0,
        ServiceType.PASSPORT: 4.0,
        ServiceType.DRIVING_LICENSE: 5.0,
        ServiceType.AADHAAR_CARD: 6.0,
    },
    digital_intake_ratio=0.72,
    initial_officer_pool=OfficerPool(
        total_officers=14, available_officers=14,
        # Allocation sums to total_officers (4 + 2 + 2 + 3 + 3 = 14).
        allocated={
            ServiceType.INCOME_CERTIFICATE: 4,
            ServiceType.LAND_REGISTRATION: 2,
            ServiceType.PASSPORT: 2,
            ServiceType.DRIVING_LICENSE: 3,
            ServiceType.AADHAAR_CARD: 3,
        },
    ),
    missing_docs_probability_override=None,
    field_verification_probability_override=None,
    escalation_budget=8,
    fairness_threshold=None,
    event_probability=0.15,
    allowed_events=[EventType.DOCUMENT_REJECTION_SPIKE],
)
70
+
71
# Hard benchmark: five services over 60 days in CRISIS mode, 18 officers split
# 5/3/3/3/4, a fairness threshold of 0.70 (the only task that sets one) and
# five allowed event types.
TASK_HARD = TaskConfig(
    task_id="cross_department_hard",
    display_name="Cross-Department Crisis — District Collectorate",
    difficulty="hard",
    scenario_mode=ScenarioMode.CRISIS,
    seed=999,
    max_days=60,
    enabled_services=[
        ServiceType.INCOME_CERTIFICATE,
        ServiceType.LAND_REGISTRATION,
        ServiceType.PASSPORT,
        ServiceType.DRIVING_LICENSE,
        ServiceType.AADHAAR_CARD,
    ],
    arrival_rate_per_day={
        ServiceType.INCOME_CERTIFICATE: 11.0,
        ServiceType.LAND_REGISTRATION: 6.0,
        ServiceType.PASSPORT: 6.0,
        ServiceType.DRIVING_LICENSE: 7.0,
        ServiceType.AADHAAR_CARD: 8.0,
    },
    digital_intake_ratio=0.80,
    initial_officer_pool=OfficerPool(
        total_officers=18, available_officers=18,
        # Allocation sums to total_officers (5 + 3 + 3 + 3 + 4 = 18).
        allocated={
            ServiceType.INCOME_CERTIFICATE: 5,
            ServiceType.LAND_REGISTRATION: 3,
            ServiceType.PASSPORT: 3,
            ServiceType.DRIVING_LICENSE: 3,
            ServiceType.AADHAAR_CARD: 4,
        },
    ),
    missing_docs_probability_override=None,
    field_verification_probability_override=None,
    escalation_budget=10,
    fairness_threshold=0.70,
    event_probability=0.30,
    allowed_events=[
        EventType.SURGE_APPLICATIONS,
        EventType.OFFICER_UNAVAILABLE,
        EventType.DOCUMENT_REJECTION_SPIKE,
        EventType.REVENUE_DB_DELAY,
        EventType.SLA_ESCALATION_ORDER,
    ],
)
116
+
117
def make_extreme_variant(base_task: TaskConfig) -> TaskConfig:
    """Derive an EXTREME_OVERLOAD variant of `base_task` without modifying it.

    The copy gets an "_extreme" id suffix, an " [EXTREME]" display-name suffix,
    triple the event probability (capped at 1.0) and every event type except
    NO_EVENT enabled.
    """
    variant = base_task.model_copy(deep=True)
    overrides = {
        "task_id": base_task.task_id + "_extreme",
        "display_name": base_task.display_name + " [EXTREME]",
        "scenario_mode": ScenarioMode.EXTREME_OVERLOAD,
        "event_probability": min(1.0, base_task.event_probability * 3.0),
        "allowed_events": [e for e in EventType if e != EventType.NO_EVENT],
    }
    for field_name, value in overrides.items():
        setattr(variant, field_name, value)
    return variant
125
+
126
# Lookup table from task id to its TaskConfig. Besides the three benchmark
# tasks it registers one derived task: the extreme variant of the easy task,
# built once when this module is imported.
TASK_REGISTRY: dict = {
    "district_backlog_easy": TASK_EASY,
    "mixed_urgency_medium": TASK_MEDIUM,
    "cross_department_hard": TASK_HARD,
    "district_backlog_easy_extreme": make_extreme_variant(TASK_EASY),
}
132
+
133
def get_task(task_id: str) -> TaskConfig:
    """Return the registered TaskConfig for `task_id`; raise ValueError if unknown."""
    if task_id in TASK_REGISTRY:
        return TASK_REGISTRY[task_id]
    raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASK_REGISTRY)}")
137
+
138
def list_tasks() -> list:
    """Return the ids of all registered tasks, in registry order."""
    return [*TASK_REGISTRY]
140
+
141
def list_benchmark_tasks() -> list:
    """Return the ids of the three benchmark tasks (easy, medium, hard) as a new list."""
    benchmark_ids = (
        "district_backlog_easy",
        "mixed_urgency_medium",
        "cross_department_hard",
    )
    return list(benchmark_ids)
143
+
144
+ TASKS = TASK_REGISTRY
app/training_jobs.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import threading
9
+ import time
10
+ import math
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import Any, Literal
15
+ from uuid import uuid4
16
+
17
+ from app.persistence import PersistenceStore
18
+
19
+ Status = Literal["queued", "running", "completed", "failed", "stopped"]
20
+
21
+ _PROGRESS_RE = re.compile(r"(\d[\d,]*)/(\d[\d,]*)")
22
+ _METRIC_ROW_RE = re.compile(r"\|\s*([a-zA-Z0-9_ ]+?)\s*\|\s*(-?\d+(?:\.\d+)?)\s*\|")
23
+ _EVAL_PROGRESS_RE = re.compile(
24
+ r"Eval\s+num_timesteps=(\d+),\s*episode_reward=([-]?\d+(?:\.\d+)?)",
25
+ re.IGNORECASE,
26
+ )
27
+ _EVAL_ROW_RE = re.compile(
28
+ r"^\[Eval\]\s+([a-z_]+)\s+score=([0-9.]+)\s+reward=([-0-9.]+)\s+completed=(\d+)\s+sla_breaches=(\d+)$"
29
+ )
30
+ _AVG_RE = re.compile(r"^\[Eval\]\s+Average grader score:\s+([0-9.]+)$")
31
+ _BEST_GRADER_RE = re.compile(
32
+ r"\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)",
33
+ re.IGNORECASE,
34
+ )
35
+
36
+
37
+ def _now() -> float:
38
+ return time.time()
39
+
40
+
41
+ def _tail_append(lines: list[str], line: str, max_size: int = 500) -> None:
42
+ lines.append(line.rstrip("\n"))
43
+ if len(lines) > max_size:
44
+ del lines[: len(lines) - max_size]
45
+
46
+
47
+ def _normalize_metric_key(raw: str) -> str:
48
+ return raw.strip().lower().replace(" ", "_")
49
+
50
+
51
def _parse_eval(stdout: str) -> tuple[list[dict[str, Any]], float | None]:
    """Extract per-task rows and the average score from evaluation output.

    Each non-blank line is stripped and matched first against ``_EVAL_ROW_RE``
    (one result row per task) and otherwise against ``_AVG_RE`` (the average
    grader score; the last matching line wins).

    Returns:
        ``(rows, average)`` where ``average`` is ``None`` if no average line
        was found.
    """
    parsed_rows: list[dict[str, Any]] = []
    average: float | None = None

    for raw_line in stdout.splitlines():
        text = raw_line.strip()
        if not text:
            continue

        row_hit = _EVAL_ROW_RE.match(text)
        if row_hit is not None:
            task_id, score, reward, completed, breaches = row_hit.groups()
            parsed_rows.append(
                {
                    "task_id": task_id,
                    "grader_score": float(score),
                    "total_reward": float(reward),
                    "total_completed": int(completed),
                    "total_sla_breaches": int(breaches),
                }
            )
            continue

        avg_hit = _AVG_RE.match(text)
        if avg_hit is not None:
            average = float(avg_hit.group(1))

    return parsed_rows, average
74
+
75
+
76
@dataclass
class TrainingJob:
    """Mutable record describing one training run and its child process.

    All fields except ``process``, ``lock`` and ``last_persist_at`` are
    exported by :meth:`snapshot`. ``lock`` is a plain (non-reentrant)
    ``threading.Lock``; callers that mutate the job are expected to hold it,
    and must NOT hold it when calling :meth:`snapshot`.
    """

    job_id: str
    phase: int
    timesteps: int
    n_envs: int
    seed: int
    config_path: str
    # ``_now`` is looked up when the factory runs, not when the class is
    # defined, so a patched ``_now`` is honoured.
    created_at: float = field(default_factory=lambda: _now())
    started_at: float | None = None
    updated_at: float = field(default_factory=lambda: _now())
    ended_at: float | None = None
    status: Status = "queued"
    progress: float = 0.0
    process_id: int | None = None
    command: list[str] = field(default_factory=list)
    output_model_path: str | None = None
    output_model_name: str | None = None
    latest_metrics: dict[str, float] = field(default_factory=dict)
    metric_history: list[dict[str, Any]] = field(default_factory=list)
    evaluation_rows: list[dict[str, Any]] = field(default_factory=list)
    evaluation_avg_score: float | None = None
    logs_tail: list[str] = field(default_factory=list)
    error_message: str | None = None
    return_code: int | None = None

    # Runtime-only state; never included in snapshots.
    process: subprocess.Popen[str] | None = field(default=None, repr=False)
    lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
    last_persist_at: float = field(default=0.0, repr=False)

    def snapshot(self) -> dict[str, Any]:
        """Return a plain-dict copy of the job, taken under ``self.lock``.

        Every container in the result is a copy, including the per-point
        dicts inside ``metric_history`` and ``evaluation_rows``, so the
        caller can serialise or modify the snapshot without racing against
        (or corrupting) the live job.
        """
        with self.lock:
            return {
                "job_id": self.job_id,
                "phase": self.phase,
                "timesteps": self.timesteps,
                "n_envs": self.n_envs,
                "seed": self.seed,
                "config_path": self.config_path,
                "created_at": self.created_at,
                "started_at": self.started_at,
                "updated_at": self.updated_at,
                "ended_at": self.ended_at,
                "status": self.status,
                "progress": self.progress,
                "process_id": self.process_id,
                "command": list(self.command),
                "output_model_path": self.output_model_path,
                "output_model_name": self.output_model_name,
                "latest_metrics": dict(self.latest_metrics),
                "metric_history": [dict(point) for point in self.metric_history],
                "evaluation_rows": [dict(row) for row in self.evaluation_rows],
                "evaluation_avg_score": self.evaluation_avg_score,
                "logs_tail": list(self.logs_tail),
                "error_message": self.error_message,
                "return_code": self.return_code,
            }
133
+
134
+
135
class TrainingJobManager:
    """Start, monitor, stop and persist training subprocesses.

    Each job runs ``python -m rl.train_ppo`` as a child process. A daemon
    thread reads the child's combined stdout/stderr line by line and updates
    the job's progress and metrics from it.

    Locking: ``self._lock`` guards the job registry and each
    ``TrainingJob.lock`` guards that job's fields. Both are plain
    non-reentrant locks, so ``job.snapshot()`` and ``_persist_job`` (which
    snapshots) must never be called while ``job.lock`` is held.
    """

    # Minimum seconds between persistence writes triggered by log lines.
    _PERSIST_INTERVAL_SECONDS = 1.5

    # Normalised metric-table keys that are copied into ``latest_metrics``.
    _INTERESTING_METRICS = frozenset(
        {
            "total_timesteps",
            "ep_rew_mean",
            "ep_len_mean",
            "grader_score",
            "avg_grader_score",
            "mean_reward",
            "mean_ep_length",
            "episode_mean_sla_penalty",
            "episode_mean_fairness_penalty",
            "explained_variance",
            "approx_kl",
        }
    )

    def __init__(self, repo_root: Path, persistence: PersistenceStore | None = None) -> None:
        """Create the manager and reload previously persisted jobs.

        Args:
            repo_root: Directory used as the child processes' working
                directory and as the base of ``results/``.
            persistence: Optional store; when absent or disabled, jobs live
                in memory only and artifacts go under
                ``<repo_root>/results/training_runs``.
        """
        self._repo_root = repo_root
        self._persistence = persistence
        self._jobs: dict[str, TrainingJob] = {}
        self._lock = threading.Lock()
        self._training_runs_root = (
            self._persistence.training_runs_dir
            if self._persistence_enabled()
            else self._repo_root / "results" / "training_runs"
        )
        self._load_persisted_jobs()

    def _persistence_enabled(self) -> bool:
        """True when a persistence store is configured and enabled."""
        return self._persistence is not None and self._persistence.enabled

    @staticmethod
    def _job_from_snapshot(snap: dict[str, Any]) -> TrainingJob:
        """Rebuild a ``TrainingJob`` from a persisted snapshot dict.

        Raises whatever the conversions raise (``KeyError``, ``ValueError``,
        ``TypeError``) when the snapshot is malformed.
        """

        def _opt(key: str, convert):
            value = snap.get(key)
            return convert(value) if value is not None else None

        return TrainingJob(
            job_id=str(snap["job_id"]),
            phase=int(snap["phase"]),
            timesteps=int(snap["timesteps"]),
            n_envs=int(snap["n_envs"]),
            seed=int(snap["seed"]),
            config_path=str(snap.get("config_path") or ""),
            created_at=float(snap.get("created_at") or _now()),
            started_at=_opt("started_at", float),
            updated_at=float(snap.get("updated_at") or _now()),
            ended_at=_opt("ended_at", float),
            status=str(snap.get("status") or "failed"),
            progress=float(snap.get("progress") or 0.0),
            process_id=_opt("process_id", int),
            command=list(snap.get("command") or []),
            output_model_path=snap.get("output_model_path"),
            output_model_name=snap.get("output_model_name"),
            latest_metrics=dict(snap.get("latest_metrics") or {}),
            metric_history=list(snap.get("metric_history") or []),
            evaluation_rows=list(snap.get("evaluation_rows") or []),
            evaluation_avg_score=_opt("evaluation_avg_score", float),
            logs_tail=list(snap.get("logs_tail") or []),
            error_message=snap.get("error_message"),
            return_code=_opt("return_code", int),
        )

    def _load_persisted_jobs(self) -> None:
        """Populate the registry from the persistence store (up to 500 jobs).

        Malformed snapshots are skipped. Jobs that were still queued or
        running when they were persisted are marked failed, because their
        process handle cannot be recovered.
        """
        if not self._persistence_enabled():
            return
        persisted = self._persistence.list_training_jobs(limit=500)
        with self._lock:
            for snap in persisted:
                try:
                    job = self._job_from_snapshot(snap)
                except Exception:
                    # Best effort: one bad record must not block the others.
                    continue

                # Process handles cannot survive a server restart. Recover to terminal state.
                if job.status in ("queued", "running"):
                    job.status = "failed"
                    msg = "Recovered after restart: previous process state unavailable."
                    job.error_message = f"{job.error_message} {msg}".strip() if job.error_message else msg
                    if job.ended_at is None:
                        job.ended_at = _now()
                job.process = None
                self._jobs[job.job_id] = job

    def clear_jobs(self, *, clear_artifacts: bool = False) -> int:
        """Forget every job, terminating those still active.

        Args:
            clear_artifacts: Also delete and recreate the training-runs
                directory (errors are ignored).

        Returns:
            Number of jobs that were registered before clearing.
        """
        to_stop: list[subprocess.Popen[str]] = []
        with self._lock:
            removed = len(self._jobs)
            for job in self._jobs.values():
                with job.lock:
                    proc = job.process
                    if proc is not None and job.status in ("queued", "running"):
                        to_stop.append(proc)
            self._jobs.clear()
        for proc in to_stop:
            try:
                proc.terminate()
            except Exception:
                # Best effort: the process may already have exited.
                pass
        if self._persistence_enabled():
            self._persistence.clear_training_jobs()
        if clear_artifacts:
            try:
                if self._training_runs_root.exists():
                    shutil.rmtree(self._training_runs_root, ignore_errors=True)
                self._training_runs_root.mkdir(parents=True, exist_ok=True)
            except Exception:
                # Artifact cleanup is optional; never fail the clear because of it.
                pass
        return removed

    def _persist_job(self, job: TrainingJob) -> None:
        """Write the job's snapshot to the store, if it is still registered.

        Jobs removed by ``delete_job``/``clear_jobs`` are skipped so that a
        still-running watcher thread cannot re-create a deleted record.
        Must be called WITHOUT holding ``job.lock``.
        """
        if not self._persistence_enabled():
            return
        with self._lock:
            if self._jobs.get(job.job_id) is not job:
                return
        snapshot = job.snapshot()
        self._persistence.upsert_training_job(snapshot)
        with job.lock:
            job.last_persist_at = _now()

    def list_jobs(self) -> list[dict[str, Any]]:
        """Return snapshots of all jobs, newest (by ``created_at``) first."""
        with self._lock:
            jobs = list(self._jobs.values())
        jobs.sort(key=lambda x: x.created_at, reverse=True)
        return [job.snapshot() for job in jobs]

    def get_job(self, job_id: str) -> dict[str, Any] | None:
        """Return the snapshot of ``job_id``, or ``None`` if unknown."""
        with self._lock:
            job = self._jobs.get(job_id)
        return None if job is None else job.snapshot()

    def start_job(
        self,
        *,
        phase: int,
        timesteps: int,
        n_envs: int,
        seed: int | None,
        config_path: str | None,
    ) -> dict[str, Any]:
        """Register a job, launch its training process and start watching it.

        Args:
            phase: Training phase; 1 selects the phase-1 CLI flags and
                default config, anything else the phase-2 ones.
            timesteps: Value passed to ``--timesteps``.
            n_envs: Value passed to ``--n-envs``.
            seed: Seed to use; when ``None`` one is derived from the clock.
            config_path: Config file; when falsy a per-phase default is used.

        Returns:
            Snapshot of the newly started job.

        Raises:
            OSError, ValueError: if the process cannot be launched. The job
                is recorded as failed before the exception propagates.
        """
        job_id = str(uuid4())
        job_seed = int(seed if seed is not None else int(time.time()) % 1_000_000)
        cfg = config_path or (
            "rl/configs/ppo_easy.yaml" if phase == 1 else "rl/configs/curriculum.yaml"
        )
        job = TrainingJob(
            job_id=job_id,
            phase=phase,
            timesteps=timesteps,
            n_envs=n_envs,
            seed=job_seed,
            config_path=cfg,
        )

        with self._lock:
            self._jobs[job_id] = job

        cmd = [
            sys.executable,
            "-u",
            "-m",
            "rl.train_ppo",
            "--phase",
            str(phase),
            "--timesteps",
            str(timesteps),
            "--n-envs",
            str(n_envs),
            "--seed",
            str(job_seed),
        ]
        if phase == 1:
            # Keep Phase 1 UI responsive by emitting multiple eval checkpoints
            # across the requested run length instead of only near the end.
            phase1_eval_freq = max(128, int((timesteps / max(n_envs, 1)) / 15))
            cmd.extend(
                [
                    "--phase1-config",
                    cfg,
                    "--phase1-eval-freq",
                    str(phase1_eval_freq),
                ]
            )
        else:
            cmd.extend(["--phase2-config", cfg])

        env = os.environ.copy()
        env["PYTHONUNBUFFERED"] = "1"

        try:
            proc = subprocess.Popen(
                cmd,
                cwd=str(self._repo_root),
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
            )
        except (OSError, ValueError) as exc:
            # Do not leave the job stuck in "queued" when the launch fails.
            with job.lock:
                job.command = cmd
                job.status = "failed"
                job.error_message = f"Failed to launch training process: {exc}"
                job.ended_at = _now()
                job.updated_at = _now()
            self._persist_job(job)
            raise

        with job.lock:
            job.command = cmd
            job.status = "running"
            job.started_at = _now()
            job.updated_at = _now()
            job.process_id = proc.pid
            job.process = proc
            _tail_append(job.logs_tail, f"[training_jobs] started pid={proc.pid}")
            _tail_append(job.logs_tail, f"[training_jobs] command: {' '.join(cmd)}")
        self._persist_job(job)

        t = threading.Thread(target=self._watch_job, args=(job,), daemon=True)
        t.start()

        return job.snapshot()

    @staticmethod
    def _append_metric_point_locked(
        job: TrainingJob,
        *,
        timesteps: float | None,
        reward: float | None = None,
        score: float | None = None,
        source: str | None = None,
        max_points: int = 5000,
    ) -> None:
        """
        Append (or merge) a structured metric point while holding job.lock.

        Points without a finite timestep, or with neither a finite reward
        nor a finite score, are dropped. A point whose timestep equals the
        last recorded one is merged into it. History is capped at
        ``max_points`` entries (oldest dropped first).
        """
        if timesteps is None or not math.isfinite(float(timesteps)):
            return

        payload: dict[str, Any] = {"t": float(timesteps)}
        if reward is not None and math.isfinite(float(reward)):
            payload["ep_rew_mean"] = float(reward)
        if score is not None and math.isfinite(float(score)):
            payload["grader_score"] = float(score)
        if source:
            payload["source"] = str(source)

        if "ep_rew_mean" not in payload and "grader_score" not in payload:
            return

        if job.metric_history and float(job.metric_history[-1].get("t", -1.0)) == float(payload["t"]):
            job.metric_history[-1].update(payload)
        else:
            job.metric_history.append(payload)

        if len(job.metric_history) > max_points:
            del job.metric_history[: len(job.metric_history) - max_points]

    @staticmethod
    def _resolve_timestep_locked(job: TrainingJob, default: float) -> float:
        """Pick the x-coordinate for a score point while holding job.lock.

        Uses the latest ``total_timesteps`` metric when known, otherwise
        ``default``; if that is not positive, falls back to the next ordinal
        position in the metric history.
        """
        ts = float(job.latest_metrics.get("total_timesteps", default))
        return ts if ts > 0 else float(len(job.metric_history) + 1)

    def stop_job(self, job_id: str) -> dict[str, Any] | None:
        """Request termination of a queued/running job.

        Returns:
            The job snapshot (unchanged if the job was not active), or
            ``None`` when ``job_id`` is unknown.
        """
        with self._lock:
            job = self._jobs.get(job_id)
        if job is None:
            return None

        with job.lock:
            proc = job.process
            stoppable = proc is not None and job.status in ("running", "queued")
            if stoppable:
                job.status = "stopped"
                job.updated_at = _now()
        # Snapshot only after releasing job.lock: the lock is not reentrant.
        if not stoppable:
            return job.snapshot()
        self._persist_job(job)

        try:
            proc.terminate()
        except Exception:
            # Best effort: the process may already have exited.
            pass
        return job.snapshot()

    def delete_job(self, job_id: str, *, clear_artifacts: bool = False) -> bool:
        """Remove a job from the registry and the store.

        An active process is terminated. With ``clear_artifacts`` the saved
        model file is deleted, and its directory too if that leaves it empty.

        Returns:
            ``False`` when ``job_id`` is unknown, ``True`` otherwise.
        """
        with self._lock:
            job = self._jobs.pop(job_id, None)
        if job is None:
            return False

        with job.lock:
            proc = job.process
            status = job.status
            output_model_path = job.output_model_path

        if proc is not None and status in ("queued", "running"):
            try:
                proc.terminate()
            except Exception:
                pass

        if self._persistence_enabled():
            self._persistence.delete_training_job(job_id)

        if clear_artifacts and output_model_path:
            try:
                out = Path(output_model_path)
                if out.exists() and out.is_file():
                    out.unlink(missing_ok=True)
                parent = out.parent
                if parent.exists() and parent.is_dir() and not any(parent.iterdir()):
                    parent.rmdir()
            except Exception:
                # Artifact cleanup is optional; the job itself is already gone.
                pass
        return True

    def _watch_job(self, job: TrainingJob) -> None:
        """Thread target: consume the child's output, then record its outcome."""
        proc = job.process
        if proc is None or proc.stdout is None:
            with job.lock:
                job.status = "failed"
                job.error_message = "Training process failed to start."
                job.updated_at = _now()
                job.ended_at = _now()
            self._persist_job(job)
            return

        for line in proc.stdout:
            self._update_from_line(job, line)

        return_code = proc.wait()
        with job.lock:
            job.return_code = int(return_code)
            was_stopped = job.status == "stopped"
            if not was_stopped:
                if return_code == 0:
                    job.status = "completed"
                    job.progress = 1.0
                else:
                    job.status = "failed"
                    if not job.logs_tail:
                        _tail_append(
                            job.logs_tail,
                            "[training_jobs] Process ended before producing logs. "
                            "Check RL dependencies/environment and training command arguments.",
                        )
                    job.error_message = f"Training exited with code {return_code}."
            job.ended_at = _now()
            job.updated_at = _now()
            job.process = None
        # Persist in every case, including "stopped", so the end time and
        # return code are not lost.
        self._persist_job(job)

        if return_code == 0 and not was_stopped:
            try:
                self._finalize_artifacts(job)
            except Exception as exc:
                # Record the failure instead of letting the thread die silently.
                with job.lock:
                    job.error_message = f"Artifact finalization failed: {exc}"
                    job.updated_at = _now()
                self._persist_job(job)

    def _update_from_line(self, job: TrainingJob, line: str) -> None:
        """Fold one line of child output into the job's logs and metrics."""
        line = line.rstrip("\n")
        with job.lock:
            _tail_append(job.logs_tail, line)
            job.updated_at = _now()
            metrics = job.latest_metrics

            p = _PROGRESS_RE.search(line)
            if p:
                num = int(p.group(1).replace(",", ""))
                den = int(p.group(2).replace(",", ""))
                if den > 0:
                    job.progress = max(0.0, min(1.0, num / den))

            ep = _EVAL_PROGRESS_RE.search(line)
            if ep:
                ts = int(ep.group(1))
                rew = float(ep.group(2))
                metrics["total_timesteps"] = float(ts)
                metrics["ep_rew_mean"] = rew
                self._append_metric_point_locked(
                    job,
                    timesteps=float(ts),
                    reward=rew,
                    source="eval_progress",
                )
                if job.timesteps > 0:
                    job.progress = max(0.0, min(1.0, ts / float(job.timesteps)))

            m = _METRIC_ROW_RE.search(line)
            if m:
                key = _normalize_metric_key(m.group(1))
                val = float(m.group(2))
                if key in self._INTERESTING_METRICS:
                    metrics[key] = val
                    current_ts = metrics.get("total_timesteps")
                    known_ts = float(current_ts) if current_ts is not None else None
                    if key == "total_timesteps":
                        # Explicit None check: a grader score of 0.0 is valid.
                        score = metrics.get("grader_score")
                        if score is None:
                            score = metrics.get("avg_grader_score")
                        self._append_metric_point_locked(
                            job,
                            timesteps=val,
                            reward=metrics.get("ep_rew_mean"),
                            score=score,
                            source="metrics_row_ts",
                        )
                    elif key in {"ep_rew_mean", "mean_reward"}:
                        self._append_metric_point_locked(
                            job,
                            timesteps=known_ts,
                            reward=val,
                            source="metrics_row_reward",
                        )
                    elif key in {"grader_score", "avg_grader_score"}:
                        self._append_metric_point_locked(
                            job,
                            timesteps=known_ts,
                            score=val,
                            source="metrics_row_score",
                        )

            history_default = float(job.metric_history[-1]["t"]) if job.metric_history else 0.0

            best = _BEST_GRADER_RE.search(line)
            if best:
                score = float(best.group(1))
                metrics["grader_score"] = score
                self._append_metric_point_locked(
                    job,
                    timesteps=self._resolve_timestep_locked(job, history_default),
                    score=score,
                    source="best_grader",
                )
                history_default = float(job.metric_history[-1]["t"]) if job.metric_history else 0.0

            avg_line = _AVG_RE.match(line.strip())
            if avg_line:
                avg_score = float(avg_line.group(1))
                metrics["avg_grader_score"] = avg_score
                self._append_metric_point_locked(
                    job,
                    timesteps=self._resolve_timestep_locked(job, history_default),
                    score=avg_score,
                    source="avg_grader",
                )

            should_persist = job.updated_at - job.last_persist_at >= self._PERSIST_INTERVAL_SECONDS
        if should_persist:
            self._persist_job(job)

    def _finalize_artifacts(self, job: TrainingJob) -> None:
        """Archive the trained model under a unique name and evaluate it.

        Copies ``results/best_model/phase{1,2}_final.zip`` (when present)
        into the job's run directory, runs ``python -m rl.evaluate`` on the
        copy and stores the parsed evaluation results on the job.
        """
        src_name = "phase1_final.zip" if job.phase == 1 else "phase2_final.zip"
        src = self._repo_root / "results" / "best_model" / src_name
        run_dir = self._training_runs_root / job.job_id
        run_dir.mkdir(parents=True, exist_ok=True)

        # Keep a mirror under repo/results for local developer convenience.
        mirror_dir = self._repo_root / "results" / "training_runs" / job.job_id
        if mirror_dir != run_dir:
            mirror_dir.mkdir(parents=True, exist_ok=True)

        if not src.exists():
            self._persist_job(job)
            return

        ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        unique_name = f"phase{job.phase}_seed{job.seed}_{ts}_{job.job_id[:8]}.zip"
        out = run_dir / unique_name
        shutil.copy2(src, out)
        if mirror_dir != run_dir:
            try:
                shutil.copy2(src, mirror_dir / unique_name)
            except Exception:
                # The mirror is a convenience copy only.
                pass
        with job.lock:
            job.output_model_path = str(out.resolve())
            job.output_model_name = unique_name
            job.updated_at = _now()

        model_type = "maskable"
        eval_cmd = [
            sys.executable,
            "-m",
            "rl.evaluate",
            "--model",
            str(out),
            "--episodes",
            "3",
            "--model-type",
            model_type,
        ]
        proc = subprocess.run(
            eval_cmd,
            cwd=str(self._repo_root),
            env=os.environ.copy(),
            capture_output=True,
            text=True,
            check=False,
        )
        rows, avg = _parse_eval(proc.stdout or "")
        with job.lock:
            job.evaluation_rows = rows
            job.evaluation_avg_score = avg
            if avg is not None:
                job.latest_metrics["avg_grader_score"] = float(avg)
                self._append_metric_point_locked(
                    job,
                    timesteps=self._resolve_timestep_locked(job, float(job.timesteps)),
                    score=float(avg),
                    source="final_eval_avg",
                )
            _tail_append(job.logs_tail, "----- EVALUATION -----")
            for ln in (proc.stdout or "").splitlines():
                _tail_append(job.logs_tail, ln)
            if proc.returncode != 0:
                # Surface the evaluator's stderr; otherwise the cause is lost.
                for ln in (proc.stderr or "").splitlines():
                    _tail_append(job.logs_tail, ln)
                if not job.error_message:
                    job.error_message = f"Evaluation exited with code {proc.returncode}."
            job.updated_at = _now()
        self._persist_job(job)
app/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py — Shared pure-function helpers.
3
+ No imports from env.py or simulator.py (prevents circular imports).
4
+ """
5
+ from __future__ import annotations
6
+ from app.models import ServiceType
7
+
8
+
9
def completion_fairness_gap(
    arrived_by_service: dict,
    completed_by_service: dict,
) -> float:
    """
    Spread between the best and worst per-service completion rates.

    A service's rate is ``completed / arrived``; services with no arrivals
    are ignored and a service missing from ``completed_by_service`` counts
    as zero completions. Returns 0.0 when fewer than two services have
    arrivals, otherwise ``max(rate) - min(rate)`` rounded to 4 decimals.
    """
    completion_rates = [
        completed_by_service.get(service, 0) / arrived_count
        for service, arrived_count in arrived_by_service.items()
        if arrived_count > 0
    ]
    if len(completion_rates) < 2:
        return 0.0
    spread = max(completion_rates) - min(completion_rates)
    return round(spread, 4)
app/web/app.js ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Mutable UI state shared by every handler in this file.
const state = {
  sessionId: null,
  taskId: "district_backlog_easy",
  agentPolicy: "backlog_clearance",
  availableAgents: [],
  trace: [],
  running: false,
};

// Agent policies offered when the backend does not supply a list.
const AGENTS_FALLBACK = ["urgent_first", "oldest_first", "backlog_clearance"];

// Shorthand for id-based DOM lookups.
const byId = (id) => document.getElementById(id);

// DOM elements used by the console, looked up once at load time.
const els = {
  // Single-run controls
  taskSelect: byId("taskSelect"),
  agentSelect: byId("agentSelect"),
  stepsInput: byId("stepsInput"),
  startRunBtn: byId("startRunBtn"),
  resetSessionBtn: byId("resetSessionBtn"),
  statusLine: byId("statusLine"),
  stepTableBody: document.querySelector("#stepTable tbody"),
  runChart: byId("runChart"),
  // Benchmark controls
  benchTaskSelect: byId("benchTaskSelect"),
  benchRunsInput: byId("benchRunsInput"),
  benchStepsInput: byId("benchStepsInput"),
  runBenchmarkBtn: byId("runBenchmarkBtn"),
  benchChart: byId("benchChart"),
  benchTableBody: document.querySelector("#benchTable tbody"),
  // KPI tiles
  kpiReward: byId("kpiReward"),
  kpiBacklog: byId("kpiBacklog"),
  kpiCompleted: byId("kpiCompleted"),
  kpiSla: byId("kpiSla"),
  kpiFairness: byId("kpiFairness"),
  kpiScore: byId("kpiScore"),
};
34
+
35
/** Show `msg` in the status line. */
function setStatus(msg) {
  const { statusLine } = els;
  statusLine.textContent = msg;
}
38
+
39
/**
 * Call a backend endpoint under `/api` and return its parsed JSON body.
 *
 * @param {string} path - Endpoint path appended to `/api`.
 * @param {object} [options] - Extra `fetch` options; they are spread after
 *   the default JSON `headers`, so a `headers` option replaces the default.
 * @returns {Promise<any>} Parsed JSON body, or `null` when the body is not JSON.
 * @throws {Error} When the response status is not OK. The message carries
 *   the body's `detail` (serialised to JSON when it is not a string, so
 *   structured error lists stay readable) or, failing that, the status code.
 */
async function api(path, options = {}) {
  const response = await fetch(`/api${path}`, {
    headers: { "Content-Type": "application/json" },
    ...options,
  });

  let payload = null;
  try {
    payload = await response.json();
  } catch (e) {
    // Non-JSON (or empty) body: treat as "no payload".
    payload = null;
  }

  if (!response.ok) {
    const rawDetail = payload && payload.detail;
    let detail = `${response.status}`;
    if (rawDetail) {
      detail = typeof rawDetail === "string" ? rawDetail : JSON.stringify(rawDetail);
    }
    throw new Error(`API ${path} failed: ${detail}`);
  }
  return payload;
}
56
+
57
/** Record the busy flag and enable/disable the three action buttons. */
function setLoading(isLoading) {
  state.running = isLoading;
  const { startRunBtn, resetSessionBtn, runBenchmarkBtn } = els;
  for (const button of [startRunBtn, resetSessionBtn, runBenchmarkBtn]) {
    button.disabled = isLoading;
  }
}
63
+
64
/** Coerce `v` to a number and format it with two decimals. */
function formatFloat(v) {
  const numeric = Number(v);
  return numeric.toFixed(2);
}
67
+
68
/**
 * Refresh the KPI tiles from the latest step. The reward tile shows the sum
 * of rewards over the whole trace; the other tiles show the step's values.
 */
function updateKpis(step) {
  if (!step) {
    return;
  }

  let totalReward = 0;
  for (const entry of state.trace) {
    totalReward += entry.reward;
  }

  const { backlog, completed, slaBreaches, fairnessGap } = step;
  els.kpiReward.textContent = formatFloat(totalReward);
  els.kpiBacklog.textContent = String(backlog);
  els.kpiCompleted.textContent = String(completed);
  els.kpiSla.textContent = String(slaBreaches);
  els.kpiFairness.textContent = formatFloat(fairnessGap);
}
77
+
78
/**
 * Build a one-line label for an action object: the action type, followed by
 * the optional service / target / officer delta / priority mode in
 * parentheses. Anything that is not an object yields "unknown".
 */
function renderAction(actionObj) {
  const isObject = Boolean(actionObj) && typeof actionObj === "object";
  if (!isObject) {
    return "unknown";
  }

  const {
    action_type: type,
    service,
    target_service: target,
    officer_delta: delta,
    priority_mode: mode,
  } = actionObj;

  const details = [
    service ? `service=${service}` : null,
    target ? `target=${target}` : null,
    typeof delta === "number" ? `delta=${delta}` : null,
    mode ? `mode=${mode}` : null,
  ].filter((part) => part !== null);

  const label = type || "unknown";
  if (details.length === 0) {
    return label;
  }
  return `${label} (${details.join(", ")})`;
}
90
+
91
/**
 * Append one row to the step table.
 *
 * Cells are filled through `textContent` rather than an HTML string, so
 * text that originates from the API (such as the action label) is always
 * shown literally and can never be interpreted as markup.
 *
 * @param {object} row - Trace entry with `step`, `day`, `action`, `reward`,
 *   `backlog`, `completed`, `slaBreaches` and `done`.
 */
function appendStepRow(row) {
  const tr = document.createElement("tr");
  const cellValues = [
    row.step,
    row.day,
    row.action,
    formatFloat(row.reward),
    row.backlog,
    row.completed,
    row.slaBreaches,
    row.done ? "done" : "running",
  ];
  for (const value of cellValues) {
    const td = document.createElement("td");
    td.textContent = `${value}`;
    tr.appendChild(td);
  }
  els.stepTableBody.appendChild(tr);
}
106
+
107
/** Drop the recorded trace and reset the table, KPI tiles and run chart. */
function clearRunView() {
  state.trace = [];
  els.stepTableBody.innerHTML = "";

  const tileDefaults = [
    ["kpiReward", "0.00"],
    ["kpiBacklog", "0"],
    ["kpiCompleted", "0"],
    ["kpiSla", "0"],
    ["kpiFairness", "0.00"],
    ["kpiScore", "-"],
  ];
  for (const [tileName, text] of tileDefaults) {
    els[tileName].textContent = text;
  }

  drawRunChart([]);
}
118
+
119
/** Stroke the left and bottom axes as one L-shaped path inset by `pad`. */
function drawAxes(ctx, w, h, pad) {
  const left = pad;
  const top = pad;
  const bottom = h - pad;
  const right = w - pad;

  ctx.strokeStyle = "#2f2f2f";
  ctx.lineWidth = 1;

  ctx.beginPath();
  ctx.moveTo(left, top);
  ctx.lineTo(left, bottom);
  ctx.lineTo(right, bottom);
  ctx.stroke();
}
128
+
129
/**
 * Stroke `points` as a polyline across the plot area. X positions are
 * spread evenly; Y is each value scaled against `yMax` (floored at 1e-6 to
 * avoid dividing by zero). Does nothing for an empty series.
 */
function drawSeries(ctx, points, color, pad, w, h, yMax) {
  if (points.length === 0) {
    return;
  }

  const xStep = (w - pad * 2) / Math.max(points.length - 1, 1);
  const baseline = h - pad;

  ctx.strokeStyle = color;
  ctx.lineWidth = 2;
  ctx.beginPath();
  for (let index = 0; index < points.length; index += 1) {
    const x = pad + index * xStep;
    const y = baseline - (points[index] / Math.max(yMax, 1e-6)) * (h - pad * 2);
    if (index === 0) {
      ctx.moveTo(x, y);
    } else {
      ctx.lineTo(x, y);
    }
  }
  ctx.stroke();
}
143
+
144
/**
 * Redraw the run chart: axes, then the per-step reward (negative values
 * clamped to 0) and backlog series on a shared Y scale, plus text labels.
 * An empty trace leaves only the axes.
 */
function drawRunChart(trace) {
  const { runChart: canvas } = els;
  const ctx = canvas.getContext("2d");
  const { width: w, height: h } = canvas;
  const pad = 34;

  ctx.clearRect(0, 0, w, h);
  drawAxes(ctx, w, h, pad);
  if (trace.length === 0) {
    return;
  }

  const rewards = [];
  const backlogs = [];
  for (const entry of trace) {
    rewards.push(Math.max(0, entry.reward));
    backlogs.push(entry.backlog);
  }
  // Shared scale, never below 1.
  const yMax = Math.max(...rewards, ...backlogs, 1);

  drawSeries(ctx, rewards, "#ffffff", pad, w, h, yMax);
  drawSeries(ctx, backlogs, "#7a7a7a", pad, w, h, yMax);

  ctx.fillStyle = "#d2d2d2";
  ctx.font = "12px Segoe UI";
  const labelY = pad + 8;
  ctx.fillText("reward", pad + 6, labelY);
  ctx.fillText("backlog", pad + 70, labelY);
}
168
+
169
/**
 * Redraw the benchmark chart. Each agent gets a horizontal slot containing
 * a bar for its average score, one dot per individual run score (with a
 * small repeating horizontal offset so dots do not overlap) and its policy
 * name underneath. Scores are clamped to [0, 1] before scaling.
 */
function drawBenchmarkChart(agentResults) {
  const canvas = els.benchChart;
  const ctx = canvas.getContext("2d");
  const { width: w, height: h } = canvas;
  const pad = 34;

  ctx.clearRect(0, 0, w, h);
  drawAxes(ctx, w, h, pad);
  if (agentResults.length === 0) {
    return;
  }

  const slotW = (w - pad * 2) / agentResults.length;
  const clampUnit = (value) => Math.min(1, Math.max(0, value));

  for (let idx = 0; idx < agentResults.length; idx += 1) {
    const agent = agentResults[idx];
    const cx = pad + idx * slotW + slotW / 2;

    // Average-score bar.
    const barW = Math.max(24, slotW * 0.48);
    const barH = (h - pad * 2) * clampUnit(agent.average_score);
    ctx.fillStyle = "#ffffff";
    ctx.fillRect(cx - barW / 2, h - pad - barH, barW, barH);

    // One dot per run.
    ctx.fillStyle = "#9a9a9a";
    for (let runIdx = 0; runIdx < agent.runs.length; runIdx += 1) {
      const jitter = ((runIdx % 7) - 3) * 2.5;
      const dotY = h - pad - (h - pad * 2) * clampUnit(agent.runs[runIdx].score);
      ctx.beginPath();
      ctx.arc(cx + jitter, dotY, 3, 0, Math.PI * 2);
      ctx.fill();
    }

    // Policy label.
    ctx.fillStyle = "#d0d0d0";
    ctx.font = "11px Segoe UI";
    ctx.textAlign = "center";
    ctx.fillText(agent.agent_policy, cx, h - 10);
  }

  ctx.textAlign = "start";
}
210
+
211
/**
 * Discard the current backend session (if any) and open a fresh one for the
 * task selected in the task dropdown, then clear the run view.
 *
 * The stored session id is cleared as soon as the old session has been
 * released, so a failing `/reset` can never leave the UI pointing at a
 * session that was just deleted.
 *
 * @throws {Error} Propagates a failure of the `/reset` call.
 */
async function resetSession() {
  if (state.sessionId) {
    const staleSessionId = state.sessionId;
    try {
      await api(`/sessions/${staleSessionId}`, { method: "DELETE" });
    } catch (err) {
      // Ignore stale session cleanup errors; reset will still create a fresh session.
    }
    state.sessionId = null;
  }

  state.taskId = els.taskSelect.value;
  const payload = await api("/reset", {
    method: "POST",
    body: JSON.stringify({ task_id: state.taskId }),
  });
  state.sessionId = payload.session_id;
  clearRunView();
  setStatus(`Session ready: ${state.sessionId.slice(0, 8)}... (${state.taskId})`);
}
229
+
230
/**
 * Run up to the requested number of auto-steps with the selected agent
 * policy, updating the table, KPIs and chart after every step, then fetch
 * and show the grade. Stops early when the backend reports `done`. A new
 * session is created first when none exists or the selected task changed.
 * Errors are shown in the status line.
 */
async function runSimulation() {
  const requestedSteps = Number(els.stepsInput.value || 0);
  const isValidCount = Boolean(requestedSteps) && requestedSteps >= 1;
  if (!isValidCount) {
    setStatus("Enter a valid step count.");
    return;
  }

  // Convert one /autostep response into a trace entry.
  const toTraceRow = (stepRes) => {
    const obs = stepRes.observation;
    return {
      step: state.trace.length + 1,
      day: obs.day,
      action: renderAction(stepRes.action),
      reward: Number(stepRes.reward || 0),
      backlog: obs.total_backlog,
      completed: obs.total_completed,
      slaBreaches: obs.total_sla_breaches,
      fairnessGap: Number(obs.fairness_gap || 0),
      done: !!stepRes.done,
    };
  };

  setLoading(true);
  try {
    const needsFreshSession = !state.sessionId || state.taskId !== els.taskSelect.value;
    if (needsFreshSession) {
      await resetSession();
    }

    state.agentPolicy = els.agentSelect.value;
    setStatus(`Running ${requestedSteps} steps with ${state.agentPolicy}...`);

    let stepIndex = 0;
    while (stepIndex < requestedSteps) {
      const stepRes = await api("/autostep", {
        method: "POST",
        body: JSON.stringify({
          session_id: state.sessionId,
          agent_policy: state.agentPolicy,
        }),
      });

      const row = toTraceRow(stepRes);
      state.trace.push(row);
      appendStepRow(row);
      updateKpis(row);
      drawRunChart(state.trace);

      if (stepRes.done) {
        break;
      }
      stepIndex += 1;
    }

    const gradeRes = await api("/grade", {
      method: "POST",
      body: JSON.stringify({ session_id: state.sessionId }),
    });
    const scoreText = formatFloat(gradeRes.score);
    els.kpiScore.textContent = scoreText;
    setStatus(`Run finished. Score: ${scoreText} (${gradeRes.grader_name})`);
  } catch (err) {
    setStatus(err.message);
  } finally {
    setLoading(false);
  }
}
287
+
288
/**
 * Run the benchmark for the selected task across all known agent policies
 * (or the built-in fallback list) and render the results table and chart.
 * Errors are shown in the status line.
 *
 * Result cells are filled through `textContent`, so policy names returned
 * by the API are always displayed literally, never parsed as markup.
 */
async function runBenchmark() {
  setLoading(true);
  try {
    const taskId = els.benchTaskSelect.value;
    const runs = Number(els.benchRunsInput.value || 0);
    const maxSteps = Number(els.benchStepsInput.value || 0);
    if (!runs || !maxSteps) {
      setStatus("Benchmark inputs are invalid.");
      return;
    }

    const benchmarkAgents = state.availableAgents.length ? state.availableAgents : AGENTS_FALLBACK;
    setStatus(`Running benchmark on ${taskId} with ${benchmarkAgents.length} agents...`);

    const res = await api("/benchmark", {
      method: "POST",
      body: JSON.stringify({
        task_id: taskId,
        runs,
        max_steps: maxSteps,
        agent_policies: benchmarkAgents,
      }),
    });

    els.benchTableBody.innerHTML = "";
    for (const agent of res.agent_results) {
      const tr = document.createElement("tr");
      const cellValues = [
        agent.agent_policy,
        formatFloat(agent.average_score),
        formatFloat(agent.min_score),
        formatFloat(agent.max_score),
      ];
      for (const value of cellValues) {
        const td = document.createElement("td");
        td.textContent = `${value}`;
        tr.appendChild(td);
      }
      els.benchTableBody.appendChild(tr);
    }
    drawBenchmarkChart(res.agent_results);
    setStatus("Benchmark completed.");
  } catch (err) {
    setStatus(err.message);
  } finally {
    setLoading(false);
  }
}
331
+
332
/**
 * Populate the task and agent dropdowns from the backend, pick the default
 * selections and open the first session. Errors are shown in the status
 * line.
 *
 * The agent list falls back to `AGENTS_FALLBACK` when the `/agents` call
 * fails or returns anything other than a non-empty array. The default task
 * is chosen from the task list reported by `/health` when present,
 * otherwise from the `/tasks` list.
 */
async function init() {
  setLoading(true);
  try {
    const health = await api("/health");
    const tasksRes = await api("/tasks");
    const agents = await api("/agents").catch(() => AGENTS_FALLBACK);

    tasksRes.tasks.forEach((task) => {
      els.taskSelect.add(new Option(task, task));
      els.benchTaskSelect.add(new Option(task, task));
    });

    // Guard against null / object payloads, not just empty arrays.
    const hasAgents = Array.isArray(agents) && agents.length > 0;
    state.availableAgents = hasAgents ? agents : AGENTS_FALLBACK;
    state.availableAgents.forEach((agent) => {
      els.agentSelect.add(new Option(agent, agent));
    });

    const reportedTasks = health && Array.isArray(health.available_tasks)
      ? health.available_tasks
      : tasksRes.tasks;
    els.taskSelect.value = reportedTasks.includes("district_backlog_easy")
      ? "district_backlog_easy"
      : tasksRes.tasks[0];
    els.benchTaskSelect.value = els.taskSelect.value;
    els.agentSelect.value = state.availableAgents.includes("backlog_clearance")
      ? "backlog_clearance"
      : state.availableAgents[0];

    await resetSession();
  } catch (err) {
    setStatus(`Initialization failed: ${err.message}`);
  } finally {
    setLoading(false);
  }
}
366
+
367
/** Reset-button handler: open a fresh session, reporting errors in the status line. */
async function handleResetClick() {
  setLoading(true);
  try {
    await resetSession();
  } catch (err) {
    setStatus(err.message);
  } finally {
    setLoading(false);
  }
}

// Wire the buttons, then bootstrap the page.
els.startRunBtn.addEventListener("click", runSimulation);
els.resetSessionBtn.addEventListener("click", () => handleResetClick());
els.runBenchmarkBtn.addEventListener("click", runBenchmark);

init();
app/web/index.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Gov Workflow OpenEnv Console</title>
7
+ <link rel="stylesheet" href="/ui/assets/styles.css" />
8
+ </head>
9
+ <body>
10
+ <div id="app-root" class="app-root">
11
+ <div class="boot">Loading frontend...</div>
12
+ </div>
13
+
14
+ <script>
15
+ window.__APP_MOUNTED__ = false;
16
+ setTimeout(function () {
17
+ if (!window.__APP_MOUNTED__) {
18
+ var root = document.getElementById("app-root");
19
+ if (root) {
20
+ root.innerHTML = "<div class='boot boot-error'><h2>Frontend bootstrap failed</h2><p>The React runtime could not load in this environment. Backend API is still active at <code>/docs</code>.</p></div>";
21
+ }
22
+ }
23
+ }, 6000);
24
+ </script>
25
+ <script type="module" src="/ui/assets/react_app.js"></script>
26
+ </body>
27
+ </html>
app/web/react_app.js ADDED
@@ -0,0 +1,933 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useEffect, useMemo, useRef, useState } from "https://esm.sh/react@18.3.1";
2
+ import { createRoot } from "https://esm.sh/react-dom@18.3.1/client";
3
+
4
/**
 * Calls a backend endpoint under the `/api` prefix and returns its JSON body.
 *
 * @param {string} path - Endpoint path appended to `/api` (e.g. "/reset").
 * @param {object} [options] - Extra `fetch` init fields; they are spread after
 *   the default JSON content-type header and may override it.
 * @returns {Promise<any>} Parsed JSON payload, or null when the body is not JSON.
 * @throws {Error} When the response status is not OK. The message carries the
 *   payload's `detail` field when present, otherwise the HTTP status code.
 */
async function api(path, options = {}) {
  const res = await fetch(`/api${path}`, {
    headers: { "Content-Type": "application/json" },
    ...options,
  });

  // A missing or non-JSON body is tolerated on purpose: the status code alone
  // decides success, and the payload is only used when it could be parsed.
  let payload = null;
  try {
    payload = await res.json();
  } catch (err) {
    payload = null;
  }

  if (!res.ok) {
    const detail = payload && payload.detail ? payload.detail : `${res.status}`;
    // `detail` may be structured (e.g. a list of validation errors); serialise
    // it so the message does not degrade to "[object Object]".
    const detailText = typeof detail === "string" ? detail : JSON.stringify(detail);
    throw new Error(`API ${path} failed: ${detailText}`);
  }
  return payload;
}
21
+
22
/**
 * Clears the canvas and strokes an L-shaped axis: a vertical line down the
 * left padding edge joined to a horizontal line along the bottom padding edge.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D drawing context.
 * @param {number} w - Canvas width.
 * @param {number} h - Canvas height.
 * @param {number} pad - Inset of the axes from the canvas edges.
 */
function drawAxes(ctx, w, h, pad) {
  const left = pad;
  const top = pad;
  const right = w - pad;
  const bottom = h - pad;

  ctx.clearRect(0, 0, w, h);
  Object.assign(ctx, { strokeStyle: "#2f2f2f", lineWidth: 1 });

  ctx.beginPath();
  ctx.moveTo(left, top);
  ctx.lineTo(left, bottom);
  ctx.lineTo(right, bottom);
  ctx.stroke();
}
32
+
33
/**
 * Canvas line chart that overlays two numeric series.
 *
 * Both series share one y-scale whose maximum is the largest value across the
 * two series, floored at 1. Each series is spread over the full plot width
 * independently of the other's length. The chart is redrawn whenever any prop
 * changes.
 *
 * @param {object} props
 * @param {number[]} props.pointsA - First series (drawn in white).
 * @param {number[]} props.pointsB - Second series (drawn in grey).
 * @param {string} props.labelA - Legend text for the first series.
 * @param {string} props.labelB - Legend text for the second series.
 */
function LineCanvas({ pointsA, pointsB, labelA, labelB }) {
  const canvasRef = useRef(null);

  useEffect(() => {
    const node = canvasRef.current;
    if (!node) return;

    const ctx = node.getContext("2d");
    const width = node.width;
    const height = node.height;
    const pad = 34;
    const plotWidth = width - pad * 2;
    const plotHeight = height - pad * 2;

    drawAxes(ctx, width, height, pad);

    const combined = [...pointsA, ...pointsB];
    if (!combined.length) return;

    const yMax = Math.max(...combined, 1);

    // Strokes one polyline; an empty series draws nothing.
    const strokeSeries = (values, color) => {
      if (!values.length) return;
      // A single point would otherwise divide by zero, hence the floor of 1.
      const stepX = plotWidth / Math.max(values.length - 1, 1);
      ctx.strokeStyle = color;
      ctx.lineWidth = 2;
      ctx.beginPath();
      values.forEach((value, index) => {
        const x = pad + index * stepX;
        const y = height - pad - (value / yMax) * plotHeight;
        if (index === 0) {
          ctx.moveTo(x, y);
        } else {
          ctx.lineTo(x, y);
        }
      });
      ctx.stroke();
    };

    strokeSeries(pointsA, "#ffffff");
    strokeSeries(pointsB, "#808080");

    // Legend text in the top-left corner of the plot area.
    const legendY = pad + 8;
    ctx.fillStyle = "#d5d5d5";
    ctx.font = "12px Segoe UI";
    ctx.fillText(labelA, pad + 6, legendY);
    ctx.fillText(labelB, pad + 92, legendY);
  }, [pointsA, pointsB, labelA, labelB]);

  return React.createElement("canvas", { ref: canvasRef, width: 1200, height: 320 });
}
76
+
77
/**
 * Canvas bar chart comparing two scores side by side.
 *
 * Bar heights are the scores clamped to [0, 1] and scaled to the plot height;
 * the caption under each bar shows the unclamped score with three decimals.
 * Only the axes are drawn while either score is null or undefined.
 *
 * @param {object} props
 * @param {?number} props.baselineScore - Score shown in the "baseline" bar.
 * @param {?number} props.rlScore - Score shown in the "phase2" bar.
 */
function CompareCanvas({ baselineScore, rlScore }) {
  const canvasRef = useRef(null);

  useEffect(() => {
    const node = canvasRef.current;
    if (!node) return;

    const ctx = node.getContext("2d");
    const width = node.width;
    const height = node.height;
    const pad = 36;
    drawAxes(ctx, width, height, pad);

    if (baselineScore == null || rlScore == null) return;

    const barW = 120;
    const plotHeight = height - pad * 2;
    const bars = [
      { name: "baseline", score: baselineScore, color: "#9a9a9a", x: width * 0.35 },
      { name: "phase2", score: rlScore, color: "#ffffff", x: width * 0.65 },
    ];

    for (const { name, score, color, x } of bars) {
      const clamped = Math.max(0, Math.min(1, score));
      const barH = plotHeight * clamped;
      const top = height - pad - barH;

      ctx.fillStyle = color;
      ctx.fillRect(x - barW / 2, top, barW, barH);

      // Caption centred under the bar.
      ctx.fillStyle = "#dddddd";
      ctx.font = "13px Segoe UI";
      ctx.textAlign = "center";
      ctx.fillText(`${name}: ${score.toFixed(3)}`, x, height - 10);
    }

    // Restore the default alignment for subsequent drawing on this context.
    ctx.textAlign = "start";
  }, [baselineScore, rlScore]);

  return React.createElement("canvas", { ref: canvasRef, width: 1200, height: 300 });
}
113
+
114
/**
 * Formats a value as a fixed-point decimal string for display.
 *
 * @param {*} value - Anything `Number()` can coerce.
 * @param {number} [digits=2] - Number of decimal places.
 * @returns {string} The formatted number, or "-" when the value is null,
 *   undefined, or coerces to NaN.
 */
function formatNumber(value, digits = 2) {
  if (value === null || value === undefined) {
    return "-";
  }
  const numeric = Number(value);
  return Number.isNaN(numeric) ? "-" : numeric.toFixed(digits);
}
118
+
119
+ function App() {
120
+ const [loading, setLoading] = useState(false);
121
+ const [status, setStatus] = useState("Initializing...");
122
+
123
+ const [tasks, setTasks] = useState([]);
124
+ const [agents, setAgents] = useState([]);
125
+ const [components, setComponents] = useState([]);
126
+ const [models, setModels] = useState([]);
127
+
128
+ const [taskId, setTaskId] = useState("district_backlog_easy");
129
+ const [agentPolicy, setAgentPolicy] = useState("backlog_clearance");
130
+ const [steps, setSteps] = useState(40);
131
+ const [sessionId, setSessionId] = useState("");
132
+
133
+ const [manualSeed, setManualSeed] = useState("");
134
+ const [manualActionJson, setManualActionJson] = useState('{\n "action_type": "advance_time"\n}');
135
+ const [manualOutput, setManualOutput] = useState("{}");
136
+
137
+ const [baselineTrace, setBaselineTrace] = useState([]);
138
+ const [graderScore, setGraderScore] = useState(null);
139
+
140
+ const [benchmarkRows, setBenchmarkRows] = useState([]);
141
+
142
+ const [modelPath, setModelPath] = useState("results/best_model/phase2_final.zip");
143
+ const [modelType, setModelType] = useState("maskable");
144
+ const [rlMaxSteps, setRlMaxSteps] = useState(80);
145
+ const [rlRun, setRlRun] = useState(null);
146
+ const [rlEval, setRlEval] = useState([]);
147
+
148
+ const [compareData, setCompareData] = useState({ baseline: null, rl: null });
149
+ const [workflowOutput, setWorkflowOutput] = useState("");
150
+ const [workflowMeta, setWorkflowMeta] = useState(null);
151
+
152
+ useEffect(() => {
153
+ const init = async () => {
154
+ setLoading(true);
155
+ try {
156
+ const [health, tasksRes, agentsRes, componentsRes, modelsRes] = await Promise.all([
157
+ api("/health"),
158
+ api("/tasks"),
159
+ api("/agents"),
160
+ api("/workflows/components"),
161
+ api("/rl/models"),
162
+ ]);
163
+
164
+ const taskList = tasksRes.tasks || [];
165
+ const agentList = agentsRes || [];
166
+ const modelList = (modelsRes.models || []).filter((m) => m.exists);
167
+
168
+ setTasks(taskList);
169
+ setAgents(agentList);
170
+ setComponents(componentsRes.components || []);
171
+ setModels(modelsRes.models || []);
172
+
173
+ const defaultTask = taskList.includes("district_backlog_easy") ? "district_backlog_easy" : taskList[0];
174
+ setTaskId(defaultTask || "district_backlog_easy");
175
+
176
+ const defaultAgent = agentList.includes("backlog_clearance") ? "backlog_clearance" : (agentList[0] || "backlog_clearance");
177
+ setAgentPolicy(defaultAgent);
178
+
179
+ const phase2 = modelList.find((m) => m.path.toLowerCase().includes("phase2_final")) || modelList[0];
180
+ if (phase2) {
181
+ setModelPath(phase2.path);
182
+ setModelType(phase2.model_type);
183
+ }
184
+
185
+ setStatus(`API ready (v${health.version}).`);
186
+ } catch (err) {
187
+ setStatus(err.message);
188
+ } finally {
189
+ setLoading(false);
190
+ }
191
+ };
192
+
193
+ init();
194
+ }, []);
195
+
196
+ const baselineRewards = useMemo(() => baselineTrace.map((x) => Math.max(0, Number(x.reward || 0))), [baselineTrace]);
197
+ const baselineBacklog = useMemo(() => baselineTrace.map((x) => Number(x.backlog || 0)), [baselineTrace]);
198
+
199
+ const baselineKpi = useMemo(() => {
200
+ const totalReward = baselineTrace.reduce((sum, row) => sum + Number(row.reward || 0), 0);
201
+ const last = baselineTrace.length ? baselineTrace[baselineTrace.length - 1] : null;
202
+ return {
203
+ reward: totalReward,
204
+ backlog: last ? last.backlog : 0,
205
+ completed: last ? last.completed : 0,
206
+ sla: last ? last.sla_breaches : 0,
207
+ fairness: last ? last.fairness_gap : 0,
208
+ };
209
+ }, [baselineTrace]);
210
+
211
+ const activeModel = useMemo(() => models.find((m) => m.path === modelPath), [models, modelPath]);
212
+
213
/**
 * Creates a new session for the selected task via POST /reset, optionally
 * seeded from the "Seed" input, and shows the raw response in the output pane.
 *
 * A seed that is not a finite number is rejected before any request is made:
 * JSON.stringify would otherwise turn NaN/Infinity into `null` and the request
 * would silently go out without the seed the user typed.
 */
const manualReset = async () => {
  const payload = {
    task_id: taskId,
  };

  const seedText = manualSeed.trim();
  if (seedText) {
    const seed = Number(seedText);
    if (!Number.isFinite(seed)) {
      setStatus(`Invalid seed "${seedText}": expected a number.`);
      return;
    }
    payload.seed = seed;
  }

  setLoading(true);
  try {
    const res = await api("/reset", {
      method: "POST",
      body: JSON.stringify(payload),
    });
    setSessionId(res.session_id);
    setManualOutput(JSON.stringify(res, null, 2));
    setStatus(`Session created: ${res.session_id}`);
  } catch (err) {
    setStatus(err.message);
  } finally {
    setLoading(false);
  }
};
235
+
236
/**
 * Sends the action typed into the "Action JSON" textarea to POST /step for the
 * current session and shows the raw response in the output pane.
 *
 * The textarea content is parsed before anything else happens, so a malformed
 * action is reported as such (instead of as a bare parser message) and never
 * toggles the loading state or reaches the backend.
 */
const manualStep = async () => {
  if (!sessionId) {
    setStatus("Create a session first with Reset.");
    return;
  }

  let action;
  try {
    action = JSON.parse(manualActionJson);
  } catch (err) {
    setStatus(`Invalid action JSON: ${err.message}`);
    return;
  }

  setLoading(true);
  try {
    const res = await api("/step", {
      method: "POST",
      body: JSON.stringify({ session_id: sessionId, action }),
    });
    setManualOutput(JSON.stringify(res, null, 2));
    setStatus(`Manual step done. reward=${formatNumber(res.reward)}`);
  } catch (err) {
    setStatus(err.message);
  } finally {
    setLoading(false);
  }
};
256
+
257
/**
 * Fetches the current session's state (with action history) via POST /state
 * and shows the raw response in the output pane. Requires an existing session.
 */
const manualState = async () => {
  const missingSession = !sessionId;
  if (missingSession) {
    setStatus("Create a session first with Reset.");
    return;
  }

  setLoading(true);
  try {
    const query = { session_id: sessionId, include_action_history: true };
    const snapshot = await api("/state", { method: "POST", body: JSON.stringify(query) });
    const pretty = JSON.stringify(snapshot, null, 2);
    setManualOutput(pretty);
    setStatus("State fetched.");
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
276
+
277
/**
 * Grades the current session via POST /grade, shows the raw response in the
 * output pane and summarises score and grader name in the status line.
 * Requires an existing session.
 */
const manualGrade = async () => {
  const missingSession = !sessionId;
  if (missingSession) {
    setStatus("Create a session first with Reset.");
    return;
  }

  setLoading(true);
  try {
    const request = { method: "POST", body: JSON.stringify({ session_id: sessionId }) };
    const grade = await api("/grade", request);
    setManualOutput(JSON.stringify(grade, null, 2));

    const scoreText = formatNumber(grade.score, 3);
    setStatus(`Grade score=${scoreText} (${grade.grader_name})`);
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
296
+
297
/**
 * Opens a fresh session for the selected task via POST /reset and clears the
 * baseline trace and grader score.
 *
 * This function does no error handling of its own: a failed request rejects
 * the returned promise and the caller is expected to deal with it.
 * NOTE(review): it is also passed directly as the "Reset Session" button's
 * onClick, where nothing catches a rejection. Confirm that is intended.
 *
 * @returns {Promise<string>} The new session id.
 */
const resetBaselineSession = async () => {
  const request = {
    method: "POST",
    body: JSON.stringify({ task_id: taskId }),
  };
  const created = await api("/reset", request);
  const newSessionId = created.session_id;

  setSessionId(newSessionId);
  setBaselineTrace([]);
  setGraderScore(null);

  return newSessionId;
};
307
+
308
/**
 * Runs the selected baseline policy step by step via POST /autostep, records
 * one trace row per step, then grades the session via POST /grade.
 *
 * The current session is reused when there is one; otherwise a new session is
 * opened first. Stepping stops after `steps` iterations or as soon as the
 * backend reports `done`. The trace is published only after the loop finishes.
 */
const runBaseline = async () => {
  setLoading(true);
  try {
    const sid = sessionId || (await resetBaselineSession());
    const stepLimit = Number(steps);
    const rows = [];

    let taken = 0;
    while (taken < stepLimit) {
      const stepRes = await api("/autostep", {
        method: "POST",
        body: JSON.stringify({ session_id: sid, agent_policy: agentPolicy }),
      });

      const row = {
        step: rows.length + 1,
        day: stepRes.observation.day,
        action: stepRes.action.action_type,
        reward: Number(stepRes.reward || 0),
        backlog: stepRes.observation.total_backlog,
        completed: stepRes.observation.total_completed,
        sla_breaches: stepRes.observation.total_sla_breaches,
        fairness_gap: Number(stepRes.observation.fairness_gap || 0),
        done: stepRes.done,
      };
      rows.push(row);

      if (stepRes.done) {
        break;
      }
      taken += 1;
    }

    setBaselineTrace(rows);

    const gradeRes = await api("/grade", {
      method: "POST",
      body: JSON.stringify({ session_id: sid }),
    });
    setGraderScore(Number(gradeRes.score));
    setStatus(`Baseline run done. score=${formatNumber(gradeRes.score, 3)}`);
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
352
+
353
/**
 * Benchmarks every known baseline agent on the selected task via
 * POST /benchmark (3 runs each, capped at `steps` steps) and stores the
 * per-agent results for the benchmark table.
 */
const runBenchmark = async () => {
  if (agents.length === 0) {
    setStatus("No baseline agents available.");
    return;
  }

  setLoading(true);
  try {
    const request = {
      task_id: taskId,
      runs: 3,
      max_steps: Number(steps),
      agent_policies: agents,
    };
    const report = await api("/benchmark", {
      method: "POST",
      body: JSON.stringify(request),
    });
    setBenchmarkRows(report.agent_results || []);
    setStatus("Baseline benchmark done.");
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
377
+
378
/**
 * Runs one episode of the selected trained model on the selected task via
 * POST /rl/run and stores the response for the RL panel.
 */
const runTrainedEpisode = async () => {
  setLoading(true);
  try {
    const request = {
      task_id: taskId,
      model_path: modelPath,
      model_type: modelType,
      max_steps: Number(rlMaxSteps),
    };
    const episode = await api("/rl/run", {
      method: "POST",
      body: JSON.stringify(request),
    });
    setRlRun(episode);

    const scoreText = formatNumber(episode.grader_score, 3);
    setStatus(`Trained run done. score=${scoreText} (${episode.grader_name})`);
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
398
+
399
/**
 * Evaluates the selected trained model across all loaded tasks via
 * POST /rl/evaluate (3 episodes) and stores the per-task results.
 */
const evaluateTrainedModel = async () => {
  setLoading(true);
  try {
    const request = {
      model_path: modelPath,
      model_type: modelType,
      episodes: 3,
      task_ids: tasks,
    };
    const evaluation = await api("/rl/evaluate", {
      method: "POST",
      body: JSON.stringify(request),
    });
    setRlEval(evaluation.results || []);

    const averageText = formatNumber(evaluation.average_grader_score, 3);
    setStatus(`Trained evaluation done. avg=${averageText}`);
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
419
+
420
/**
 * Scores the selected baseline agent (POST /benchmark) and the selected
 * trained model (POST /rl/evaluate) on the current task, both requests in
 * flight at once, and stores the two scores for the comparison chart.
 *
 * A side whose result list is missing or empty is stored as null.
 */
const compareBaselineVsPhase2 = async () => {
  // First entry's numeric field, or null when there is no entry.
  const firstScore = (entries, field) =>
    entries && entries.length ? Number(entries[0][field]) : null;

  setLoading(true);
  try {
    const baselineRequest = api("/benchmark", {
      method: "POST",
      body: JSON.stringify({
        task_id: taskId,
        runs: 3,
        max_steps: Number(steps),
        agent_policies: [agentPolicy],
      }),
    });
    const modelRequest = api("/rl/evaluate", {
      method: "POST",
      body: JSON.stringify({
        model_path: modelPath,
        model_type: modelType,
        episodes: 3,
        task_ids: [taskId],
      }),
    });
    const [base, rl] = await Promise.all([baselineRequest, modelRequest]);

    setCompareData({
      baseline: firstScore(base.agent_results, "average_score"),
      rl: firstScore(rl.results, "grader_score"),
    });
    setStatus("Comparison done.");
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
459
+
460
/**
 * Maps a component name to the workflow id that can be run for it.
 *
 * @param {string} componentName - Component name as listed by the backend.
 * @returns {?string} The workflow id, or null when the component has none.
 */
const workflowIdForComponent = (componentName) => {
  switch (componentName) {
    case "baseline_openai.py":
      return "baseline_openai";
    case "inference.py":
      return "inference";
    case "phase2_final.zip":
      return "phase2_eval";
    default:
      return null;
  }
};
466
+
467
/**
 * Runs a backend workflow via POST /workflows/run and publishes its metadata
 * and a plain-text transcript (command line, stdout, stderr) to the UI.
 *
 * @param {string} workflowId - Workflow identifier sent as `workflow_id`.
 */
const runWorkflowFromUi = async (workflowId) => {
  setLoading(true);
  try {
    const request = {
      workflow_id: workflowId,
      max_steps: Number(steps),
      episodes: 3,
      model_path: modelPath,
      model_type: modelType,
      timeout_seconds: 240,
    };
    const res = await api("/workflows/run", {
      method: "POST",
      body: JSON.stringify(request),
    });

    const meta = {
      workflow_id: res.workflow_id,
      exit_code: res.exit_code,
      duration_seconds: res.duration_seconds,
      timed_out: res.timed_out,
      command: res.command,
    };
    setWorkflowMeta(meta);

    // Shell-like transcript: command line, then the two output streams.
    const commandLine = `$ ${(res.command || []).join(" ")}`;
    const transcript = [
      commandLine,
      "",
      "STDOUT:",
      res.stdout || "",
      "",
      "STDERR:",
      res.stderr || "",
    ];
    setWorkflowOutput(transcript.join("\n"));

    const durationText = formatNumber(res.duration_seconds, 2);
    setStatus(
      `Workflow ${res.workflow_id} finished. exit_code=${res.exit_code}, duration=${durationText}s`
    );
  } catch (failure) {
    setStatus(failure.message);
  } finally {
    setLoading(false);
  }
};
508
+
509
+ return React.createElement(
510
+ "div",
511
+ { className: "shell" },
512
+ React.createElement(
513
+ "header",
514
+ { className: "hero" },
515
+ React.createElement("h1", null, "Gov Workflow OpenEnv - React Console"),
516
+ React.createElement(
517
+ "p",
518
+ null,
519
+ "Shows OpenEnv API execution, baseline/inference workflow visibility, and trained Phase 2 RL model behavior from one screen."
520
+ )
521
+ ),
522
+
523
+ React.createElement("div", { className: "status" }, status),
524
+
525
+ React.createElement(
526
+ "section",
527
+ { className: "panel" },
528
+ React.createElement("h2", null, "Workflow Components Visibility"),
529
+ React.createElement(
530
+ "div",
531
+ { className: "grid cols-2" },
532
+ ...components.map((c) =>
533
+ React.createElement(
534
+ "article",
535
+ { key: c.component, className: "panel" },
536
+ React.createElement("h3", null, c.component),
537
+ React.createElement("div", { className: `badge ${c.available ? "ok" : ""}` }, c.available ? "available" : "missing"),
538
+ React.createElement("p", { className: "small" }, c.description),
539
+ c.command ? React.createElement("pre", null, c.command) : null,
540
+ workflowIdForComponent(c.component)
541
+ ? React.createElement(
542
+ "div",
543
+ { className: "btn-row", style: { marginTop: "8px" } },
544
+ React.createElement(
545
+ "button",
546
+ {
547
+ className: "secondary",
548
+ onClick: () => runWorkflowFromUi(workflowIdForComponent(c.component)),
549
+ disabled: loading,
550
+ },
551
+ "Run In Frontend"
552
+ )
553
+ )
554
+ : null,
555
+ c.notes ? React.createElement("p", { className: "small" }, c.notes) : null
556
+ )
557
+ )
558
+ ),
559
+ workflowMeta
560
+ ? React.createElement(
561
+ "div",
562
+ { className: "small", style: { marginTop: "10px" } },
563
+ `Last run: ${workflowMeta.workflow_id} | exit=${workflowMeta.exit_code} | timeout=${workflowMeta.timed_out ? "true" : "false"} | duration=${formatNumber(workflowMeta.duration_seconds, 2)}s`
564
+ )
565
+ : null,
566
+ workflowOutput ? React.createElement("pre", { style: { marginTop: "10px" } }, workflowOutput) : null
567
+ ),
568
+
569
+ React.createElement(
570
+ "section",
571
+ { className: "panel" },
572
+ React.createElement("h2", null, "OpenEnv API Runner (step/reset/state/grade)"),
573
+ React.createElement(
574
+ "div",
575
+ { className: "form-row" },
576
+ React.createElement(
577
+ "label",
578
+ null,
579
+ "Task",
580
+ React.createElement(
581
+ "select",
582
+ { value: taskId, onChange: (e) => setTaskId(e.target.value) },
583
+ ...tasks.map((t) => React.createElement("option", { key: t, value: t }, t))
584
+ )
585
+ ),
586
+ React.createElement(
587
+ "label",
588
+ null,
589
+ "Seed (optional)",
590
+ React.createElement("input", {
591
+ value: manualSeed,
592
+ onChange: (e) => setManualSeed(e.target.value),
593
+ placeholder: "11",
594
+ })
595
+ ),
596
+ React.createElement(
597
+ "label",
598
+ null,
599
+ "Session ID",
600
+ React.createElement("input", {
601
+ value: sessionId,
602
+ onChange: (e) => setSessionId(e.target.value),
603
+ placeholder: "auto after reset",
604
+ })
605
+ )
606
+ ),
607
+ React.createElement(
608
+ "label",
609
+ { style: { marginTop: "10px" } },
610
+ "Action JSON for /step",
611
+ React.createElement("textarea", {
612
+ value: manualActionJson,
613
+ onChange: (e) => setManualActionJson(e.target.value),
614
+ })
615
+ ),
616
+ React.createElement(
617
+ "div",
618
+ { className: "btn-row", style: { marginTop: "10px" } },
619
+ React.createElement("button", { onClick: manualReset, disabled: loading }, "Reset"),
620
+ React.createElement("button", { onClick: manualStep, disabled: loading }, "Step"),
621
+ React.createElement("button", { onClick: manualState, disabled: loading }, "State"),
622
+ React.createElement("button", { onClick: manualGrade, disabled: loading }, "Grade"),
623
+ ),
624
+ React.createElement("pre", { style: { marginTop: "10px" } }, manualOutput)
625
+ ),
626
+
627
+ React.createElement(
628
+ "section",
629
+ { className: "panel" },
630
+ React.createElement("h2", null, "Baseline Agent Runner (backend policy)"),
631
+ React.createElement(
632
+ "div",
633
+ { className: "form-row" },
634
+ React.createElement(
635
+ "label",
636
+ null,
637
+ "Baseline Agent",
638
+ React.createElement(
639
+ "select",
640
+ { value: agentPolicy, onChange: (e) => setAgentPolicy(e.target.value) },
641
+ ...agents.map((a) => React.createElement("option", { key: a, value: a }, a))
642
+ )
643
+ ),
644
+ React.createElement(
645
+ "label",
646
+ null,
647
+ "Steps",
648
+ React.createElement("input", {
649
+ type: "number",
650
+ min: 1,
651
+ max: 500,
652
+ value: steps,
653
+ onChange: (e) => setSteps(e.target.value),
654
+ })
655
+ )
656
+ ),
657
+ React.createElement(
658
+ "div",
659
+ { className: "btn-row", style: { marginTop: "10px" } },
660
+ React.createElement("button", { onClick: runBaseline, disabled: loading }, "Run Baseline"),
661
+ React.createElement("button", { className: "secondary", onClick: resetBaselineSession, disabled: loading }, "Reset Session"),
662
+ React.createElement("button", { className: "secondary", onClick: runBenchmark, disabled: loading }, "Run Benchmark"),
663
+ ),
664
+ React.createElement(
665
+ "div",
666
+ { className: "kpis", style: { marginTop: "10px" } },
667
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Total Reward"), React.createElement("div", { className: "v" }, formatNumber(baselineKpi.reward))),
668
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Backlog"), React.createElement("div", { className: "v" }, baselineKpi.backlog)),
669
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Completed"), React.createElement("div", { className: "v" }, baselineKpi.completed)),
670
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "SLA Breaches"), React.createElement("div", { className: "v" }, baselineKpi.sla)),
671
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Fairness Gap"), React.createElement("div", { className: "v" }, formatNumber(baselineKpi.fairness))),
672
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Grader Score"), React.createElement("div", { className: "v" }, graderScore == null ? "-" : formatNumber(graderScore, 3))),
673
+ ),
674
+ React.createElement("div", { style: { marginTop: "10px" } }, React.createElement(LineCanvas, {
675
+ pointsA: baselineRewards,
676
+ pointsB: baselineBacklog,
677
+ labelA: "reward",
678
+ labelB: "backlog",
679
+ })),
680
+ React.createElement(
681
+ "div",
682
+ { className: "table-wrap", style: { marginTop: "10px" } },
683
+ React.createElement(
684
+ "table",
685
+ null,
686
+ React.createElement(
687
+ "thead",
688
+ null,
689
+ React.createElement(
690
+ "tr",
691
+ null,
692
+ React.createElement("th", null, "Step"),
693
+ React.createElement("th", null, "Day"),
694
+ React.createElement("th", null, "Action"),
695
+ React.createElement("th", null, "Reward"),
696
+ React.createElement("th", null, "Backlog"),
697
+ React.createElement("th", null, "Completed"),
698
+ React.createElement("th", null, "SLA"),
699
+ React.createElement("th", null, "Done")
700
+ )
701
+ ),
702
+ React.createElement(
703
+ "tbody",
704
+ null,
705
+ ...baselineTrace.map((r) =>
706
+ React.createElement(
707
+ "tr",
708
+ { key: `b-${r.step}` },
709
+ React.createElement("td", null, r.step),
710
+ React.createElement("td", null, r.day),
711
+ React.createElement("td", null, r.action),
712
+ React.createElement("td", null, formatNumber(r.reward)),
713
+ React.createElement("td", null, r.backlog),
714
+ React.createElement("td", null, r.completed),
715
+ React.createElement("td", null, r.sla_breaches),
716
+ React.createElement("td", null, r.done ? "true" : "false")
717
+ )
718
+ )
719
+ )
720
+ )
721
+ ),
722
+ benchmarkRows.length
723
+ ? React.createElement(
724
+ "div",
725
+ { className: "table-wrap", style: { marginTop: "10px" } },
726
+ React.createElement(
727
+ "table",
728
+ null,
729
+ React.createElement(
730
+ "thead",
731
+ null,
732
+ React.createElement(
733
+ "tr",
734
+ null,
735
+ React.createElement("th", null, "Agent"),
736
+ React.createElement("th", null, "Avg Score"),
737
+ React.createElement("th", null, "Min"),
738
+ React.createElement("th", null, "Max")
739
+ )
740
+ ),
741
+ React.createElement(
742
+ "tbody",
743
+ null,
744
+ ...benchmarkRows.map((r) =>
745
+ React.createElement(
746
+ "tr",
747
+ { key: `bench-${r.agent_policy}` },
748
+ React.createElement("td", null, r.agent_policy),
749
+ React.createElement("td", null, formatNumber(r.average_score, 3)),
750
+ React.createElement("td", null, formatNumber(r.min_score, 3)),
751
+ React.createElement("td", null, formatNumber(r.max_score, 3))
752
+ )
753
+ )
754
+ )
755
+ )
756
+ )
757
+ : null
758
+ ),
759
+
760
+ React.createElement(
761
+ "section",
762
+ { className: "panel" },
763
+ React.createElement("h2", null, "Trained RL Model (Phase 2 / Phase 3)"),
764
+ React.createElement(
765
+ "div",
766
+ { className: "form-row" },
767
+ React.createElement(
768
+ "label",
769
+ null,
770
+ "Model",
771
+ React.createElement(
772
+ "select",
773
+ {
774
+ value: modelPath,
775
+ onChange: (e) => {
776
+ const p = e.target.value;
777
+ setModelPath(p);
778
+ const hit = models.find((m) => m.path === p);
779
+ if (hit) setModelType(hit.model_type);
780
+ },
781
+ },
782
+ ...models.filter((m) => m.exists).map((m) =>
783
+ React.createElement("option", { key: m.path, value: m.path }, `${m.label}`)
784
+ )
785
+ )
786
+ ),
787
+ React.createElement(
788
+ "label",
789
+ null,
790
+ "Model Type",
791
+ React.createElement(
792
+ "select",
793
+ { value: modelType, onChange: (e) => setModelType(e.target.value) },
794
+ React.createElement("option", { value: "maskable" }, "maskable"),
795
+ React.createElement("option", { value: "recurrent" }, "recurrent")
796
+ )
797
+ ),
798
+ React.createElement(
799
+ "label",
800
+ null,
801
+ "Max Steps",
802
+ React.createElement("input", {
803
+ type: "number",
804
+ min: 1,
805
+ max: 1000,
806
+ value: rlMaxSteps,
807
+ onChange: (e) => setRlMaxSteps(e.target.value),
808
+ })
809
+ )
810
+ ),
811
+ React.createElement(
812
+ "div",
813
+ { className: "btn-row", style: { marginTop: "10px" } },
814
+ React.createElement("button", { onClick: runTrainedEpisode, disabled: loading }, "Run Trained Episode"),
815
+ React.createElement("button", { className: "secondary", onClick: evaluateTrainedModel, disabled: loading }, "Evaluate Model"),
816
+ React.createElement("button", { className: "secondary", onClick: compareBaselineVsPhase2, disabled: loading }, "Compare vs Baseline"),
817
+ ),
818
+ activeModel
819
+ ? React.createElement("p", { className: "small", style: { marginTop: "10px" } }, `Using: ${activeModel.path}`)
820
+ : null,
821
+
822
+ rlRun
823
+ ? React.createElement(
824
+ "div",
825
+ { style: { marginTop: "10px" } },
826
+ React.createElement(
827
+ "div",
828
+ { className: "kpis" },
829
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Task"), React.createElement("div", { className: "v" }, rlRun.task_id)),
830
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Seed"), React.createElement("div", { className: "v" }, rlRun.seed)),
831
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Total Reward"), React.createElement("div", { className: "v" }, formatNumber(rlRun.total_reward))),
832
+ React.createElement("div", { className: "kpi" }, React.createElement("div", { className: "k" }, "Grader Score"), React.createElement("div", { className: "v" }, formatNumber(rlRun.grader_score, 3))),
833
+ ),
834
+ React.createElement("div", { style: { marginTop: "10px" } }, React.createElement(LineCanvas, {
835
+ pointsA: (rlRun.trace || []).map((x) => Math.max(0, Number(x.reward || 0))),
836
+ pointsB: (rlRun.trace || []).map((x) => Number(x.backlog || 0)),
837
+ labelA: "rl reward",
838
+ labelB: "rl backlog",
839
+ })),
840
+ React.createElement(
841
+ "div",
842
+ { className: "table-wrap", style: { marginTop: "10px" } },
843
+ React.createElement(
844
+ "table",
845
+ null,
846
+ React.createElement(
847
+ "thead",
848
+ null,
849
+ React.createElement(
850
+ "tr",
851
+ null,
852
+ React.createElement("th", null, "Step"),
853
+ React.createElement("th", null, "Action Index"),
854
+ React.createElement("th", null, "Action"),
855
+ React.createElement("th", null, "Reward"),
856
+ React.createElement("th", null, "Backlog"),
857
+ React.createElement("th", null, "Completed"),
858
+ React.createElement("th", null, "SLA")
859
+ )
860
+ ),
861
+ React.createElement(
862
+ "tbody",
863
+ null,
864
+ ...(rlRun.trace || []).map((r) =>
865
+ React.createElement(
866
+ "tr",
867
+ { key: `rl-${r.step}` },
868
+ React.createElement("td", null, r.step),
869
+ React.createElement("td", null, r.action_index),
870
+ React.createElement("td", null, r.action_label),
871
+ React.createElement("td", null, formatNumber(r.reward)),
872
+ React.createElement("td", null, r.backlog),
873
+ React.createElement("td", null, r.completed),
874
+ React.createElement("td", null, r.sla_breaches)
875
+ )
876
+ )
877
+ )
878
+ )
879
+ )
880
+ )
881
+ : null,
882
+
883
+ rlEval.length
884
+ ? React.createElement(
885
+ "div",
886
+ { className: "table-wrap", style: { marginTop: "10px" } },
887
+ React.createElement(
888
+ "table",
889
+ null,
890
+ React.createElement(
891
+ "thead",
892
+ null,
893
+ React.createElement(
894
+ "tr",
895
+ null,
896
+ React.createElement("th", null, "Task"),
897
+ React.createElement("th", null, "Score"),
898
+ React.createElement("th", null, "Reward"),
899
+ React.createElement("th", null, "Completed"),
900
+ React.createElement("th", null, "SLA Breaches")
901
+ )
902
+ ),
903
+ React.createElement(
904
+ "tbody",
905
+ null,
906
+ ...rlEval.map((r) =>
907
+ React.createElement(
908
+ "tr",
909
+ { key: `eval-${r.task_id}` },
910
+ React.createElement("td", null, r.task_id),
911
+ React.createElement("td", null, formatNumber(r.grader_score, 3)),
912
+ React.createElement("td", null, formatNumber(r.total_reward, 2)),
913
+ React.createElement("td", null, r.total_completed),
914
+ React.createElement("td", null, r.total_sla_breaches)
915
+ )
916
+ )
917
+ )
918
+ )
919
+ )
920
+ : null,
921
+
922
+ React.createElement("div", { style: { marginTop: "12px" } }, React.createElement(CompareCanvas, {
923
+ baselineScore: compareData.baseline,
924
+ rlScore: compareData.rl,
925
+ }))
926
+ )
927
+ );
928
+ }
929
+
930
// Mount the React app into #app-root, then raise the flag that index.html's
// bootstrap watchdog checks before replacing the page with its error notice.
createRoot(document.getElementById("app-root")).render(React.createElement(App));
window.__APP_MOUNTED__ = true;
app/web/styles.css ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --bg: #050505;
3
+ --panel: #0f0f0f;
4
+ --panel-2: #141414;
5
+ --line: #2b2b2b;
6
+ --text: #f4f4f4;
7
+ --muted: #b6b6b6;
8
+ --accent: #ffffff;
9
+ }
10
+
11
+ * {
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ html,
16
+ body {
17
+ margin: 0;
18
+ min-height: 100%;
19
+ background: radial-gradient(circle at 0% 0%, #1b1b1b 0%, #070707 40%, #000 100%);
20
+ color: var(--text);
21
+ font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
22
+ }
23
+
24
+ .app-root {
25
+ min-height: 100vh;
26
+ }
27
+
28
+ .boot {
29
+ width: min(1000px, 92vw);
30
+ margin: 32px auto;
31
+ padding: 16px;
32
+ border: 1px solid var(--line);
33
+ border-radius: 12px;
34
+ background: var(--panel);
35
+ }
36
+
37
+ .boot-error h2 {
38
+ margin-top: 0;
39
+ }
40
+
41
+ .shell {
42
+ width: min(1300px, 94vw);
43
+ margin: 0 auto;
44
+ padding: 20px;
45
+ display: grid;
46
+ gap: 14px;
47
+ }
48
+
49
+ .hero {
50
+ padding: 18px;
51
+ border-radius: 14px;
52
+ color: #000;
53
+ background: linear-gradient(130deg, #fff 0%, #d0d0d0 40%, #7b7b7b 100%);
54
+ }
55
+
56
+ .hero h1 {
57
+ margin: 0;
58
+ font-size: 28px;
59
+ }
60
+
61
+ .hero p {
62
+ margin: 8px 0 0;
63
+ font-weight: 600;
64
+ }
65
+
66
+ .grid {
67
+ display: grid;
68
+ gap: 12px;
69
+ }
70
+
71
+ .cols-3 {
72
+ grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
73
+ }
74
+
75
+ .cols-2 {
76
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
77
+ }
78
+
79
+ .panel {
80
+ border: 1px solid var(--line);
81
+ border-radius: 12px;
82
+ background: var(--panel);
83
+ padding: 14px;
84
+ }
85
+
86
+ .panel h2 {
87
+ margin: 0 0 10px;
88
+ font-size: 18px;
89
+ }
90
+
91
+ .panel h3 {
92
+ margin: 0 0 8px;
93
+ font-size: 15px;
94
+ }
95
+
96
+ .form-row {
97
+ display: grid;
98
+ grid-template-columns: repeat(auto-fit, minmax(170px, 1fr));
99
+ gap: 10px;
100
+ }
101
+
102
+ label {
103
+ display: grid;
104
+ gap: 6px;
105
+ font-size: 12px;
106
+ color: var(--muted);
107
+ }
108
+
109
+ input,
110
+ select,
111
+ textarea,
112
+ button {
113
+ width: 100%;
114
+ border-radius: 8px;
115
+ border: 1px solid #3a3a3a;
116
+ background: var(--panel-2);
117
+ color: var(--text);
118
+ padding: 8px 10px;
119
+ font-size: 13px;
120
+ }
121
+
122
+ textarea {
123
+ min-height: 95px;
124
+ resize: vertical;
125
+ }
126
+
127
+ button {
128
+ cursor: pointer;
129
+ border: none;
130
+ background: var(--accent);
131
+ color: #000;
132
+ font-weight: 700;
133
+ }
134
+
135
+ button.secondary {
136
+ background: transparent;
137
+ border: 1px solid #505050;
138
+ color: var(--text);
139
+ }
140
+
141
+ button:disabled {
142
+ opacity: 0.55;
143
+ cursor: wait;
144
+ }
145
+
146
+ .btn-row {
147
+ display: flex;
148
+ gap: 8px;
149
+ flex-wrap: wrap;
150
+ }
151
+
152
+ .btn-row button {
153
+ width: auto;
154
+ }
155
+
156
+ .kpis {
157
+ display: grid;
158
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
159
+ gap: 10px;
160
+ }
161
+
162
+ .kpi {
163
+ border: 1px solid var(--line);
164
+ border-radius: 10px;
165
+ background: #0a0a0a;
166
+ padding: 10px;
167
+ }
168
+
169
+ .kpi .k {
170
+ color: var(--muted);
171
+ font-size: 12px;
172
+ }
173
+
174
+ .kpi .v {
175
+ margin-top: 5px;
176
+ font-size: 19px;
177
+ font-weight: 700;
178
+ }
179
+
180
+ .table-wrap {
181
+ overflow: auto;
182
+ border: 1px solid #222;
183
+ border-radius: 8px;
184
+ }
185
+
186
+ .table-wrap table {
187
+ border-collapse: collapse;
188
+ width: 100%;
189
+ font-size: 12px;
190
+ }
191
+
192
+ .table-wrap th,
193
+ .table-wrap td {
194
+ border-bottom: 1px solid #202020;
195
+ text-align: left;
196
+ padding: 8px;
197
+ white-space: nowrap;
198
+ }
199
+
200
+ .table-wrap thead th {
201
+ background: #0c0c0c;
202
+ position: sticky;
203
+ top: 0;
204
+ }
205
+
206
+ canvas {
207
+ width: 100%;
208
+ border: 1px solid #292929;
209
+ border-radius: 8px;
210
+ background: #050505;
211
+ }
212
+
213
+ .status {
214
+ padding: 10px;
215
+ border-radius: 8px;
216
+ border: 1px solid #303030;
217
+ background: #0b0b0b;
218
+ color: var(--muted);
219
+ font-size: 12px;
220
+ }
221
+
222
+ .small {
223
+ font-size: 12px;
224
+ color: var(--muted);
225
+ }
226
+
227
+ pre {
228
+ margin: 0;
229
+ border: 1px solid #232323;
230
+ border-radius: 8px;
231
+ background: #080808;
232
+ padding: 10px;
233
+ max-height: 240px;
234
+ overflow: auto;
235
+ font-size: 12px;
236
+ }
237
+
238
+ .badge {
239
+ display: inline-block;
240
+ border: 1px solid #515151;
241
+ border-radius: 999px;
242
+ padding: 2px 8px;
243
+ font-size: 11px;
244
+ color: #dcdcdc;
245
+ }
246
+
247
+ .badge.ok {
248
+ border-color: #bdbdbd;
249
+ color: #ffffff;
250
+ }
251
+
252
+ @media (max-width: 680px) {
253
+ .hero h1 {
254
+ font-size: 23px;
255
+ }
256
+ }
audit.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import inspect
5
+ import requests
6
+ import numpy as np
7
+ import yaml
8
+ import gymnasium as gym
9
+
10
+ from stable_baselines3.common.env_checker import check_env
11
+ from sb3_contrib import MaskablePPO
12
+
13
def print_result(check_num, desc, status, detail=""):
    """Print one audit check outcome as a three-line record plus a blank line.

    Args:
        check_num: Identifier of the check (e.g. "B1").
        desc: Human-readable description of what the check verifies.
        status: Outcome label; callers in this script pass "PASS" or "FAIL".
        detail: Optional extra information (defaults to an empty string).
    """
    report = f"[CHECK {check_num}] {desc}\nSTATUS: {status}\nDETAIL: {detail}\n"
    # print() appends its own newline, producing the blank separator line.
    print(report)
15
+
16
# ---------------------------------------------------------------------------
# Audit checks.
#
# Each check is an independent try/except that reports PASS or FAIL through
# print_result(). Several checks deliberately reuse names bound by earlier
# checks (env, rewards, sm, env_j1, ...); when an earlier check failed before
# binding the name, the resulting NameError is reported as that check's FAIL.
# ---------------------------------------------------------------------------

# The three task ids every task-level check iterates over.
_TASK_IDS = ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]

# Safety cap for the grader checks so a non-terminating episode is reported
# as a FAIL instead of hanging the audit.
_MAX_EPISODE_STEPS = 10_000


def _run_advance_time_episode(env, max_steps):
    """Step an already-reset env with ADVANCE_TIME until it ends or the cap is hit.

    Args:
        env: Environment whose step() returns
            (obs, reward, terminated, truncated, info).
        max_steps: Maximum number of steps to take.

    Returns:
        (steps_taken, ended) where ended is True when the episode reported
        terminated or truncated within max_steps.
    """
    from app.models import ActionModel, ActionType

    terminated = truncated = False
    steps = 0
    while not (terminated or truncated) and steps < max_steps:
        _, _, terminated, truncated, _ = env.step(
            ActionModel(action_type=ActionType.ADVANCE_TIME)
        )
        steps += 1
    return steps, bool(terminated or truncated)


# B1 - every schema name must be importable from app.models.
try:
    from app.models import (
        ServiceType, StageType, PriorityMode, ActionType,
        OfficerPool, QueueSnapshot, ObservationModel, ActionModel,
        RewardModel, EpisodeStateModel, StepInfoModel,
        SimulationConfig, TaskConfig, GraderResult,
        BenchmarkResult, LiveRunResult, EpisodeMetrics
    )
    print_result("B1", "All 17 Schemas Present", "PASS", "All 17 names resolve")
except Exception as e:
    print_result("B1", "All 17 Schemas Present", "FAIL", str(e))

# B2 - canonical field names are present and legacy names are gone.
try:
    fields = QueueSnapshot.model_fields
    assert 'total_pending' in fields, "total_pending missing"
    assert 'blocked_missing_docs' in fields, "blocked_missing_docs missing"
    assert 'active_cases' not in fields, "legacy field active_cases found"
    assert 'missing_docs_cases' not in fields, "legacy field found"

    m_fields = EpisodeMetrics.model_fields
    assert 'total_invalid_actions' in m_fields, "total_invalid_actions missing"
    print_result("B2", "Canonical Field Name Verification", "PASS", "Fields verified")
except Exception as e:
    print_result("B2", "Canonical Field Name Verification", "FAIL", str(e))

# B3 - enum members are upper-case only.
try:
    from app.simulator import SimulationAgentMode
    assert hasattr(SimulationAgentMode, 'BASELINE_POLICY'), "BASELINE_POLICY missing"
    assert hasattr(SimulationAgentMode, 'RANDOM'), "RANDOM missing"
    assert hasattr(SimulationAgentMode, 'LLM_AGENT'), "LLM_AGENT missing"
    assert hasattr(SimulationAgentMode, 'HEURISTIC'), "HEURISTIC missing"
    try:
        _ = SimulationAgentMode.baseline_policy
        print_result("B3", "Enum Casing Check", "FAIL", "lowercase alias exists")
    except AttributeError:
        print_result("B3", "Enum Casing Check", "PASS", "No lowercase alias")
except Exception as e:
    print_result("B3", "Enum Casing Check", "FAIL", str(e))

# C1 - reset() returns (observation, info).
# The observation may be a plain dict or a pydantic model: C4 below uses
# attribute assignment and .model_dump() on the same return value, so a
# dict-only assertion here could never pass together with C4.
try:
    from app.env import GovWorkflowEnv
    env = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, info = env.reset(seed=42)
    if isinstance(obs, dict):
        obs_payload = obs
    else:
        assert hasattr(obs, "model_dump"), f"obs is {type(obs)}, expected dict or pydantic model"
        obs_payload = obs.model_dump()
    assert isinstance(info, dict), f"info is {type(info)}, expected dict"
    assert len(obs_payload) > 0, "empty observation"
    print_result("C1", "reset() Returns (observation, info)", "PASS", f"obs={type(obs).__name__}, info=dict")
except Exception as e:
    print_result("C1", "reset() Returns (observation, info)", "FAIL", str(e))

# C2 - step() returns the 5-tuple (obs, reward, terminated, truncated, info).
try:
    from app.models import ActionModel, ActionType
    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
    result = env.step(action)
    assert len(result) == 5, f"step() returned {len(result)} values, expected 5"
    obs2, reward, terminated, truncated, info2 = result
    assert isinstance(reward, float), f"reward type {type(reward)}"
    assert isinstance(terminated, bool), "terminated not bool"
    assert isinstance(truncated, bool), "truncated not bool"
    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "PASS", "Valid step signature")
except Exception as e:
    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "FAIL", str(e))

# C3 - dtype check on the Gym wrapper observation (the spaces themselves are
# exercised in the J checks).
try:
    from rl.gov_workflow_env import GovWorkflowGymEnv
    genv = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
    gobs, _ = genv.reset(seed=42)

    def check_dtype(value, path="obs"):
        # Accept either a bare array or an arbitrarily nested dict of arrays;
        # values of any other type are ignored, as before.
        if isinstance(value, np.ndarray):
            assert value.dtype == np.float32 or value.dtype == np.int64, f"FAIL: {path} dtype={value.dtype}"
        elif isinstance(value, dict):
            for k, v in value.items():
                check_dtype(v, f"{path}.{k}")

    check_dtype(gobs)
    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "PASS", f"checked {type(gobs).__name__} observation")
except Exception as e:
    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "FAIL", str(e))

# C4 - two envs with the same seed yield the same initial observation.
try:
    env1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    env2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs1, _ = env1.reset(seed=42)
    obs2, _ = env2.reset(seed=42)

    # Strip volatile fields before comparison (as in tests)
    obs1.last_action_explanation = ""
    obs2.last_action_explanation = ""
    obs1.episode_id = ""
    obs2.episode_id = ""

    assert json.dumps(obs1.model_dump(), sort_keys=True, default=str) == json.dumps(obs2.model_dump(), sort_keys=True, default=str), "Different observations"
    print_result("C4", "Determinism Check", "PASS", "Observations match")
except Exception as e:
    print_result("C4", "Determinism Check", "FAIL", str(e))

# C5 - an ADVANCE_TIME-only episode ends within 500 steps.
try:
    env_c5 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_c5.reset(seed=42)
    max_steps = 500
    steps, ended = _run_advance_time_episode(env_c5, max_steps)
    assert ended, f"episode never ended after {max_steps} steps"
    print_result("C5", "Episode Termination Check", "PASS", f"ended at step {steps}")
except Exception as e:
    print_result("C5", "Episode Termination Check", "FAIL", str(e))

# D1 - more than half of the first (up to) 20 steps carry a nonzero reward.
try:
    env_d1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_d1.reset(seed=42)
    rewards = []
    for _ in range(20):
        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
        obs, reward, term, trunc, info = env_d1.step(action)
        rewards.append(reward)
        if term or trunc:
            break
    nonzero = sum(1 for r in rewards if abs(r) > 1e-6)
    assert nonzero > len(rewards) * 0.5, f"Only {nonzero}/{len(rewards)} steps had nonzero reward"
    print_result("D1", "Reward is Dense", "PASS", f"{nonzero}/{len(rewards)} steps nonzero")
except Exception as e:
    print_result("D1", "Reward is Dense", "FAIL", str(e))

# D2 - rewards collected by D1 stay inside [-100, 100].
try:
    # Without this guard an empty list would pass the range check vacuously.
    assert rewards, "no rewards were collected by D1"
    for r in rewards:
        assert -100 <= r <= 100, f"reward {r} outside [-100, 100]"
    print_result("D2", "Reward Range Sanity Check", "PASS", "Rewards in bounds")
except Exception as e:
    print_result("D2", "Reward Range Sanity Check", "FAIL", str(e))

# D3 - an action that is invalid in context must not be rewarded.
try:
    from app.models import ServiceType
    env_d3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_d3.reset(seed=42)
    # The action model only accepts enum members, so an invalid action is
    # built from a valid enum value used in a context with no matching cases.
    bad_action = ActionModel(action_type=ActionType.ESCALATE_SERVICE, service_target=ServiceType.PASSPORT)
    obs, reward, term, trunc, info = env_d3.step(bad_action)
    assert reward <= 0, f"invalid action produced positive reward {reward}"
    print_result("D3", "Invalid Action Penalty Fires", "PASS", f"reward={reward:.3f}")
except Exception as e:
    print_result("D3", "Invalid Action Penalty Fires", "FAIL", str(e))

# E1 - every task config loads and has a seed and a positive max_days.
try:
    from app.tasks import get_task
    for task_id in _TASK_IDS:
        cfg = get_task(task_id)
        assert cfg.seed is not None, f"{task_id} has no seed"
        assert cfg.max_days > 0, f"{task_id} max_days={cfg.max_days}"
    print_result("E1", "All 3 Tasks Loadable", "PASS", "All config loaded")
except Exception as e:
    print_result("E1", "All 3 Tasks Loadable", "FAIL", str(e))

# E2 - graders return a float score in [0.0, 1.0] for every task.
try:
    from app.graders import grade_episode
    for task_id in _TASK_IDS:
        env_e2 = GovWorkflowEnv(task_id=task_id, seed=42)
        obs, _ = env_e2.reset(seed=42)
        steps, ended = _run_advance_time_episode(env_e2, _MAX_EPISODE_STEPS)
        assert ended, f"{task_id} never ended after {steps} steps"
        episode_state = env_e2.state()
        score_res = grade_episode(episode_state)
        assert isinstance(score_res.score, float), f"grader returned {type(score_res.score)}"
        assert 0.0 <= score_res.score <= 1.0, f"score={score_res.score} outside [0.0, 1.0]"
    print_result("E2", "Graders Return [0.0, 1.0]", "PASS", "Valid scores returned")
except Exception as e:
    print_result("E2", "Graders Return [0.0, 1.0]", "FAIL", str(e))

# E3 - grading two identically seeded episodes yields identical scores.
try:
    scores = []
    for _ in range(2):
        env_e3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
        obs, _ = env_e3.reset(seed=42)
        steps, ended = _run_advance_time_episode(env_e3, _MAX_EPISODE_STEPS)
        assert ended, f"episode never ended after {steps} steps"
        scores.append(grade_episode(env_e3.state()).score)
    assert scores[0] == scores[1], f"grader is non-deterministic: {scores}"
    print_result("E3", "Grader Scores Are Deterministic", "PASS", f"score={scores[0]:.4f} both runs")
except Exception as e:
    print_result("E3", "Grader Scores Are Deterministic", "FAIL", str(e))

# F1 - ADVANCE moves each stage to the next one in the expected order.
try:
    from app.state_machine import StateMachine, StageType, WorkflowAction
    sm = StateMachine()
    stages = [StageType.SUBMISSION, StageType.DOCUMENT_VERIFICATION, StageType.FIELD_VERIFICATION, StageType.APPROVAL, StageType.ISSUANCE]
    for i in range(len(stages) - 1):
        current = stages[i]
        next_stage = stages[i + 1]
        result = sm.transition(current, WorkflowAction.ADVANCE)
        assert result == next_stage, f"{current} -> {result}, expected {next_stage}"
    print_result("F1", "All Legal Transitions Work", "PASS", "Transitions validated")
except Exception as e:
    print_result("F1", "All Legal Transitions Work", "FAIL", str(e))

# F2 - only ISSUANCE is terminal (reuses `sm` from F1).
try:
    assert sm.is_terminal(StageType.ISSUANCE) == True, "issuance not recognized as terminal"
    assert sm.is_terminal(StageType.SUBMISSION) == False, "submission wrongly marked terminal"
    print_result("F2", "Terminal State Recognized", "PASS", "Terminal states correct")
except Exception as e:
    print_result("F2", "Terminal State Recognized", "FAIL", str(e))

# G1 - LiveSimulationSession must be defined in a file whose path contains "engine".
try:
    import app.simulator as sim_module
    source = inspect.getfile(sim_module.LiveSimulationSession)
    assert 'engine' in source.lower(), f"LiveSimulationSession defined in {source}, not engine.py"
    print_result("G1", "simulator.py Is a Pure Shim", "PASS", "Shim logic confirmed")
except Exception as e:
    print_result("G1", "simulator.py Is a Pure Shim", "FAIL", str(e))

# G2 - the three engine exports are importable through app.simulator.
try:
    from app.simulator import LiveSimulationSession, SimulationAgentMode, run_simulation
    assert callable(run_simulation), "run_simulation not callable"
    assert callable(LiveSimulationSession), "LiveSimulationSession not callable"
    print_result("G2", "All 3 Engine Exports Importable", "PASS", "Exports valid")
except Exception as e:
    print_result("G2", "All 3 Engine Exports Importable", "FAIL", str(e))

# G3 - a live session can start, step once, and close.
try:
    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode=SimulationAgentMode.BASELINE_POLICY,
        seed=42,
        max_steps=10
    )
    try:
        start_info = session.start_line()
        assert isinstance(start_info, str), "start_line() did not return str"
        step_result, _, _ = session.step_once()
        assert "observation" in step_result, "step_once missing 'observation'"
        assert "reward" in step_result, "step_once missing 'reward'"
    finally:
        # Always release the session, including when an assertion above fails.
        session.close()
    # PASS is reported only after close() succeeded, so a failing close()
    # yields a single FAIL line instead of PASS followed by FAIL.
    print_result("G3", "LiveSimulationSession Full Lifecycle", "PASS", "Lifecycle valid")
except Exception as e:
    print_result("G3", "LiveSimulationSession Full Lifecycle", "FAIL", str(e))

# H2 / H3
# The H checks run against the live server via curl/pytest from bash, not here.

# I1 - all four baseline policies are callable.
try:
    from app.baselines import (
        random_policy,
        backlog_clearance_policy as baseline_policy,
        greedy_sla_policy,
        fairness_aware_policy,
    )
    for name, fn in [
        ("random_policy", random_policy),
        ("baseline_policy", baseline_policy),
        ("greedy_sla_policy", greedy_sla_policy),
        ("fairness_aware_policy", fairness_aware_policy),
    ]:
        assert callable(fn), f"{name} is not callable"
    print_result("I1", "All 4 Policies Are Callable", "PASS", "Policies callable")
except Exception as e:
    print_result("I1", "All 4 Policies Are Callable", "FAIL", str(e))

# I2 - a policy maps an observation to an ActionModel.
try:
    from app.baselines import greedy_sla_policy
    env_i2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs_i2, _ = env_i2.reset(seed=42)
    action_i2 = greedy_sla_policy(obs_i2)
    assert isinstance(action_i2, ActionModel), f"policy returned {type(action_i2)}"
    print_result("I2", "Policy Returns Valid Action", "PASS", f"action_type={action_i2.action_type}")
except Exception as e:
    print_result("I2", "Policy Returns Valid Action", "FAIL", str(e))

# J1 - the Gym wrapper exposes observation and action spaces.
try:
    env_j1 = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
    assert hasattr(env_j1, 'observation_space'), "no observation_space"
    assert hasattr(env_j1, 'action_space'), "no action_space"
    print_result("J1", "Gymnasium API Compliance", "PASS", "Spaces defined")
except Exception as e:
    print_result("J1", "Gymnasium API Compliance", "FAIL", str(e))

# J2 - action_masks() exists and matches the action-space size.
try:
    obs, _ = env_j1.reset(seed=42)
    assert hasattr(env_j1, 'action_masks'), "action_masks() method missing"
    masks = env_j1.action_masks()
    assert hasattr(masks, '__len__'), "action_masks() must return array-like"
    assert len(masks) == env_j1.action_space.n, f"mask length {len(masks)} != action_space.n {env_j1.action_space.n}"
    print_result("J2", "action_masks() Method Required by MaskablePPO", "PASS", f"n={len(masks)}")
except Exception as e:
    print_result("J2", "action_masks() Method Required by MaskablePPO", "FAIL", str(e))

# J3 - stable-baselines3 env checker.
try:
    check_env(env_j1, warn=True)
    print_result("J3", "SB3 VecEnv Compatibility", "PASS", "check_env passed")
except Exception as e:
    print_result("J3", "SB3 VecEnv Compatibility", "FAIL", str(e))

# J4 - MaskablePPO can be constructed on the wrapper.
try:
    model = MaskablePPO("MlpPolicy", env_j1, verbose=0, seed=42)
    print_result("J4", "MaskablePPO Can Initialize", "PASS", "Model initialized")
except Exception as e:
    print_result("J4", "MaskablePPO Can Initialize", "FAIL", str(e))

# J5 - ten masked steps run without raising.
try:
    obs, _ = env_j1.reset(seed=42)
    for _ in range(10):
        masks = env_j1.action_masks()
        valid_actions = [i for i, m in enumerate(masks) if m]
        # Take the first unmasked action; fall back to 0 if none is allowed.
        action = valid_actions[0] if valid_actions else 0
        obs, reward, terminated, truncated, info = env_j1.step(action)
        if terminated or truncated:
            obs, _ = env_j1.reset(seed=42)
    print_result("J5", "10-Step Rollout Without Crash", "PASS", "Rollout passed")
except Exception as e:
    print_result("J5", "10-Step Rollout Without Crash", "FAIL", str(e))

# M1 - openenv.yaml parses and registers all required tasks.
try:
    with open("openenv.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    assert "tasks" in config, "openenv.yaml missing 'tasks' key"
    task_ids = [t["id"] for t in config["tasks"]]
    for required in _TASK_IDS:
        assert required in task_ids, f"{required} missing from openenv.yaml"
    print_result("M1", "YAML Loads and Contains All 3 Tasks", "PASS", f"{len(task_ids)} tasks registered")
except Exception as e:
    print_result("M1", "YAML Loads and Contains All 3 Tasks", "FAIL", str(e))
367
+
baseline_openai.py ADDED
@@ -0,0 +1,983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ── Path bootstrap ──────────────────────────────────────────────────────────
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ _ROOT = Path(__file__).resolve().parent
8
+ if str(_ROOT) not in sys.path:
9
+ sys.path.insert(0, str(_ROOT))
10
+
11
+ # ── Load .env ────────────────────────────────────────────────────────────────
12
+ from dotenv import load_dotenv
13
+ load_dotenv(dotenv_path=_ROOT / ".env", override=False)
14
+
15
+ import argparse
16
+ import json
17
+ import os
18
+ import random as _random
19
+ import re
20
+ import time
21
+ from dataclasses import asdict, dataclass, field
22
+ from datetime import datetime
23
+ from typing import Any
24
+
25
+ from app.env import GovWorkflowEnv
26
+ from app.models import (
27
+ ActionModel,
28
+ ActionType,
29
+ ObservationModel,
30
+ PriorityMode,
31
+ ServiceType,
32
+ StepInfoModel,
33
+ )
34
+ from app.tasks import get_task, list_tasks
35
+ from app.api_gateway import create_env_gateway, TransportMode
36
+
37
+
38
+ # ══════════════════════════════════════════════════════════════════════════════
39
+ # SECTION 1 — Model Registry & Per-Task Pools
40
+ # ══════════════════════════════════════════════════════════════════════════════
41
+
42
+ NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"
43
+
44
+ # ── Global 10-Model Sequential Pool (April 2026 — Verified on NVIDIA NIM) ────
45
+ #
46
+ # CHANGES FROM PREVIOUS VERSION:
47
+ # REMOVED (invalid/unavailable IDs):
48
+ # qwen/qwen3-next-80b-a3b-instruct → invalid model ID
49
+ # moonshotai/kimi-k2-instruct-0905 → not on NVIDIA NIM
50
+ # deepseek-ai/deepseek-v3.2 → wrong ID (use deepseek-v3)
51
+ # google/gemma-3-27b-it → outdated (gemma-4 released)
52
+ # mistralai/mixtral-8x22b-instruct-v0.1 → replaced by newer models
53
+ # ADDED (verified April 2026):
54
+ # deepseek-ai/deepseek-v4-flash → FREE endpoint, 1M context
55
+ # deepseek-ai/deepseek-r1 → reasoning, 685B MoE
56
+ # nvidia/nemotron-3-super-120b-a12b → hybrid Mamba-Transformer, 1M ctx
57
+ # minimaxai/minimax-m2.7 → FREE endpoint, 230B
58
+ # google/gemma-4-31b-it → latest Gemma on NVIDIA NIM
59
+ # qwen/qwen3.5-122b-a10b → latest Qwen on NVIDIA NIM
60
+
61
+ GLOBAL_MODEL_POOL: list[str] = [
62
+ "meta/llama-3.3-70b-instruct", # 1. Primary
63
+ "deepseek-ai/deepseek-v4-flash", # 2. FREE endpoint — 1M context
64
+ "deepseek-ai/deepseek-r1", # 3. Reasoning — 685B MoE
65
+ "nvidia/nemotron-3-super-120b-a12b", # 4. NVIDIA native — 1M ctx
66
+ "qwen/qwen3.5-122b-a10b", # 5. Qwen3.5 — tool calling
67
+ "deepseek-ai/deepseek-v3", # 6. DeepSeek V3 — hybrid mode
68
+ "minimaxai/minimax-m2.7", # 7. FREE endpoint — 230B
69
+ "google/gemma-4-31b-it", # 8. Dense 31B — agentic workflows
70
+ "microsoft/phi-4-mini-instruct", # 9. Reliable small — last resort
71
+ "meta/llama-3.1-8b-instruct", # 10. Fastest safety fallback
72
+ ]
73
+
74
+ # ── Free endpoint pool (KEY 2 — NVIDIA_API_KEY_2 fallback) ───────────────────
75
+ FREE_POOL: list[str] = [
76
+ "deepseek-ai/deepseek-v4-flash",
77
+ "minimaxai/minimax-m2.7",
78
+ "microsoft/phi-4-mini-instruct",
79
+ "meta/llama-3.1-8b-instruct",
80
+ ]
81
+
82
+ # ── Fixed seeds ────────────────────────────────────────────────────────────────
83
+ TASK_SEEDS: dict[str, int] = {
84
+ "district_backlog_easy": 11,
85
+ "mixed_urgency_medium": 22,
86
+ "cross_department_hard": 33,
87
+ }
88
+
89
+ LLM_TEMPERATURE = 0.2
90
+ LLM_TOP_P = 0.7
91
+ LLM_MAX_TOKENS = 512
92
+ MAX_LLM_STEPS = 80
93
+
94
+ LLM_CALL_DELAY = float(os.environ.get("LLM_CALL_DELAY", "12.0"))
95
+ LLM_CALL_JITTER = 1.0
96
+
97
+ # ── Enum fields that MUST be lowercase for Pydantic StrEnum ──────────────────
98
+ _ENUM_FIELDS = {"action_type", "priority_mode", "service", "target_service"}
99
+
100
+ # ── Canonical field names (Phase 2 update — do NOT use legacy names) ─────────
101
+ # CORRECT WRONG (legacy)
102
+ # snap.blocked_missing_docs ← snap.missing_docs_cases
103
+ # snap.total_pending ← snap.active_cases
104
+ # obs.fairness_gap ← obs.fairness_index
105
+
106
+
107
+ # ══════════════════════════════════════════════════════════════════════════════
108
+ # SECTION 2 — Model Rotator
109
+ # ══════════════════════════════════════════════════════════════════════════════
110
+
111
class ModelRotator:
    """Round-robin cursor over a private copy of GLOBAL_MODEL_POOL.

    Every call to rotate() is recorded in an internal log; the log length is
    what pool_exhausted is based on.
    """

    def __init__(self, task_id: str) -> None:
        """Start at the first model of the pool.

        Args:
            task_id: Task this rotator belongs to. It is stored but not read
                by any method of this class.
        """
        self._task_id = task_id
        self._index = 0
        self._rotation_log: list[dict[str, str]] = []
        # Copy so changes to this rotator's sequence never touch the shared pool.
        self._sequence: list[str] = list(GLOBAL_MODEL_POOL)

    @property
    def current(self) -> str:
        """Model id at the current cursor position."""
        return self._sequence[self._index]

    @property
    def current_key_id(self) -> int:
        """2 when the current model is listed in FREE_POOL, otherwise 1."""
        if self.current in FREE_POOL:
            return 2
        return 1

    @property
    def pool_exhausted(self) -> bool:
        """True once at least 50 rotations have been logged."""
        rotations_so_far = len(self._rotation_log)
        return rotations_so_far >= 50

    def rotate(self, reason: str = "error") -> str | None:
        """Advance to the next model (wrapping around) and return its id.

        Args:
            reason: Short label stored in the rotation log and echoed to stdout.

        Returns:
            The model id that is current after the rotation.
        """
        previous = self.current
        self._rotation_log.append({"from": previous, "reason": reason})
        next_index = (self._index + 1) % len(self._sequence)
        self._index = next_index
        new = self._sequence[next_index]
        # Only the part after the last "/" is shown to keep the line short.
        print(
            f"\n 🔄 Model rotated: "
            f"{previous.split('/')[-1]} → {new.split('/')[-1]} ({reason})"
        )
        return new

    def summary(self) -> list[dict]:
        """Return a shallow copy of the rotation log."""
        return [*self._rotation_log]
143
+
144
+
145
+ # ══════════════════════════════════════════════════════════════════════════════
146
+ # SECTION 3 — Result Dataclasses
147
+ # ══════════════════════════════════════════════════════════════════════════════
148
+
149
@dataclass
class StepRecord:
    """One entry of an episode's step log (see EpisodeResult.step_log).

    Field order is part of the positional constructor, so new fields must be
    appended rather than inserted.
    """

    step: int
    day: int
    action_type: str
    reward: float
    invalid: bool
    total_backlog: int
    total_completed: int
    # Model id used for this step; EpisodeResult.summary() tallies steps per
    # model from this field.
    model_used: str
    notes: list[str]
160
+
161
+
162
@dataclass
class EpisodeResult:
    """Aggregate outcome of one episode, plus its per-step log.

    `timestamp` defaults to the ISO-8601 string of the local time at which the
    instance is created.
    """

    task_id: str
    agent: str
    primary_model: str
    seed: int
    score: float
    grader_name: str
    total_steps: int
    total_reward: float
    total_completed: int
    total_sla_breaches: int
    total_invalid_actions: int
    final_day: int
    terminated: bool
    truncated: bool
    grader_metrics: dict[str, float]
    step_log: list[StepRecord]
    elapsed_seconds: float
    model_rotations: list[dict]
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def summary(self) -> str:
        """Render a two-line human-readable summary.

        The first line holds the headline metrics; the second lists how many
        logged steps used each model, in first-seen order, showing only the
        part of each model id after the last "/".
        """
        # Tally steps per model while preserving first-seen order.
        steps_per_model: dict[str, int] = {}
        for record in self.step_log:
            steps_per_model.setdefault(record.model_used, 0)
            steps_per_model[record.model_used] += 1

        usage_parts = []
        for model_id, step_count in steps_per_model.items():
            usage_parts.append(f"{model_id.split('/')[-1]} ({step_count})")
        usage_str = ", ".join(usage_parts)

        headline = (
            f"[{self.task_id}] agent={self.agent} "
            f"score={self.score:.3f} reward={self.total_reward:.2f} "
            f"completed={self.total_completed} breaches={self.total_sla_breaches} "
            f"invalid={self.total_invalid_actions} "
            f"rotations={len(self.model_rotations)} "
            f"day={self.final_day} steps={self.total_steps} "
            f"time={self.elapsed_seconds:.1f}s\n"
        )
        usage_line = f" Model usage: {usage_str}"
        return headline + usage_line
201
+
202
+
203
+ # ══════════════════════════════════════════════════════════════════════════════
204
+ # SECTION 4 — Direct Environment Wrapper
205
+ # ══════════════════════════════════════════════════════════════════════════════
206
+
207
class DirectEnvClient:
    """
    In-process wrapper around ``GovWorkflowEnv`` exposing reset/step/grade.

    FIX: grade() now calls grade_episode(task_id, episode_state) correctly.
    Previous version called grade_episode(self.env.state()) — wrong signature.
    get_episode_state() returns EpisodeStateModel, not ObservationModel.

    NOTE(review): the FIX note above does not match the code below — grade()
    still passes a single argument obtained from self.env.state(). Confirm the
    real grade_episode signature before relying on either description.
    """

    def __init__(self, task_id: str, seed: int) -> None:
        # The environment is built once per client; the seed is only applied
        # when reset() is called.
        self.env = GovWorkflowEnv(task_id=task_id)
        self._seed = seed
        self._task_id = task_id
        self.terminated = False
        self.truncated = False

    def reset(self) -> ObservationModel:
        """Reset the wrapped environment with the stored seed and clear the end flags."""
        obs, _ = self.env.reset(seed=self._seed)
        self.terminated = False
        self.truncated = False
        return obs

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Forward one action to the environment and mirror its end flags on the client."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        self.terminated = terminated
        self.truncated = truncated
        return obs, reward, terminated, truncated, info

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Grade the current episode and return (score, grader_name, metrics)."""
        # Imported lazily inside the method rather than at module import time.
        from app.graders import grade_episode
        episode_state = self.env.state()
        result = grade_episode(episode_state)
        return result.score, result.grader_name, result.metrics
240
+
241
+
242
+ # ══════════════════════════════════════════════════════════════════════════════
243
+ # SECTION 5 — HTTP Environment Wrapper
244
+ # ══════════════════════════════════════════════════════════════════════════════
245
+
246
class HttpEnvClient:
    """
    HTTP transport for a remote Gov Workflow environment server.

    Mirrors the reset/step/grade surface of the in-process client: reset()
    opens a server-side session, step() and grade() operate on that session.
    The ``terminated`` / ``truncated`` attributes track the last step result.
    """

    def __init__(
        self, task_id: str, seed: int, base_url: str = "http://localhost:7860"
    ) -> None:
        """
        Args:
            task_id: Task identifier sent with every /reset request.
            seed: Seed sent with every /reset request.
            base_url: Server root; a trailing slash is stripped.

        Raises:
            ImportError: if the optional ``requests`` package is missing.
        """
        try:
            import requests as _req
        except ImportError as exc:
            # Chain the original error so the failing import stays visible.
            raise ImportError("pip install requests — required for --mode http") from exc
        self._req = _req
        self._task_id = task_id
        self._seed = seed
        self._base_url = base_url.rstrip("/")
        self._session_id: str | None = None
        self.terminated = False
        self.truncated = False

    def _post(self, path: str, body: dict) -> dict:
        """POST ``body`` as JSON to ``path`` and return the decoded JSON reply."""
        r = self._req.post(
            f"{self._base_url}{path}", json=body, timeout=30
        )
        r.raise_for_status()
        return r.json()

    def _require_session(self) -> str:
        """Return the active session id, failing fast when reset() was never called."""
        if not self._session_id:
            raise RuntimeError("Session not initialized. Call reset() first.")
        return self._session_id

    def reset(self) -> ObservationModel:
        """Start a new server-side session and return its first observation."""
        data = self._post("/reset", {"task_id": self._task_id, "seed": self._seed})
        self._session_id = data["session_id"]
        self.terminated = False
        self.truncated = False
        return ObservationModel(**data["observation"])

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """
        Send one action for the active session.

        Raises:
            RuntimeError: if reset() has not been called yet.
        """
        session_id = self._require_session()
        data = self._post("/step", {
            "session_id": session_id,
            "action": action.model_dump(exclude_none=True),
        })
        obs = ObservationModel(**data["observation"])
        info = StepInfoModel(**data["info"])
        self.terminated = data["terminated"]
        self.truncated = data["truncated"]
        return obs, data["reward"], data["terminated"], data["truncated"], info

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """
        Ask the server to grade the active session.

        Returns:
            (score, grader_name, metrics) taken from the /grade reply.

        Raises:
            RuntimeError: if reset() has not been called yet.
        """
        data = self._post("/grade", {"session_id": self._require_session()})
        return data["score"], data["grader_name"], data["metrics"]
292
+
293
+
294
+ # ══════════════════════════════════════════════════════════════════════════════
295
+ # SECTION 6 — Heuristic Baseline Agent
296
+ # ══════════════════════════════════════════════════════════════════════════════
297
+
298
+ class HeuristicAgent:
299
+ """
300
+ Rule-based agent. Requires no API key.
301
+
302
+ FIXED field names (Phase 2 canonical):
303
+ snap.blocked_missing_docs ← was snap.missing_docs_cases
304
+ snap.total_pending ← was snap.active_cases
305
+ """
306
+
307
+ def __init__(self) -> None:
308
+ self._priority_set = False
309
+ self._admin_action_day: int | None = None
310
+ self._last_doc_request_day: int | None = None
311
+
312
+ def reset(self) -> None:
313
+ self._priority_set = False
314
+ self._admin_action_day = None
315
+ self._last_doc_request_day = None
316
+
317
+ current_model = "heuristic"
318
+
319
+ def rotation_summary(self) -> list[dict]:
320
+ return []
321
+
322
+ def update_reward(self, _: float) -> None:
323
+ pass
324
+
325
+ @staticmethod
326
+ def _svc_key(service: str | ServiceType) -> str:
327
+ return service.value if isinstance(service, ServiceType) else str(service)
328
+
329
+ def act(self, obs: ObservationModel) -> ActionModel:
330
+ snapshots = list(obs.queue_snapshots.values())
331
+
332
+ # One admin action per simulated day; then always advance time.
333
+ if self._admin_action_day == obs.day:
334
+ return ActionModel(action_type=ActionType.ADVANCE_TIME)
335
+
336
+ # 1. Set priority mode once
337
+ if not self._priority_set:
338
+ self._priority_set = True
339
+ self._admin_action_day = obs.day
340
+ return ActionModel(
341
+ action_type=ActionType.SET_PRIORITY_MODE,
342
+ priority_mode=PriorityMode.URGENT_FIRST,
343
+ )
344
+
345
+ # 2. Allocate any idle officer to the currently most loaded service.
346
+ if obs.officer_pool.idle_officers > 0 and snapshots:
347
+ most_loaded = max(snapshots, key=lambda s: s.total_pending)
348
+ self._admin_action_day = obs.day
349
+ return ActionModel(
350
+ action_type=ActionType.ASSIGN_CAPACITY,
351
+ capacity_assignment={most_loaded.service_type.value: 1},
352
+ )
353
+
354
+ days_left = obs.max_days - obs.day
355
+
356
+ # 3. Reallocate one officer if load/officer ratio is clearly imbalanced.
357
+ allocated = {
358
+ self._svc_key(svc): int(off)
359
+ for svc, off in obs.officer_pool.allocated.items()
360
+ }
361
+ if snapshots and len(allocated) >= 2:
362
+ case_counts = {s.service_type.value: s.total_pending for s in snapshots}
363
+
364
+ best_src: tuple[str, int] | None = None
365
+ best_tgt: tuple[str, int] | None = None
366
+ src_ratio = float("inf")
367
+ tgt_ratio = -1.0
368
+
369
+ for svc, officers in allocated.items():
370
+ if officers <= 1:
371
+ continue
372
+ ratio = case_counts.get(svc, 0) / max(officers, 1)
373
+ if ratio < src_ratio:
374
+ src_ratio = ratio
375
+ best_src = (svc, officers)
376
+
377
+ for svc, officers in allocated.items():
378
+ ratio = case_counts.get(svc, 0) / max(officers, 1)
379
+ if ratio > tgt_ratio:
380
+ tgt_ratio = ratio
381
+ best_tgt = (svc, officers)
382
+
383
+ if best_src and best_tgt and best_src[0] != best_tgt[0] and tgt_ratio > src_ratio * 1.8:
384
+ self._admin_action_day = obs.day
385
+ return ActionModel(
386
+ action_type=ActionType.REALLOCATE_OFFICERS,
387
+ reallocation_delta={best_src[0]: -1, best_tgt[0]: 1},
388
+ )
389
+
390
+ # 4. Request missing docs conservatively to avoid repeatedly resetting
391
+ # resolution days for already-requested cases.
392
+ can_request_docs = (
393
+ any(s.blocked_missing_docs > 0 for s in snapshots)
394
+ and (
395
+ self._last_doc_request_day is None
396
+ or (obs.day - self._last_doc_request_day) >= 3
397
+ or obs.pending_doc_resolutions == 0
398
+ )
399
+ )
400
+ if can_request_docs:
401
+ target_docs = max(
402
+ snapshots,
403
+ key=lambda s: (s.blocked_missing_docs, s.current_sla_risk, s.total_pending),
404
+ )
405
+ if target_docs.blocked_missing_docs > 0:
406
+ self._admin_action_day = obs.day
407
+ self._last_doc_request_day = obs.day
408
+ return ActionModel(
409
+ action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
410
+ service_target=target_docs.service_type,
411
+ )
412
+
413
+ # 5. Escalate in the final window when urgency is present.
414
+ if obs.escalation_budget_remaining > 0:
415
+ urgent_snaps = [s for s in snapshots if s.urgent_pending > 0]
416
+ if urgent_snaps and days_left <= 5:
417
+ target = max(urgent_snaps, key=lambda s: s.urgent_pending)
418
+ self._admin_action_day = obs.day
419
+ return ActionModel(
420
+ action_type=ActionType.ESCALATE_SERVICE,
421
+ escalation_target=target.service_type,
422
+ )
423
+
424
+ # 6. Default — progress simulation.
425
+ return ActionModel(action_type=ActionType.ADVANCE_TIME)
426
+
427
+
428
+ # ══════════════════════════════════════════════════════════════════════════════
429
+ # SECTION 7 — System Prompt
430
+ # ══════════════════════════════════════════════════════════════════════════════
431
+
432
+ SYSTEM_PROMPT = """You are an expert government-office workflow manager AI.
433
+ Your job is to control a simulated government district office processing citizen
434
+ applications across multiple services.
435
+
436
+ SERVICES: passport, driving_license, gst_registration, income_certificate,
437
+ caste_certificate, birth_certificate, land_registration
438
+
439
+ WORKFLOW STAGES (in order):
440
+ submission → document_verification → field_verification → approval → issuance
441
+
442
+ YOUR GOAL: Maximise the episode score (0.0 to 1.0) by:
443
+ - Completing as many applications as possible within SLA deadlines
444
+ - Prioritising urgent cases (urgency level 3 > 2 > 1)
445
+ - Keeping all services fairly served (no service left behind)
446
+ - Using escalations sparingly — only when a case is about to breach SLA
447
+ - Keeping officers productively busy (not idle)
448
+
449
+ QUEUE STATUS FIELDS EXPLAINED:
450
+ backlog = total_pending applications in queue
451
+ missing_docs = blocked_missing_docs (stuck waiting for documents)
452
+ urgent = urgent_cases (high-urgency applications)
453
+ breached = breached_cases (already past SLA deadline)
454
+
455
+ AVAILABLE ACTIONS — return exactly ONE per turn as JSON:
456
+
457
+ 1. Set queue processing order (do this FIRST on day 0 only):
458
+ {"action_type": "set_priority_mode", "priority_mode": "urgent_first"}
459
+ priority_mode options: urgent_first | oldest_first | balanced | backlog_clearance
460
+
461
+ 2. Deploy a reserve officer to a service (day 0 only if reserves available):
462
+ {"action_type": "assign_capacity", "service": "driving_license", "officer_delta": 1}
463
+
464
+ 3. Unblock a stuck application with missing documents:
465
+ {"action_type": "request_missing_documents", "service": "driving_license"}
466
+
467
+ 4. Escalate one case to emergency priority (VERY LIMITED — use wisely):
468
+ {"action_type": "escalate_service", "service": "income_certificate"}
469
+
470
+ 5. Move officer between services (only when load ratio > 4x):
471
+ {"action_type": "reallocate_officers", "service": "birth_certificate",
472
+ "target_service": "driving_license", "officer_delta": 1}
473
+
474
+ 6. Let one working day pass — THE ONLY ACTION THAT PROCESSES APPLICATIONS:
475
+ {"action_type": "advance_time"}
476
+
477
+ CRITICAL RULES:
478
+ - ALL values MUST be lowercase: driving_license NOT DRIVING_LICENSE
479
+ - advance_time is the ONLY action that earns progress reward
480
+ - Do NOT chain more than 2 admin actions before calling advance_time
481
+ - Do NOT escalate before (max_days - 5) unless case already breached SLA
482
+ - Do NOT reallocate if source service has fewer than 2 officers
483
+
484
+ OPTIMAL STRATEGY:
485
+ Day 0: set_priority_mode → assign_capacity (if reserves > 0) → advance_time
486
+ Every day: request_missing_documents (ONE service, highest missing_docs) → advance_time
487
+ Final 5: escalate_service (urgent/breached only) → advance_time
488
+
489
+ RESPONSE FORMAT — return ONLY a raw JSON object, nothing else:
490
+ CORRECT: {"action_type": "advance_time"}
491
+ CORRECT: {"action_type": "request_missing_documents", "service": "driving_license"}
492
+ WRONG: ```json\n{"action_type": "ADVANCE_TIME"}```
493
+ """
494
+
495
+
496
+ # ══════════════════════════════════════════════════════════════════════════════
497
+ # SECTION 8 — JSON Extraction with Lowercase Normaliser
498
+ # ══════════════════════════════════════════════════════════════════════════════
499
+
500
+ def _extract_json_action(raw: str) -> dict[str, Any]:
501
+ cleaned = re.sub(r"```(?:json)?", "", raw).strip()
502
+ parsed: dict[str, Any] | None = None
503
+
504
+ try:
505
+ parsed = json.loads(cleaned)
506
+ except json.JSONDecodeError:
507
+ pass
508
+
509
+ if parsed is None:
510
+ match = re.search(r"\{[^{}]*\}", cleaned, re.DOTALL)
511
+ if match:
512
+ try:
513
+ parsed = json.loads(match.group())
514
+ except json.JSONDecodeError:
515
+ pass
516
+
517
+ if parsed is None:
518
+ print(f" ⚠ JSON parse failed, falling back to advance_time. Raw: {raw[:120]!r}")
519
+ return {"action_type": "advance_time"}
520
+
521
+ for enum_field in _ENUM_FIELDS:
522
+ if enum_field in parsed and isinstance(parsed[enum_field], str):
523
+ parsed[enum_field] = parsed[enum_field].lower()
524
+
525
+ return parsed
526
+
527
+
528
+ # ══════════════════════════════════════════════════════════════════════════════
529
+ # SECTION 9 — Observation → User Message Builder
530
+ # ══════════════════════════════════════════════════════════════════════════════
531
+
532
+ def _build_user_message(
533
+ obs: ObservationModel, step_num: int, cumulative_reward: float
534
+ ) -> str:
535
+ """
536
+ FIXED field names (Phase 2 canonical):
537
+ snap.total_pending ← was snap.active_cases
538
+ snap.blocked_missing_docs ← was snap.missing_docs_cases
539
+ """
540
+ queue_lines = []
541
+ for snap in obs.queue_snapshots:
542
+ officers = obs.officer_pool.allocations.get(snap.service, 0)
543
+ queue_lines.append(
544
+ f" {snap.service:<22}: "
545
+ f"backlog={snap.total_pending:>3} "
546
+ f"officers={officers} "
547
+ f"missing_docs={snap.blocked_missing_docs:>2} "
548
+ f"urgent={snap.urgent_cases} "
549
+ f"breached={snap.breached_cases} "
550
+ f"avg_age={snap.avg_age_days:.1f}d"
551
+ )
552
+ return (
553
+ f"STEP {step_num} | Day {obs.day}/{obs.max_days} "
554
+ f"| Days remaining: {obs.max_days - obs.day}\n"
555
+ f"Cumulative reward: {cumulative_reward:.2f}\n"
556
+ f"Priority mode: {obs.priority_mode}\n"
557
+ f"Reserve officers: {obs.officer_pool.reserve_officers}\n"
558
+ f"Escalation budget remaining: {obs.escalation_budget_remaining}\n"
559
+ f"Total pending: {obs.total_backlog} "
560
+ f"| Completed: {obs.total_completed} "
561
+ f"| SLA breaches: {obs.total_sla_breaches}\n"
562
+ f"Fairness gap: {obs.fairness_gap:.3f}\n\n"
563
+ f"QUEUE STATUS:\n" + "\n".join(queue_lines) + "\n\n"
564
+ f"Return a single JSON action object. All values lowercase."
565
+ )
566
+
567
+
568
+ # ════════════════════════════════════════════════════════════════��═════════════
569
+ # SECTION 10 — LLM Agent with Model Rotation
570
+ # ══════════════════════════════════════════════════════════════════════════════
571
+
572
class LLMAgent:
    """
    LLM-driven agent that talks to an OpenAI-compatible endpoint and rotates
    through a pool of models when a call fails.

    A rolling chat history is kept per episode. Any failure while calling the
    model triggers a rotation; once the pool is exhausted the agent simply
    returns ``advance_time`` actions.
    """

    def __init__(
        self,
        task_id: str,
        model_override: str | None = None,
        api_key: str | None = None,
    ) -> None:
        """
        Args:
            task_id: Task identifier handed to ``ModelRotator``.
            model_override: Model to try first, ahead of the rotator's default order.
            api_key: Primary API key; falls back to ``NVIDIA_API_KEY``.

        Raises:
            ImportError: if the ``openai`` package is not installed.
            ValueError: if no primary API key can be resolved.
        """
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError("pip install openai — required for LLM agent") from exc
        self._OpenAI = OpenAI

        resolved_key = api_key or os.environ.get("NVIDIA_API_KEY", "")
        self._api_key_2 = os.environ.get("NVIDIA_API_KEY_2", "")

        if not resolved_key:
            raise ValueError(
                "NVIDIA_API_KEY not set.\n"
                " .env file : NVIDIA_API_KEY=nvapi-xxxxxxxxxxxx\n"
                " Get free key: https://build.nvidia.com/explore/discover"
            )

        self._api_key = resolved_key
        self._task_id = task_id
        # Remembered so reset() can rebuild the rotator with the same override.
        self._model_override = model_override
        self._rotator = self._build_rotator()

        self._client = self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key)
        self._client_2 = (
            self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key_2)
            if self._api_key_2 else None
        )
        self._history: list[dict[str, str]] = []
        self._cumulative_reward = 0.0

    def _build_rotator(self) -> ModelRotator:
        """Create a fresh rotator with the override model, if any, moved to the front."""
        rotator = ModelRotator(self._task_id)
        if self._model_override:
            rotator._sequence = [self._model_override] + [
                m for m in rotator._sequence if m != self._model_override
            ]
        return rotator

    @property
    def current_model(self) -> str:
        """Model the rotator currently points at."""
        return self._rotator.current

    def reset(self) -> None:
        """Clear history and reward and start from a fresh rotator.

        The override is re-applied here; previously the rebuilt rotator dropped
        it, so a requested model was lost as soon as the episode runner called
        reset().
        """
        self._history = []
        self._cumulative_reward = 0.0
        self._rotator = self._build_rotator()

    def update_reward(self, reward: float) -> None:
        """Add one step's reward to the running total shown to the model."""
        self._cumulative_reward += reward

    def rotation_summary(self) -> list[dict]:
        """Return the rotator's rotation log."""
        return self._rotator.summary()

    def act(self, obs: ObservationModel, step_num: int) -> ActionModel:
        """
        Ask the current model for the next action.

        Falls back to ``advance_time`` when the pool is exhausted or when the
        reply cannot be turned into a valid ``ActionModel``.
        """
        if self._rotator.pool_exhausted:
            print(" ⚠ Pool exhausted — returning advance_time")
            return ActionModel(action_type=ActionType.ADVANCE_TIME)

        user_message = _build_user_message(obs, step_num, self._cumulative_reward)
        self._history.append({"role": "user", "content": user_message})

        # Bound the context sent to the model to the 20 most recent messages.
        if len(self._history) > 20:
            self._history = self._history[-20:]

        messages = [{"role": "system", "content": SYSTEM_PROMPT}] + self._history
        raw_reply = ""

        while True:
            try:
                active_client = self._client
                if self._rotator.current_key_id == 2 and self._client_2:
                    active_client = self._client_2

                response = active_client.chat.completions.create(
                    model=self._rotator.current,
                    messages=messages,
                    temperature=LLM_TEMPERATURE,
                    top_p=LLM_TOP_P,
                    max_tokens=LLM_MAX_TOKENS,
                    timeout=30,
                )
                # `choices` is a list; the reply is on its first element.
                # Reading `.message` off the list itself raised AttributeError
                # on every call and burned through the whole model pool.
                raw_reply = response.choices[0].message.content or ""
                break

            except KeyboardInterrupt:
                raise

            except Exception as exc:
                # Deliberately broad: any failure is a reason to try the next model.
                err_name = type(exc).__name__
                err_msg = str(exc)[:120]
                print(f" ⚠ {err_name} on {self._rotator.current.split('/')[-1]}: {err_msg}")
                self._rotator.rotate(reason=err_name)
                time.sleep(1.0)
                if self._rotator.pool_exhausted:
                    return ActionModel(action_type=ActionType.ADVANCE_TIME)

        self._history.append({"role": "assistant", "content": raw_reply})
        action_dict = _extract_json_action(raw_reply)

        try:
            return ActionModel(**action_dict)
        except Exception as exc:
            print(f" ⚠ ActionModel parse failed ({exc}), using advance_time")
            return ActionModel(action_type=ActionType.ADVANCE_TIME)
679
+
680
+
681
+ # ══════════════════════════════════════════════════════════════════════════════
682
+ # SECTION 11 — Episode Runner
683
+ # ══════════════════════════════════════════════════════════════════════════════
684
+
685
+ def run_episode(
686
+ task_id: str,
687
+ agent_type: str,
688
+ model_override: str | None,
689
+ mode: TransportMode,
690
+ server_url: str,
691
+ api_key: str | None,
692
+ verbose: bool,
693
+ max_steps: int = MAX_LLM_STEPS,
694
+ delay_override: float | None = None,
695
+ ) -> EpisodeResult:
696
+ seed = TASK_SEEDS.get(task_id, get_task(task_id).seed)
697
+ delay = delay_override if delay_override is not None else LLM_CALL_DELAY
698
+
699
+ force_fastapi = os.getenv("FORCE_FASTAPI_GATEWAY", "0").strip().lower() in {
700
+ "1",
701
+ "true",
702
+ "yes",
703
+ "on",
704
+ }
705
+ env_api_prefix = os.getenv("OPENENV_ENV_API_PREFIX", "").strip()
706
+ client = create_env_gateway(
707
+ task_id=task_id,
708
+ seed=seed,
709
+ mode=mode, # type: ignore[arg-type]
710
+ base_url=server_url,
711
+ api_prefix=env_api_prefix,
712
+ enforce_fastapi=force_fastapi,
713
+ )
714
+
715
+ if agent_type == "llm":
716
+ agent: HeuristicAgent | LLMAgent = LLMAgent(
717
+ task_id=task_id,
718
+ model_override=model_override,
719
+ api_key=api_key,
720
+ )
721
+ primary_label = agent.current_model
722
+ else:
723
+ agent = HeuristicAgent()
724
+ primary_label = "heuristic"
725
+
726
+ agent.reset()
727
+ obs = client.reset()
728
+
729
+ step_log: list[StepRecord] = []
730
+ total_reward = 0.0
731
+ total_invalid = 0
732
+ step_num = 0
733
+ start = time.perf_counter()
734
+
735
+ print(f"\n{'═'*65}")
736
+ print(f" Task : {task_id}")
737
+ if agent_type == "llm":
738
+ k1 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY", "") else "❌ MISSING"
739
+ k2 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY_2", "") else "⚠ not set"
740
+ print(f" KEY 1 : {k1} KEY 2 : {k2}")
741
+ pool_short = " → ".join(m.split("/")[-1][:14] for m in GLOBAL_MODEL_POOL)
742
+ print(f" Pool : {pool_short}")
743
+ resolved_mode = getattr(client, "transport", mode)
744
+ print(f" Agent : {agent_type} | Mode: {resolved_mode} | Seed: {seed}")
745
+ print(f" Max steps: {max_steps} | Delay: {delay}s")
746
+ print(f"{'═'*65}")
747
+
748
+ while not (client.terminated or client.truncated) and step_num < max_steps:
749
+ step_num += 1
750
+ current_model = agent.current_model
751
+
752
+ if agent_type == "llm":
753
+ action = agent.act(obs, step_num)
754
+ else:
755
+ action = agent.act(obs)
756
+
757
+ obs, reward, terminated, truncated, info = client.step(action)
758
+ agent.update_reward(reward)
759
+
760
+ total_reward += reward
761
+ if info.invalid_action:
762
+ total_invalid += 1
763
+
764
+ step_notes: list[str] = []
765
+ legacy_notes = getattr(info, "notes", None)
766
+ if isinstance(legacy_notes, list):
767
+ step_notes.extend(str(n).strip() for n in legacy_notes if str(n).strip())
768
+ elif isinstance(legacy_notes, str) and legacy_notes.strip():
769
+ step_notes.append(legacy_notes.strip())
770
+
771
+ if info.action_explanation.strip():
772
+ step_notes.append(info.action_explanation.strip())
773
+ step_notes.extend(s.strip() for s in info.effects_resolved_this_step if s.strip())
774
+ step_notes = list(dict.fromkeys(step_notes))
775
+
776
+ record = StepRecord(
777
+ step=step_num,
778
+ day=obs.day,
779
+ action_type=action.action_type.value,
780
+ reward=round(reward, 4),
781
+ invalid=info.invalid_action,
782
+ total_backlog=obs.total_backlog,
783
+ total_completed=obs.total_completed,
784
+ model_used=current_model,
785
+ notes=step_notes,
786
+ )
787
+ step_log.append(record)
788
+
789
+ if verbose:
790
+ status = "❌" if info.invalid_action else "✅"
791
+ model_tag = (
792
+ f"[{current_model.split('/')[-1][:22]}]"
793
+ if agent_type == "llm" else ""
794
+ )
795
+ print(
796
+ f" step={step_num:3d} day={obs.day:2d} "
797
+ f"action={action.action_type.value:<28} "
798
+ f"reward={reward:+.3f} {status} {model_tag}"
799
+ )
800
+ if step_notes:
801
+ print(f" notes: {step_notes}")
802
+
803
+ if agent_type == "llm":
804
+ actual_delay = delay + _random.uniform(-LLM_CALL_JITTER, LLM_CALL_JITTER)
805
+ if not verbose:
806
+ print(
807
+ f" Step {step_num}/{max_steps} — sleeping {actual_delay:.1f}s "
808
+ f"[{current_model.split('/')[-1][:20]}]",
809
+ end="\r", flush=True,
810
+ )
811
+ time.sleep(max(1.0, actual_delay))
812
+ if not verbose:
813
+ print(" " * 80, end="\r", flush=True)
814
+
815
+ score, grader_name, grader_metrics = client.grade()
816
+ elapsed = round(time.perf_counter() - start, 2)
817
+ rotations = agent.rotation_summary()
818
+
819
+ print(f"\n{'-'*65}")
820
+ print(f" SCORE : {score:.3f} / 1.000 (grader: {grader_name})")
821
+ print(f" Reward : {total_reward:.2f} | Steps: {step_num}")
822
+ print(f" Completed: {obs.total_completed} | SLA breaches: {obs.total_sla_breaches}")
823
+ print(f" Invalid actions: {total_invalid} | Model rotations: {len(rotations)}")
824
+ print(f" Time: {elapsed}s")
825
+ print(f" Grader metrics:")
826
+ for metric, value in grader_metrics.items():
827
+ bar = "█" * int(value * 20)
828
+ print(f" {metric:<34} {value:.3f} {bar}")
829
+ if rotations:
830
+ print(f" Rotation log:")
831
+ for r in rotations:
832
+ print(f" {r['from'].split('/')[-1]:<30} → rotated ({r['reason']})")
833
+ print(f"{'-'*65}")
834
+
835
+ return EpisodeResult(
836
+ task_id=task_id,
837
+ agent=agent_type,
838
+ primary_model=primary_label,
839
+ seed=seed,
840
+ score=score,
841
+ grader_name=grader_name,
842
+ total_steps=step_num,
843
+ total_reward=round(total_reward, 4),
844
+ total_completed=obs.total_completed,
845
+ total_sla_breaches=obs.total_sla_breaches,
846
+ total_invalid_actions=total_invalid,
847
+ final_day=obs.day,
848
+ terminated=client.terminated,
849
+ truncated=client.truncated,
850
+ grader_metrics=grader_metrics,
851
+ step_log=step_log,
852
+ elapsed_seconds=elapsed,
853
+ model_rotations=rotations,
854
+ )
855
+
856
+
857
+ # ══════════════════════════════════════════════════════════════════════════════
858
+ # SECTION 12 — Reporter
859
+ # ══════════════════════════════════════════════════════════════════════════════
860
+
861
def save_results(results: list[EpisodeResult], out_dir: Path) -> Path:
    """
    Write all episode results to a timestamped JSON file.

    Args:
        results: Episode results to serialise; may be empty.
        out_dir: Target directory, created if missing.

    Returns:
        Path of the written ``baseline_run_<timestamp>.json`` file.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    # One clock reading, so the file name and the payload timestamp agree.
    now = datetime.now()
    out_path = out_dir / f"baseline_run_{now.strftime('%Y%m%d_%H%M%S')}.json"
    # An empty run has no meaningful average; report 0.0 instead of dividing by zero.
    average = sum(r.score for r in results) / len(results) if results else 0.0
    payload = {
        "run_timestamp": now.isoformat(),
        "total_episodes": len(results),
        "average_score": round(average, 4),
        "model_pool": GLOBAL_MODEL_POOL,
        "free_pool": FREE_POOL,
        "episodes": [asdict(r) for r in results],
    }
    out_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return out_path
875
+
876
+
877
def print_leaderboard(results: list[EpisodeResult]) -> None:
    """
    Print a score-sorted table of episode results followed by the average score.

    Rows are ordered by descending score. With an empty result list only the
    header and a placeholder line are printed; previously the average
    computation raised ZeroDivisionError after the header was already out.

    Args:
        results: Episode results to display; may be empty.
    """
    rule = f" {'-'*32} {'-'*24} {'-'*7} {'-'*8} {'-'*5} {'-'*4}"
    print(f"\n{'═'*72}")
    print(" LEADERBOARD")
    print(f"{'═'*72}")
    header = (
        f" {'TASK':<32} {'MODEL':<24} {'SCORE':>7} "
        f"{'REWARD':>8} {'DONE':>5} {'ROT':>4}"
    )
    print(header)
    print(rule)
    if not results:
        print(" (no episodes)")
        print(f"{'═'*72}\n")
        return
    for r in sorted(results, key=lambda x: -x.score):
        # Only the last path segment of the model id, clipped to the column width.
        model_label = r.primary_model.split("/")[-1][:23]
        print(
            f" {r.task_id:<32} {model_label:<24} {r.score:>7.3f} "
            f"{r.total_reward:>8.2f} {r.total_completed:>5} "
            f"{len(r.model_rotations):>4}"
        )
    avg = sum(r.score for r in results) / len(results)
    print(rule)
    print(f" {'AVERAGE':<32} {'':<24} {avg:>7.3f}")
    print(f"{'═'*72}\n")
898
+
899
+
900
+ # ══════════════════════════════════════════════════════════════════════════════
901
+ # SECTION 13 — CLI Entry Point
902
+ # ══════════════════════════════════════════════════════════════════════════════
903
+
904
+ def build_parser() -> argparse.ArgumentParser:
905
+ p = argparse.ArgumentParser(
906
+ description="Gov Workflow OpenEnv — Multi-Model Rotating LLM Baseline",
907
+ formatter_class=argparse.RawDescriptionHelpFormatter,
908
+ epilog="""
909
+ 10-model pool (April 2026):
910
+ llama-3.3-70b → deepseek-v4-flash → deepseek-r1 → nemotron-3-super →
911
+ qwen3.5-122b → deepseek-v3 → minimax-m2.7 → gemma-4-31b →
912
+ phi-4-mini → llama-3.1-8b
913
+
914
+ Examples:
915
+ python baseline_openai.py --agent heuristic --verbose
916
+ python baseline_openai.py --agent llm --task district_backlog_easy --verbose
917
+ python baseline_openai.py --agent llm --task all --save-results
918
+ python baseline_openai.py --agent llm --model deepseek-ai/deepseek-v4-flash
919
+ python baseline_openai.py --mode http --url http://localhost:7860 --agent llm
920
+ python baseline_openai.py --mode auto --url http://localhost:7860 --agent llm
921
+ """,
922
+ )
923
+ p.add_argument("--agent", choices=["llm", "heuristic"], default="heuristic")
924
+ p.add_argument("--task", choices=list_tasks() + ["all"], default="all")
925
+ p.add_argument("--model", default=None)
926
+ p.add_argument("--mode", choices=["direct", "http", "auto"], default="auto")
927
+ p.add_argument("--url", default="http://localhost:7860")
928
+ p.add_argument("--max-steps", type=int, default=MAX_LLM_STEPS)
929
+ p.add_argument("--delay", type=float, default=None)
930
+ p.add_argument("--api-key", default=None)
931
+ p.add_argument("--verbose", action="store_true")
932
+ p.add_argument("--save-results", action="store_true")
933
+ return p
934
+
935
+
936
+ def main() -> None:
937
+ args = build_parser().parse_args()
938
+ tasks = list_tasks() if args.task == "all" else [args.task]
939
+
940
+ print(f"\n{'═'*65}")
941
+ print(" Gov Workflow OpenEnv — Baseline Runner (April 2026)")
942
+ print(f" Agent : {args.agent.upper()}")
943
+ if args.agent == "llm":
944
+ pool_disp = " → ".join(m.split("/")[-1][:12] for m in GLOBAL_MODEL_POOL)
945
+ print(f" Pool : {pool_disp}")
946
+ print(f" Mode : {args.mode} | Tasks: {', '.join(tasks)}")
947
+ print(f"{'═'*65}")
948
+
949
+ if args.agent == "llm":
950
+ key = args.api_key or os.environ.get("NVIDIA_API_KEY", "")
951
+ if not key:
952
+ print("\n❌ NVIDIA_API_KEY not set.")
953
+ print(" .env file : NVIDIA_API_KEY=nvapi-xxxx")
954
+ print(" PowerShell : $env:NVIDIA_API_KEY='nvapi-xxxx'")
955
+ print(" Get free key: https://build.nvidia.com/explore/discover\n")
956
+ sys.exit(1)
957
+ else:
958
+ key = None
959
+
960
+ results: list[EpisodeResult] = []
961
+ for task_id in tasks:
962
+ result = run_episode(
963
+ task_id=task_id,
964
+ agent_type=args.agent,
965
+ model_override=args.model,
966
+ mode=args.mode,
967
+ server_url=args.url,
968
+ api_key=key,
969
+ verbose=args.verbose,
970
+ max_steps=args.max_steps,
971
+ delay_override=args.delay,
972
+ )
973
+ results.append(result)
974
+
975
+ print_leaderboard(results)
976
+
977
+ if args.save_results:
978
+ out = save_results(results, Path("results"))
979
+ print(f" Results saved → {out}\n")
980
+
981
+
982
+ if __name__ == "__main__":
983
+ main()
client.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Typed HTTP client for Gov Workflow OpenEnv.
3
+
4
+ This keeps a simple OpenEnv-style client interface:
5
+ reset() -> observation wrapper
6
+ step(action) -> step wrapper
7
+ state() -> state wrapper
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Any, TYPE_CHECKING
14
+
15
+ import requests
16
+ try:
17
+ from openenv.core import EnvClient
18
+ from openenv.core.env_client import StepResult
19
+ except ModuleNotFoundError:
20
+ EnvClient = None # type: ignore[assignment]
21
+ StepResult = None # type: ignore[assignment]
22
+
23
+ if TYPE_CHECKING:
24
+ from app.models import ActionModel, EpisodeStateModel, ObservationModel, StepInfoModel
25
+
26
+
27
+ @dataclass
28
+ class ClientStepResult:
29
+ observation: "ObservationModel"
30
+ reward: float
31
+ done: bool
32
+ terminated: bool
33
+ truncated: bool
34
+ info: "StepInfoModel"
35
+
36
+
37
+ class GovWorkflowClient:
38
+ """Small typed client for the FastAPI deployment."""
39
+
40
+ def __init__(self, base_url: str) -> None:
41
+ self.base_url = base_url.rstrip("/")
42
+ self.session_id: str | None = None
43
+
44
+ def _post(self, path: str, body: dict[str, Any]) -> dict[str, Any]:
45
+ response = requests.post(f"{self.base_url}{path}", json=body, timeout=30)
46
+ response.raise_for_status()
47
+ return response.json()
48
+
49
+ def reset(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> "ObservationModel":
50
+ from app.models import ObservationModel
51
+
52
+ payload: dict[str, Any] = {"task_id": task_id}
53
+ if seed is not None:
54
+ payload["seed"] = seed
55
+ data = self._post("/reset", payload)
56
+ self.session_id = data["session_id"]
57
+ return ObservationModel(**data["observation"])
58
+
59
+ def step(self, action: "ActionModel") -> ClientStepResult:
60
+ from app.models import ObservationModel, StepInfoModel
61
+
62
+ if not self.session_id:
63
+ raise RuntimeError("Session not initialized. Call reset() first.")
64
+ data = self._post(
65
+ "/step",
66
+ {
67
+ "session_id": self.session_id,
68
+ "action": action.model_dump(exclude_none=True),
69
+ },
70
+ )
71
+ return ClientStepResult(
72
+ observation=ObservationModel(**data["observation"]),
73
+ reward=float(data["reward"]),
74
+ done=bool(data["done"]),
75
+ terminated=bool(data["terminated"]),
76
+ truncated=bool(data["truncated"]),
77
+ info=StepInfoModel(**data["info"]),
78
+ )
79
+
80
+ def state(self, include_action_history: bool = False) -> "EpisodeStateModel":
81
+ from app.models import EpisodeStateModel
82
+
83
+ if not self.session_id:
84
+ raise RuntimeError("Session not initialized. Call reset() first.")
85
+ data = self._post(
86
+ "/state",
87
+ {
88
+ "session_id": self.session_id,
89
+ "include_action_history": include_action_history,
90
+ },
91
+ )
92
+ return EpisodeStateModel(**data["state"])
93
+
94
+
95
+ if EnvClient is not None and StepResult is not None:
96
+ class GovWorkflowOpenEnvClient(
97
+ EnvClient["ActionModel", "ObservationModel", "EpisodeStateModel"]
98
+ ):
99
+ """
100
+ OpenEnv-native websocket client.
101
+
102
+ This class is additive and does not replace the existing HTTP client above.
103
+ """
104
+
105
+ def _step_payload(self, action: "ActionModel") -> dict[str, Any]:
106
+ return action.model_dump(exclude_none=True, mode="json")
107
+
108
+ def _parse_result(self, payload: dict[str, Any]) -> StepResult["ObservationModel"]:
109
+ from app.models import ObservationModel
110
+
111
+ observation_payload = payload.get("observation", {})
112
+ obs = ObservationModel(**observation_payload)
113
+ return StepResult(
114
+ observation=obs,
115
+ reward=payload.get("reward"),
116
+ done=bool(payload.get("done", False)),
117
+ )
118
+
119
+ def _parse_state(self, payload: dict[str, Any]) -> "EpisodeStateModel":
120
+ from app.models import EpisodeStateModel
121
+
122
+ state_payload = payload.get("state", payload)
123
+ return EpisodeStateModel(**state_payload)
124
+ else:
125
+ class GovWorkflowOpenEnvClient: # type: ignore[no-redef]
126
+ """
127
+ Placeholder when optional `openenv` package is unavailable.
128
+ """
129
+
130
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
131
+ raise ModuleNotFoundError(
132
+ "GovWorkflowOpenEnvClient requires the optional 'openenv' package. "
133
+ "Install it to use websocket OpenEnv client features."
134
+ )
docs/FRONTEND_WORKFLOW.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Frontend Workflow
2
+
3
+ The frontend is React-based, backend-driven, and served directly by FastAPI.
4
+
5
+ ## Access
6
+
7
+ - UI: `/ui`
8
+ - Assets: `/ui/assets/*`
9
+ - API namespace: `/api/*`
10
+
11
+ ## What Is Visible in UI
12
+
13
+ 1. OpenEnv API execution (`reset` / `step` / `state` / `grade`)
14
+ 2. Heuristic baseline agent runs (`/api/autostep`, `/api/benchmark`)
15
+ 3. Trained RL model execution (Phase 2/3 checkpoints via `/api/rl/run`)
16
+ 4. Trained RL evaluation across tasks (`/api/rl/evaluate`)
17
+ 5. Script-level workflow visibility for:
18
+ - `baseline_openai.py`
19
+ - `inference.py`
20
+
21
+ ## Frontend API Surface
22
+
23
+ - Core:
24
+ - `GET /api/health`
25
+ - `GET /api/tasks`
26
+ - `GET /api/agents`
27
+ - `POST /api/reset`
28
+ - `POST /api/step`
29
+ - `POST /api/state`
30
+ - `POST /api/grade`
31
+ - `GET /api/sessions`
32
+ - `DELETE /api/sessions/{session_id}`
33
+ - Baseline execution:
34
+ - `POST /api/autostep`
35
+ - `POST /api/benchmark`
36
+ - Workflow visibility:
37
+ - `GET /api/workflows/components`
38
+ - `POST /api/workflows/run`
39
+ - RL visibility/execution:
40
+ - `GET /api/rl/models`
41
+ - `POST /api/rl/run`
42
+ - `POST /api/rl/evaluate`
43
+
44
+ ## Deployment Notes
45
+
46
+ - No Node.js build is required for serving the current frontend.
47
+ - Backend startup remains `app.main:app`.
48
+ - Frontend does not call external LLM providers directly.
docs/PHASE2_IMPLEMENTATION.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Phase 2 Implementation Notes
2
+
3
+ Phase 2 goal: Curriculum PPO across easy, medium, and hard tasks with deterministic evaluation discipline.
4
+
5
+ ## Implemented Components
6
+
7
+ - `rl/curriculum.py`
8
+ - `CurriculumScheduler` with staged task sampling:
9
+ - Stage 1 (0%-30%): easy only
10
+ - Stage 2 (30%-70%): easy + medium
11
+ - Stage 3 (70%-100%): all 3 tasks with configurable weights
12
+ - `rl/configs/curriculum.yaml`
13
+ - curriculum fractions and weights
14
+ - PPO hyperparameters for Phase 2
15
+ - `rl/train_ppo.py`
16
+ - `--phase 2` training path wired to curriculum scheduler
17
+ - default config path uses `rl/configs/curriculum.yaml`
18
+ - backward compatibility fallback to `rl/configs/ppo_curriculum.yaml`
19
+ - explicit CLI args: `--phase1-config`, `--phase2-config`
20
+ - `tests/test_curriculum.py`
21
+ - stage transitions
22
+ - stage-1 easy-only enforcement
23
+ - stage-3 all-task sampling
24
+ - deterministic task seed invariants
25
+
26
+ ## Operational Notes
27
+
28
+ - Existing 28-action design is preserved.
29
+ - Existing task IDs and grader logic are unchanged.
30
+ - No files were deleted as part of structure cleanup.
31
+
32
+ ## Commands (using existing .venv313)
33
+
34
+ - Train Phase 1:
35
+ - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 1 --timesteps 200000 --n-envs 4 --seed 42`
36
+ - Train Phase 2:
37
+ - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 2 --timesteps 500000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum.yaml`
38
+ - Train Phase 2 (tuned continuation):
39
+ - `.\.venv313\Scripts\python.exe -m rl.train_ppo --phase 2 --timesteps 300000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum_tuned.yaml`
40
+ - Evaluate trained model:
41
+ - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/phase2_final.zip --episodes 3`
docs/PHASE3_IMPLEMENTATION.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Phase 3 Implementation Notes
2
+
3
+ Phase 3 goal: Recurrent PPO (LSTM policy) to capture temporal dependencies such as SLA trend and escalation history.
4
+
5
+ ## Implemented Components
6
+
7
+ - `rl/train_recurrent.py`
8
+ - RecurrentPPO training with `MlpLstmPolicy`
9
+ - LSTM hidden size configurable (default 128)
10
+ - curriculum sampling retained (easy -> medium -> hard)
11
+ - optional transfer of compatible policy tensors from best Phase 2 checkpoint
12
+ - `rl/configs/recurrent.yaml`
13
+ - declarative recurrent training and curriculum settings
14
+ - `rl/evaluate.py`
15
+ - model loading modes: `auto`, `maskable`, `recurrent`
16
+ - recurrent inference path with LSTM state handling + action-mask sanitization
17
+ - helper `compare_recurrent_vs_flat(...)`
18
+ - `rl/callbacks.py`
19
+ - `RecurrentEvalCallback` for periodic grader-based checkpointing in Phase 3
20
+ - recurrent best checkpoints saved as `best_grader_recurrent_<task>.zip` (no collision with Phase 2 files)
21
+ - `rl/gym_wrapper.py`
22
+ - optional `hard_action_mask` mode (default off) for safe action execution
23
+ - `tests/test_rl_evaluate.py`
24
+ - recurrent hidden-state persistence
25
+ - LSTM reset behavior on episode boundary
26
+ - recurrent >= flat comparison utility check
27
+
28
+ ## Commands (using existing .venv313)
29
+
30
+ - Train Phase 3:
31
+ - `.\.venv313\Scripts\python.exe -m rl.train_recurrent --timesteps 600000 --n-envs 4 --seed 42 --config rl/configs/recurrent.yaml`
32
+ - Train Phase 3-v2 (recommended tuning run):
33
+ - `.\.venv313\Scripts\python.exe -m rl.train_recurrent --timesteps 700000 --n-envs 4 --seed 42 --config rl/configs/recurrent_v2.yaml`
34
+ - Evaluate Phase 3 model:
35
+ - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/phase3_final.zip --episodes 3 --model-type recurrent`
36
+ - Evaluate best recurrent checkpoint (saved during Phase 3 eval):
37
+ - `.\.venv313\Scripts\python.exe -m rl.evaluate --model results/best_model/best_grader_recurrent_mixed_urgency_medium.zip --episodes 3 --model-type recurrent`
38
+ - Compare recurrent vs flat on medium task:
39
+ - `.\.venv313\Scripts\python.exe -c "from rl.evaluate import compare_recurrent_vs_flat; print(compare_recurrent_vs_flat('results/best_model/phase2_final.zip','results/best_model/phase3_final.zip'))"`
docs/PROJECT_STRUCTURE.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Structure (Judge-Friendly)
2
+
3
+ This repository keeps runtime-critical files in their original paths for deployment safety.
4
+ No existing files were deleted.
5
+
6
+ ## Top-Level Layout
7
+
8
+ - `app/` - core environment logic and FastAPI server
9
+ - `app/web/` - deployed React frontend assets served by backend at `/ui`
10
+ - `frontend/` - frontend ownership docs and reserved source folder for future split components
11
+ - `rl/` - reinforcement-learning wrappers, training, evaluation, configs
12
+ - `tests/` - deterministic unit/integration test suites
13
+ - `scripts/` - operational scripts (local run, validation, benchmark ladder)
14
+ - `docs/` - judge-facing documentation and phase notes
15
+ - `openenv.yaml` - OpenEnv manifest
16
+ - `inference.py` - OpenEnv inference entrypoint
17
+ - `baseline_openai.py` - CLI baseline workflow
18
+ - `Dockerfile` - deployment image
19
+
20
+ ## Deployment-Critical Paths
21
+
22
+ - API app import path: `app.main:app`
23
+ - Frontend route: `/ui` (served from `app/web/index.html`)
24
+ - RL training entrypoint: `python -m rl.train_ppo`
25
+ - RL evaluation entrypoint: `python -m rl.evaluate`
26
+ - OpenEnv config: `openenv.yaml`
27
+
28
+ ## Phase Mapping
29
+
30
+ - Phase 1: `rl/feature_builder.py`, `rl/action_mask.py`, `rl/gym_wrapper.py`, `rl/train_ppo.py`
31
+ - Phase 2: `rl/curriculum.py`, `rl/configs/curriculum.yaml`, `tests/test_curriculum.py`
32
+ - Phase 3: `rl/train_recurrent.py`, `rl/configs/recurrent.yaml`, `tests/test_rl_evaluate.py`
33
+ - Phase 4+: reserved in the existing `rl/` module structure
34
+
35
+ ## Judge Quick Navigation
36
+
37
+ 1. Environment behavior: `app/env.py`, `app/reward.py`, `app/graders.py`
38
+ 2. OpenEnv compliance + inference: `openenv.yaml`, `inference.py`
39
+ 3. Frontend behavior: `app/web/react_app.js`, `docs/FRONTEND_WORKFLOW.md`
40
+ 4. RL implementation: `rl/`
41
+ 5. Validation: `tests/`, `scripts/validate_env.py`, `scripts/validate-submission.sh`
examples/sample_actions.json ADDED
File without changes
examples/sample_observations.json ADDED
File without changes
frontend/README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # frontend/
2
+
3
+ Frontend ownership and structure.
4
+
5
+ - Source-managed React frontend lives in [frontend/react](./react).
6
+ - Built with Vite and served by FastAPI at `/ui`.
7
+ - UI is now module-based:
8
+ - `Overview`
9
+ - `Simulation Lab`
10
+ - `Training Studio`
11
+ - `Model Comparison`
12
+ - Backend APIs remain under `/api/*`.
13
+
14
+ Local frontend dev:
15
+
16
+ 1. Start backend:
17
+ - `.\.venv313\Scripts\python.exe scripts\run_local.py --host 0.0.0.0 --port 7860`
18
+ 2. Start Vite dev server:
19
+ - `cd frontend/react`
20
+ - `npm install`
21
+ - `npm run dev`
22
+ 3. Open:
23
+ - `http://localhost:5173`
24
+
25
+ Build for backend serving:
26
+
27
+ - `cd frontend/react`
28
+ - `npm run build`
29
+
30
+ Deployment path:
31
+
32
+ - UI route: `/ui`
33
+ - Asset route: `/ui/assets/*`
frontend/react/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ node_modules/
2
+ dist/
frontend/react/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # react/
2
+
3
+ Vite + React frontend for the Gov Workflow OpenEnv console.
4
+
5
+ Commands:
6
+
7
+ - `npm install`
8
+ - `npm run dev` (local dev on `http://localhost:5173`, proxies `/api` to `http://localhost:7860`)
9
+ - `npm run build` (production build for Docker/HF)
10
+ - `npm run preview`
11
+
12
+ If you see `ERR_CONNECTION_REFUSED` on `/api/*`:
13
+
14
+ - Start backend first on port `7860`
15
+ - Or set a custom dev proxy target:
16
+ - PowerShell: `$env:VITE_DEV_API_TARGET='http://127.0.0.1:7860'`
17
+ - Then run `npm run dev`
18
+
19
+ Modules:
20
+
21
+ - `Overview`: project and environment summary
22
+ - `Simulation Lab`: dynamic real-world workflow simulation (baseline / inference-like / trained RL)
23
+ - `Training Studio`: launch and monitor background RL training jobs
24
+ - `Model Comparison`: baseline vs trained model score comparison on the same task
frontend/react/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en" class="dark">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Gov Workflow OpenEnv Console</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;900&amp;family=Inter:wght@400;600;700&amp;display=swap" rel="stylesheet" />
8
+ <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap" rel="stylesheet" />
9
+ </head>
10
+ <body>
11
+ <div id="app-root" class="app-root">
12
+ <div class="boot">Loading frontend...</div>
13
+ </div>
14
+ <script type="module" src="/src/main.jsx"></script>
15
+ </body>
16
+ </html>
frontend/react/package-lock.json ADDED
@@ -0,0 +1,2050 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "openenv-rl-frontend",
3
+ "version": "0.1.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "openenv-rl-frontend",
9
+ "version": "0.1.0",
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1"
13
+ },
14
+ "devDependencies": {
15
+ "@vitejs/plugin-react": "^6.0.1",
16
+ "autoprefixer": "^10.5.0",
17
+ "postcss": "^8.5.10",
18
+ "tailwindcss": "^3.4.19",
19
+ "vite": "^8.0.7"
20
+ }
21
+ },
22
+ "node_modules/@alloc/quick-lru": {
23
+ "version": "5.2.0",
24
+ "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
25
+ "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==",
26
+ "dev": true,
27
+ "license": "MIT",
28
+ "engines": {
29
+ "node": ">=10"
30
+ },
31
+ "funding": {
32
+ "url": "https://github.com/sponsors/sindresorhus"
33
+ }
34
+ },
35
+ "node_modules/@emnapi/core": {
36
+ "version": "1.9.1",
37
+ "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.1.tgz",
38
+ "integrity": "sha512-mukuNALVsoix/w1BJwFzwXBN/dHeejQtuVzcDsfOEsdpCumXb/E9j8w11h5S54tT1xhifGfbbSm/ICrObRb3KA==",
39
+ "dev": true,
40
+ "license": "MIT",
41
+ "optional": true,
42
+ "dependencies": {
43
+ "@emnapi/wasi-threads": "1.2.0",
44
+ "tslib": "^2.4.0"
45
+ }
46
+ },
47
+ "node_modules/@emnapi/runtime": {
48
+ "version": "1.9.1",
49
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.1.tgz",
50
+ "integrity": "sha512-VYi5+ZVLhpgK4hQ0TAjiQiZ6ol0oe4mBx7mVv7IflsiEp0OWoVsp/+f9Vc1hOhE0TtkORVrI1GvzyreqpgWtkA==",
51
+ "dev": true,
52
+ "license": "MIT",
53
+ "optional": true,
54
+ "dependencies": {
55
+ "tslib": "^2.4.0"
56
+ }
57
+ },
58
+ "node_modules/@emnapi/wasi-threads": {
59
+ "version": "1.2.0",
60
+ "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.0.tgz",
61
+ "integrity": "sha512-N10dEJNSsUx41Z6pZsXU8FjPjpBEplgH24sfkmITrBED1/U2Esum9F3lfLrMjKHHjmi557zQn7kR9R+XWXu5Rg==",
62
+ "dev": true,
63
+ "license": "MIT",
64
+ "optional": true,
65
+ "dependencies": {
66
+ "tslib": "^2.4.0"
67
+ }
68
+ },
69
+ "node_modules/@jridgewell/gen-mapping": {
70
+ "version": "0.3.13",
71
+ "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
72
+ "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==",
73
+ "dev": true,
74
+ "license": "MIT",
75
+ "dependencies": {
76
+ "@jridgewell/sourcemap-codec": "^1.5.0",
77
+ "@jridgewell/trace-mapping": "^0.3.24"
78
+ }
79
+ },
80
+ "node_modules/@jridgewell/resolve-uri": {
81
+ "version": "3.1.2",
82
+ "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
83
+ "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
84
+ "dev": true,
85
+ "license": "MIT",
86
+ "engines": {
87
+ "node": ">=6.0.0"
88
+ }
89
+ },
90
+ "node_modules/@jridgewell/sourcemap-codec": {
91
+ "version": "1.5.5",
92
+ "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
93
+ "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
94
+ "dev": true,
95
+ "license": "MIT"
96
+ },
97
+ "node_modules/@jridgewell/trace-mapping": {
98
+ "version": "0.3.31",
99
+ "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz",
100
+ "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==",
101
+ "dev": true,
102
+ "license": "MIT",
103
+ "dependencies": {
104
+ "@jridgewell/resolve-uri": "^3.1.0",
105
+ "@jridgewell/sourcemap-codec": "^1.4.14"
106
+ }
107
+ },
108
+ "node_modules/@napi-rs/wasm-runtime": {
109
+ "version": "1.1.2",
110
+ "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz",
111
+ "integrity": "sha512-sNXv5oLJ7ob93xkZ1XnxisYhGYXfaG9f65/ZgYuAu3qt7b3NadcOEhLvx28hv31PgX8SZJRYrAIPQilQmFpLVw==",
112
+ "dev": true,
113
+ "license": "MIT",
114
+ "optional": true,
115
+ "dependencies": {
116
+ "@tybys/wasm-util": "^0.10.1"
117
+ },
118
+ "funding": {
119
+ "type": "github",
120
+ "url": "https://github.com/sponsors/Brooooooklyn"
121
+ },
122
+ "peerDependencies": {
123
+ "@emnapi/core": "^1.7.1",
124
+ "@emnapi/runtime": "^1.7.1"
125
+ }
126
+ },
127
+ "node_modules/@nodelib/fs.scandir": {
128
+ "version": "2.1.5",
129
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
130
+ "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==",
131
+ "dev": true,
132
+ "license": "MIT",
133
+ "dependencies": {
134
+ "@nodelib/fs.stat": "2.0.5",
135
+ "run-parallel": "^1.1.9"
136
+ },
137
+ "engines": {
138
+ "node": ">= 8"
139
+ }
140
+ },
141
+ "node_modules/@nodelib/fs.stat": {
142
+ "version": "2.0.5",
143
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz",
144
+ "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==",
145
+ "dev": true,
146
+ "license": "MIT",
147
+ "engines": {
148
+ "node": ">= 8"
149
+ }
150
+ },
151
+ "node_modules/@nodelib/fs.walk": {
152
+ "version": "1.2.8",
153
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz",
154
+ "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==",
155
+ "dev": true,
156
+ "license": "MIT",
157
+ "dependencies": {
158
+ "@nodelib/fs.scandir": "2.1.5",
159
+ "fastq": "^1.6.0"
160
+ },
161
+ "engines": {
162
+ "node": ">= 8"
163
+ }
164
+ },
165
+ "node_modules/@oxc-project/types": {
166
+ "version": "0.123.0",
167
+ "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.123.0.tgz",
168
+ "integrity": "sha512-YtECP/y8Mj1lSHiUWGSRzy/C6teUKlS87dEfuVKT09LgQbUsBW1rNg+MiJ4buGu3yuADV60gbIvo9/HplA56Ew==",
169
+ "dev": true,
170
+ "license": "MIT",
171
+ "funding": {
172
+ "url": "https://github.com/sponsors/Boshen"
173
+ }
174
+ },
175
+ "node_modules/@rolldown/binding-android-arm64": {
176
+ "version": "1.0.0-rc.13",
177
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.13.tgz",
178
+ "integrity": "sha512-5ZiiecKH2DXAVJTNN13gNMUcCDg4Jy8ZjbXEsPnqa248wgOVeYRX0iqXXD5Jz4bI9BFHgKsI2qmyJynstbmr+g==",
179
+ "cpu": [
180
+ "arm64"
181
+ ],
182
+ "dev": true,
183
+ "license": "MIT",
184
+ "optional": true,
185
+ "os": [
186
+ "android"
187
+ ],
188
+ "engines": {
189
+ "node": "^20.19.0 || >=22.12.0"
190
+ }
191
+ },
192
+ "node_modules/@rolldown/binding-darwin-arm64": {
193
+ "version": "1.0.0-rc.13",
194
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.13.tgz",
195
+ "integrity": "sha512-tz/v/8G77seu8zAB3A5sK3UFoOl06zcshEzhUO62sAEtrEuW/H1CcyoupOrD+NbQJytYgA4CppXPzlrmp4JZKA==",
196
+ "cpu": [
197
+ "arm64"
198
+ ],
199
+ "dev": true,
200
+ "license": "MIT",
201
+ "optional": true,
202
+ "os": [
203
+ "darwin"
204
+ ],
205
+ "engines": {
206
+ "node": "^20.19.0 || >=22.12.0"
207
+ }
208
+ },
209
+ "node_modules/@rolldown/binding-darwin-x64": {
210
+ "version": "1.0.0-rc.13",
211
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.13.tgz",
212
+ "integrity": "sha512-8DakphqOz8JrMYWTJmWA+vDJxut6LijZ8Xcdc4flOlAhU7PNVwo2MaWBF9iXjJAPo5rC/IxEFZDhJ3GC7NHvug==",
213
+ "cpu": [
214
+ "x64"
215
+ ],
216
+ "dev": true,
217
+ "license": "MIT",
218
+ "optional": true,
219
+ "os": [
220
+ "darwin"
221
+ ],
222
+ "engines": {
223
+ "node": "^20.19.0 || >=22.12.0"
224
+ }
225
+ },
226
+ "node_modules/@rolldown/binding-freebsd-x64": {
227
+ "version": "1.0.0-rc.13",
228
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.13.tgz",
229
+ "integrity": "sha512-4wBQFfjDuXYN/SVI8inBF3Aa+isq40rc6VMFbk5jcpolUBTe5cYnMsHZ51nFWsx3PVyyNN3vgoESki0Hmr/4BA==",
230
+ "cpu": [
231
+ "x64"
232
+ ],
233
+ "dev": true,
234
+ "license": "MIT",
235
+ "optional": true,
236
+ "os": [
237
+ "freebsd"
238
+ ],
239
+ "engines": {
240
+ "node": "^20.19.0 || >=22.12.0"
241
+ }
242
+ },
243
+ "node_modules/@rolldown/binding-linux-arm-gnueabihf": {
244
+ "version": "1.0.0-rc.13",
245
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.13.tgz",
246
+ "integrity": "sha512-JW/e4yPIXLms+jmnbwwy5LA/LxVwZUWLN8xug+V200wzaVi5TEGIWQlh8o91gWYFxW609euI98OCCemmWGuPrw==",
247
+ "cpu": [
248
+ "arm"
249
+ ],
250
+ "dev": true,
251
+ "license": "MIT",
252
+ "optional": true,
253
+ "os": [
254
+ "linux"
255
+ ],
256
+ "engines": {
257
+ "node": "^20.19.0 || >=22.12.0"
258
+ }
259
+ },
260
+ "node_modules/@rolldown/binding-linux-arm64-gnu": {
261
+ "version": "1.0.0-rc.13",
262
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.13.tgz",
263
+ "integrity": "sha512-ZfKWpXiUymDnavepCaM6KG/uGydJ4l2nBmMxg60Ci4CbeefpqjPWpfaZM7PThOhk2dssqBAcwLc6rAyr0uTdXg==",
264
+ "cpu": [
265
+ "arm64"
266
+ ],
267
+ "dev": true,
268
+ "license": "MIT",
269
+ "optional": true,
270
+ "os": [
271
+ "linux"
272
+ ],
273
+ "engines": {
274
+ "node": "^20.19.0 || >=22.12.0"
275
+ }
276
+ },
277
+ "node_modules/@rolldown/binding-linux-arm64-musl": {
278
+ "version": "1.0.0-rc.13",
279
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.13.tgz",
280
+ "integrity": "sha512-bmRg3O6Z0gq9yodKKWCIpnlH051sEfdVwt+6m5UDffAQMUUqU0xjnQqqAUm+Gu7ofAAly9DqiQDtKu2nPDEABA==",
281
+ "cpu": [
282
+ "arm64"
283
+ ],
284
+ "dev": true,
285
+ "license": "MIT",
286
+ "optional": true,
287
+ "os": [
288
+ "linux"
289
+ ],
290
+ "engines": {
291
+ "node": "^20.19.0 || >=22.12.0"
292
+ }
293
+ },
294
+ "node_modules/@rolldown/binding-linux-ppc64-gnu": {
295
+ "version": "1.0.0-rc.13",
296
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.13.tgz",
297
+ "integrity": "sha512-8Wtnbw4k7pMYN9B/mOEAsQ8HOiq7AZ31Ig4M9BKn2So4xRaFEhtCSa4ZJaOutOWq50zpgR4N5+L/opnlaCx8wQ==",
298
+ "cpu": [
299
+ "ppc64"
300
+ ],
301
+ "dev": true,
302
+ "license": "MIT",
303
+ "optional": true,
304
+ "os": [
305
+ "linux"
306
+ ],
307
+ "engines": {
308
+ "node": "^20.19.0 || >=22.12.0"
309
+ }
310
+ },
311
+ "node_modules/@rolldown/binding-linux-s390x-gnu": {
312
+ "version": "1.0.0-rc.13",
313
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.13.tgz",
314
+ "integrity": "sha512-D/0Nlo8mQuxSMohNJUF2lDXWRsFDsHldfRRgD9bRgktj+EndGPj4DOV37LqDKPYS+osdyhZEH7fTakTAEcW7qg==",
315
+ "cpu": [
316
+ "s390x"
317
+ ],
318
+ "dev": true,
319
+ "license": "MIT",
320
+ "optional": true,
321
+ "os": [
322
+ "linux"
323
+ ],
324
+ "engines": {
325
+ "node": "^20.19.0 || >=22.12.0"
326
+ }
327
+ },
328
+ "node_modules/@rolldown/binding-linux-x64-gnu": {
329
+ "version": "1.0.0-rc.13",
330
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.13.tgz",
331
+ "integrity": "sha512-eRrPvat2YaVQcwwKi/JzOP6MKf1WRnOCr+VaI3cTWz3ZoLcP/654z90lVCJ4dAuMEpPdke0n+qyAqXDZdIC4rA==",
332
+ "cpu": [
333
+ "x64"
334
+ ],
335
+ "dev": true,
336
+ "license": "MIT",
337
+ "optional": true,
338
+ "os": [
339
+ "linux"
340
+ ],
341
+ "engines": {
342
+ "node": "^20.19.0 || >=22.12.0"
343
+ }
344
+ },
345
+ "node_modules/@rolldown/binding-linux-x64-musl": {
346
+ "version": "1.0.0-rc.13",
347
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.13.tgz",
348
+ "integrity": "sha512-PsdONiFRp8hR8KgVjTWjZ9s7uA3uueWL0t74/cKHfM4dR5zXYv4AjB8BvA+QDToqxAFg4ZkcVEqeu5F7inoz5w==",
349
+ "cpu": [
350
+ "x64"
351
+ ],
352
+ "dev": true,
353
+ "license": "MIT",
354
+ "optional": true,
355
+ "os": [
356
+ "linux"
357
+ ],
358
+ "engines": {
359
+ "node": "^20.19.0 || >=22.12.0"
360
+ }
361
+ },
362
+ "node_modules/@rolldown/binding-openharmony-arm64": {
363
+ "version": "1.0.0-rc.13",
364
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.13.tgz",
365
+ "integrity": "sha512-hCNXgC5dI3TVOLrPT++PKFNZ+1EtS0mLQwfXXXSUD/+rGlB65gZDwN/IDuxLpQP4x8RYYHqGomlUXzpO8aVI2w==",
366
+ "cpu": [
367
+ "arm64"
368
+ ],
369
+ "dev": true,
370
+ "license": "MIT",
371
+ "optional": true,
372
+ "os": [
373
+ "openharmony"
374
+ ],
375
+ "engines": {
376
+ "node": "^20.19.0 || >=22.12.0"
377
+ }
378
+ },
379
+ "node_modules/@rolldown/binding-wasm32-wasi": {
380
+ "version": "1.0.0-rc.13",
381
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.13.tgz",
382
+ "integrity": "sha512-viLS5C5et8NFtLWw9Sw3M/w4vvnVkbWkO7wSNh3C+7G1+uCkGpr6PcjNDSFcNtmXY/4trjPBqUfcOL+P3sWy/g==",
383
+ "cpu": [
384
+ "wasm32"
385
+ ],
386
+ "dev": true,
387
+ "license": "MIT",
388
+ "optional": true,
389
+ "dependencies": {
390
+ "@emnapi/core": "1.9.1",
391
+ "@emnapi/runtime": "1.9.1",
392
+ "@napi-rs/wasm-runtime": "^1.1.2"
393
+ },
394
+ "engines": {
395
+ "node": ">=14.0.0"
396
+ }
397
+ },
398
+ "node_modules/@rolldown/binding-win32-arm64-msvc": {
399
+ "version": "1.0.0-rc.13",
400
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.13.tgz",
401
+ "integrity": "sha512-Fqa3Tlt1xL4wzmAYxGNFV36Hb+VfPc9PYU+E25DAnswXv3ODDu/yyWjQDbXMo5AGWkQVjLgQExuVu8I/UaZhPQ==",
402
+ "cpu": [
403
+ "arm64"
404
+ ],
405
+ "dev": true,
406
+ "license": "MIT",
407
+ "optional": true,
408
+ "os": [
409
+ "win32"
410
+ ],
411
+ "engines": {
412
+ "node": "^20.19.0 || >=22.12.0"
413
+ }
414
+ },
415
+ "node_modules/@rolldown/binding-win32-x64-msvc": {
416
+ "version": "1.0.0-rc.13",
417
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.13.tgz",
418
+ "integrity": "sha512-/pLI5kPkGEi44TDlnbio3St/5gUFeN51YWNAk/Gnv6mEQBOahRBh52qVFVBpmrnU01n2yysvBML9Ynu7K4kGAQ==",
419
+ "cpu": [
420
+ "x64"
421
+ ],
422
+ "dev": true,
423
+ "license": "MIT",
424
+ "optional": true,
425
+ "os": [
426
+ "win32"
427
+ ],
428
+ "engines": {
429
+ "node": "^20.19.0 || >=22.12.0"
430
+ }
431
+ },
432
+ "node_modules/@rolldown/pluginutils": {
433
+ "version": "1.0.0-rc.7",
434
+ "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.7.tgz",
435
+ "integrity": "sha512-qujRfC8sFVInYSPPMLQByRh7zhwkGFS4+tyMQ83srV1qrxL4g8E2tyxVVyxd0+8QeBM1mIk9KbWxkegRr76XzA==",
436
+ "dev": true,
437
+ "license": "MIT"
438
+ },
439
+ "node_modules/@tybys/wasm-util": {
440
+ "version": "0.10.1",
441
+ "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz",
442
+ "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==",
443
+ "dev": true,
444
+ "license": "MIT",
445
+ "optional": true,
446
+ "dependencies": {
447
+ "tslib": "^2.4.0"
448
+ }
449
+ },
450
+ "node_modules/@vitejs/plugin-react": {
451
+ "version": "6.0.1",
452
+ "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-6.0.1.tgz",
453
+ "integrity": "sha512-l9X/E3cDb+xY3SWzlG1MOGt2usfEHGMNIaegaUGFsLkb3RCn/k8/TOXBcab+OndDI4TBtktT8/9BwwW8Vi9KUQ==",
454
+ "dev": true,
455
+ "license": "MIT",
456
+ "dependencies": {
457
+ "@rolldown/pluginutils": "1.0.0-rc.7"
458
+ },
459
+ "engines": {
460
+ "node": "^20.19.0 || >=22.12.0"
461
+ },
462
+ "peerDependencies": {
463
+ "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0",
464
+ "babel-plugin-react-compiler": "^1.0.0",
465
+ "vite": "^8.0.0"
466
+ },
467
+ "peerDependenciesMeta": {
468
+ "@rolldown/plugin-babel": {
469
+ "optional": true
470
+ },
471
+ "babel-plugin-react-compiler": {
472
+ "optional": true
473
+ }
474
+ }
475
+ },
476
+ "node_modules/any-promise": {
477
+ "version": "1.3.0",
478
+ "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz",
479
+ "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==",
480
+ "dev": true,
481
+ "license": "MIT"
482
+ },
483
+ "node_modules/anymatch": {
484
+ "version": "3.1.3",
485
+ "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz",
486
+ "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==",
487
+ "dev": true,
488
+ "license": "ISC",
489
+ "dependencies": {
490
+ "normalize-path": "^3.0.0",
491
+ "picomatch": "^2.0.4"
492
+ },
493
+ "engines": {
494
+ "node": ">= 8"
495
+ }
496
+ },
497
+ "node_modules/anymatch/node_modules/picomatch": {
498
+ "version": "2.3.2",
499
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
500
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
501
+ "dev": true,
502
+ "license": "MIT",
503
+ "engines": {
504
+ "node": ">=8.6"
505
+ },
506
+ "funding": {
507
+ "url": "https://github.com/sponsors/jonschlinkert"
508
+ }
509
+ },
510
+ "node_modules/arg": {
511
+ "version": "5.0.2",
512
+ "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz",
513
+ "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==",
514
+ "dev": true,
515
+ "license": "MIT"
516
+ },
517
+ "node_modules/autoprefixer": {
518
+ "version": "10.5.0",
519
+ "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.5.0.tgz",
520
+ "integrity": "sha512-FMhOoZV4+qR6aTUALKX2rEqGG+oyATvwBt9IIzVR5rMa2HRWPkxf+P+PAJLD1I/H5/II+HuZcBJYEFBpq39ong==",
521
+ "dev": true,
522
+ "funding": [
523
+ {
524
+ "type": "opencollective",
525
+ "url": "https://opencollective.com/postcss/"
526
+ },
527
+ {
528
+ "type": "tidelift",
529
+ "url": "https://tidelift.com/funding/github/npm/autoprefixer"
530
+ },
531
+ {
532
+ "type": "github",
533
+ "url": "https://github.com/sponsors/ai"
534
+ }
535
+ ],
536
+ "license": "MIT",
537
+ "dependencies": {
538
+ "browserslist": "^4.28.2",
539
+ "caniuse-lite": "^1.0.30001787",
540
+ "fraction.js": "^5.3.4",
541
+ "picocolors": "^1.1.1",
542
+ "postcss-value-parser": "^4.2.0"
543
+ },
544
+ "bin": {
545
+ "autoprefixer": "bin/autoprefixer"
546
+ },
547
+ "engines": {
548
+ "node": "^10 || ^12 || >=14"
549
+ },
550
+ "peerDependencies": {
551
+ "postcss": "^8.1.0"
552
+ }
553
+ },
554
+ "node_modules/baseline-browser-mapping": {
555
+ "version": "2.10.21",
556
+ "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz",
557
+ "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==",
558
+ "dev": true,
559
+ "license": "Apache-2.0",
560
+ "bin": {
561
+ "baseline-browser-mapping": "dist/cli.cjs"
562
+ },
563
+ "engines": {
564
+ "node": ">=6.0.0"
565
+ }
566
+ },
567
+ "node_modules/binary-extensions": {
568
+ "version": "2.3.0",
569
+ "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz",
570
+ "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==",
571
+ "dev": true,
572
+ "license": "MIT",
573
+ "engines": {
574
+ "node": ">=8"
575
+ },
576
+ "funding": {
577
+ "url": "https://github.com/sponsors/sindresorhus"
578
+ }
579
+ },
580
+ "node_modules/braces": {
581
+ "version": "3.0.3",
582
+ "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
583
+ "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
584
+ "dev": true,
585
+ "license": "MIT",
586
+ "dependencies": {
587
+ "fill-range": "^7.1.1"
588
+ },
589
+ "engines": {
590
+ "node": ">=8"
591
+ }
592
+ },
593
+ "node_modules/browserslist": {
594
+ "version": "4.28.2",
595
+ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
596
+ "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
597
+ "dev": true,
598
+ "funding": [
599
+ {
600
+ "type": "opencollective",
601
+ "url": "https://opencollective.com/browserslist"
602
+ },
603
+ {
604
+ "type": "tidelift",
605
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
606
+ },
607
+ {
608
+ "type": "github",
609
+ "url": "https://github.com/sponsors/ai"
610
+ }
611
+ ],
612
+ "license": "MIT",
613
+ "dependencies": {
614
+ "baseline-browser-mapping": "^2.10.12",
615
+ "caniuse-lite": "^1.0.30001782",
616
+ "electron-to-chromium": "^1.5.328",
617
+ "node-releases": "^2.0.36",
618
+ "update-browserslist-db": "^1.2.3"
619
+ },
620
+ "bin": {
621
+ "browserslist": "cli.js"
622
+ },
623
+ "engines": {
624
+ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
625
+ }
626
+ },
627
+ "node_modules/camelcase-css": {
628
+ "version": "2.0.1",
629
+ "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz",
630
+ "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==",
631
+ "dev": true,
632
+ "license": "MIT",
633
+ "engines": {
634
+ "node": ">= 6"
635
+ }
636
+ },
637
+ "node_modules/caniuse-lite": {
638
+ "version": "1.0.30001790",
639
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz",
640
+ "integrity": "sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==",
641
+ "dev": true,
642
+ "funding": [
643
+ {
644
+ "type": "opencollective",
645
+ "url": "https://opencollective.com/browserslist"
646
+ },
647
+ {
648
+ "type": "tidelift",
649
+ "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
650
+ },
651
+ {
652
+ "type": "github",
653
+ "url": "https://github.com/sponsors/ai"
654
+ }
655
+ ],
656
+ "license": "CC-BY-4.0"
657
+ },
658
+ "node_modules/chokidar": {
659
+ "version": "3.6.0",
660
+ "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
661
+ "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
662
+ "dev": true,
663
+ "license": "MIT",
664
+ "dependencies": {
665
+ "anymatch": "~3.1.2",
666
+ "braces": "~3.0.2",
667
+ "glob-parent": "~5.1.2",
668
+ "is-binary-path": "~2.1.0",
669
+ "is-glob": "~4.0.1",
670
+ "normalize-path": "~3.0.0",
671
+ "readdirp": "~3.6.0"
672
+ },
673
+ "engines": {
674
+ "node": ">= 8.10.0"
675
+ },
676
+ "funding": {
677
+ "url": "https://paulmillr.com/funding/"
678
+ },
679
+ "optionalDependencies": {
680
+ "fsevents": "~2.3.2"
681
+ }
682
+ },
683
+ "node_modules/chokidar/node_modules/glob-parent": {
684
+ "version": "5.1.2",
685
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
686
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
687
+ "dev": true,
688
+ "license": "ISC",
689
+ "dependencies": {
690
+ "is-glob": "^4.0.1"
691
+ },
692
+ "engines": {
693
+ "node": ">= 6"
694
+ }
695
+ },
696
+ "node_modules/commander": {
697
+ "version": "4.1.1",
698
+ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
699
+ "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==",
700
+ "dev": true,
701
+ "license": "MIT",
702
+ "engines": {
703
+ "node": ">= 6"
704
+ }
705
+ },
706
+ "node_modules/cssesc": {
707
+ "version": "3.0.0",
708
+ "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
709
+ "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==",
710
+ "dev": true,
711
+ "license": "MIT",
712
+ "bin": {
713
+ "cssesc": "bin/cssesc"
714
+ },
715
+ "engines": {
716
+ "node": ">=4"
717
+ }
718
+ },
719
+ "node_modules/detect-libc": {
720
+ "version": "2.1.2",
721
+ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
722
+ "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
723
+ "dev": true,
724
+ "license": "Apache-2.0",
725
+ "engines": {
726
+ "node": ">=8"
727
+ }
728
+ },
729
+ "node_modules/didyoumean": {
730
+ "version": "1.2.2",
731
+ "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
732
+ "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==",
733
+ "dev": true,
734
+ "license": "Apache-2.0"
735
+ },
736
+ "node_modules/dlv": {
737
+ "version": "1.1.3",
738
+ "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
739
+ "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==",
740
+ "dev": true,
741
+ "license": "MIT"
742
+ },
743
+ "node_modules/electron-to-chromium": {
744
+ "version": "1.5.344",
745
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz",
746
+ "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==",
747
+ "dev": true,
748
+ "license": "ISC"
749
+ },
750
+ "node_modules/es-errors": {
751
+ "version": "1.3.0",
752
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
753
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
754
+ "dev": true,
755
+ "license": "MIT",
756
+ "engines": {
757
+ "node": ">= 0.4"
758
+ }
759
+ },
760
+ "node_modules/escalade": {
761
+ "version": "3.2.0",
762
+ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
763
+ "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
764
+ "dev": true,
765
+ "license": "MIT",
766
+ "engines": {
767
+ "node": ">=6"
768
+ }
769
+ },
770
+ "node_modules/fast-glob": {
771
+ "version": "3.3.3",
772
+ "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
773
+ "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==",
774
+ "dev": true,
775
+ "license": "MIT",
776
+ "dependencies": {
777
+ "@nodelib/fs.stat": "^2.0.2",
778
+ "@nodelib/fs.walk": "^1.2.3",
779
+ "glob-parent": "^5.1.2",
780
+ "merge2": "^1.3.0",
781
+ "micromatch": "^4.0.8"
782
+ },
783
+ "engines": {
784
+ "node": ">=8.6.0"
785
+ }
786
+ },
787
+ "node_modules/fast-glob/node_modules/glob-parent": {
788
+ "version": "5.1.2",
789
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
790
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
791
+ "dev": true,
792
+ "license": "ISC",
793
+ "dependencies": {
794
+ "is-glob": "^4.0.1"
795
+ },
796
+ "engines": {
797
+ "node": ">= 6"
798
+ }
799
+ },
800
+ "node_modules/fastq": {
801
+ "version": "1.20.1",
802
+ "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz",
803
+ "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==",
804
+ "dev": true,
805
+ "license": "ISC",
806
+ "dependencies": {
807
+ "reusify": "^1.0.4"
808
+ }
809
+ },
810
+ "node_modules/fdir": {
811
+ "version": "6.5.0",
812
+ "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
813
+ "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
814
+ "dev": true,
815
+ "license": "MIT",
816
+ "engines": {
817
+ "node": ">=12.0.0"
818
+ },
819
+ "peerDependencies": {
820
+ "picomatch": "^3 || ^4"
821
+ },
822
+ "peerDependenciesMeta": {
823
+ "picomatch": {
824
+ "optional": true
825
+ }
826
+ }
827
+ },
828
+ "node_modules/fill-range": {
829
+ "version": "7.1.1",
830
+ "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
831
+ "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
832
+ "dev": true,
833
+ "license": "MIT",
834
+ "dependencies": {
835
+ "to-regex-range": "^5.0.1"
836
+ },
837
+ "engines": {
838
+ "node": ">=8"
839
+ }
840
+ },
841
+ "node_modules/fraction.js": {
842
+ "version": "5.3.4",
843
+ "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
844
+ "integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
845
+ "dev": true,
846
+ "license": "MIT",
847
+ "engines": {
848
+ "node": "*"
849
+ },
850
+ "funding": {
851
+ "type": "github",
852
+ "url": "https://github.com/sponsors/rawify"
853
+ }
854
+ },
855
+ "node_modules/fsevents": {
856
+ "version": "2.3.3",
857
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
858
+ "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
859
+ "dev": true,
860
+ "hasInstallScript": true,
861
+ "license": "MIT",
862
+ "optional": true,
863
+ "os": [
864
+ "darwin"
865
+ ],
866
+ "engines": {
867
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
868
+ }
869
+ },
870
+ "node_modules/function-bind": {
871
+ "version": "1.1.2",
872
+ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
873
+ "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
874
+ "dev": true,
875
+ "license": "MIT",
876
+ "funding": {
877
+ "url": "https://github.com/sponsors/ljharb"
878
+ }
879
+ },
880
+ "node_modules/glob-parent": {
881
+ "version": "6.0.2",
882
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
883
+ "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
884
+ "dev": true,
885
+ "license": "ISC",
886
+ "dependencies": {
887
+ "is-glob": "^4.0.3"
888
+ },
889
+ "engines": {
890
+ "node": ">=10.13.0"
891
+ }
892
+ },
893
+ "node_modules/hasown": {
894
+ "version": "2.0.3",
895
+ "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
896
+ "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
897
+ "dev": true,
898
+ "license": "MIT",
899
+ "dependencies": {
900
+ "function-bind": "^1.1.2"
901
+ },
902
+ "engines": {
903
+ "node": ">= 0.4"
904
+ }
905
+ },
906
+ "node_modules/is-binary-path": {
907
+ "version": "2.1.0",
908
+ "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz",
909
+ "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==",
910
+ "dev": true,
911
+ "license": "MIT",
912
+ "dependencies": {
913
+ "binary-extensions": "^2.0.0"
914
+ },
915
+ "engines": {
916
+ "node": ">=8"
917
+ }
918
+ },
919
+ "node_modules/is-core-module": {
920
+ "version": "2.16.1",
921
+ "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz",
922
+ "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==",
923
+ "dev": true,
924
+ "license": "MIT",
925
+ "dependencies": {
926
+ "hasown": "^2.0.2"
927
+ },
928
+ "engines": {
929
+ "node": ">= 0.4"
930
+ },
931
+ "funding": {
932
+ "url": "https://github.com/sponsors/ljharb"
933
+ }
934
+ },
935
+ "node_modules/is-extglob": {
936
+ "version": "2.1.1",
937
+ "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
938
+ "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==",
939
+ "dev": true,
940
+ "license": "MIT",
941
+ "engines": {
942
+ "node": ">=0.10.0"
943
+ }
944
+ },
945
+ "node_modules/is-glob": {
946
+ "version": "4.0.3",
947
+ "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
948
+ "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==",
949
+ "dev": true,
950
+ "license": "MIT",
951
+ "dependencies": {
952
+ "is-extglob": "^2.1.1"
953
+ },
954
+ "engines": {
955
+ "node": ">=0.10.0"
956
+ }
957
+ },
958
+ "node_modules/is-number": {
959
+ "version": "7.0.0",
960
+ "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
961
+ "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==",
962
+ "dev": true,
963
+ "license": "MIT",
964
+ "engines": {
965
+ "node": ">=0.12.0"
966
+ }
967
+ },
968
+ "node_modules/jiti": {
969
+ "version": "1.21.7",
970
+ "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz",
971
+ "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==",
972
+ "dev": true,
973
+ "license": "MIT",
974
+ "bin": {
975
+ "jiti": "bin/jiti.js"
976
+ }
977
+ },
978
+ "node_modules/js-tokens": {
979
+ "version": "4.0.0",
980
+ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
981
+ "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
982
+ "license": "MIT"
983
+ },
984
+ "node_modules/lightningcss": {
985
+ "version": "1.32.0",
986
+ "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz",
987
+ "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==",
988
+ "dev": true,
989
+ "license": "MPL-2.0",
990
+ "dependencies": {
991
+ "detect-libc": "^2.0.3"
992
+ },
993
+ "engines": {
994
+ "node": ">= 12.0.0"
995
+ },
996
+ "funding": {
997
+ "type": "opencollective",
998
+ "url": "https://opencollective.com/parcel"
999
+ },
1000
+ "optionalDependencies": {
1001
+ "lightningcss-android-arm64": "1.32.0",
1002
+ "lightningcss-darwin-arm64": "1.32.0",
1003
+ "lightningcss-darwin-x64": "1.32.0",
1004
+ "lightningcss-freebsd-x64": "1.32.0",
1005
+ "lightningcss-linux-arm-gnueabihf": "1.32.0",
1006
+ "lightningcss-linux-arm64-gnu": "1.32.0",
1007
+ "lightningcss-linux-arm64-musl": "1.32.0",
1008
+ "lightningcss-linux-x64-gnu": "1.32.0",
1009
+ "lightningcss-linux-x64-musl": "1.32.0",
1010
+ "lightningcss-win32-arm64-msvc": "1.32.0",
1011
+ "lightningcss-win32-x64-msvc": "1.32.0"
1012
+ }
1013
+ },
1014
+ "node_modules/lightningcss-android-arm64": {
1015
+ "version": "1.32.0",
1016
+ "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz",
1017
+ "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==",
1018
+ "cpu": [
1019
+ "arm64"
1020
+ ],
1021
+ "dev": true,
1022
+ "license": "MPL-2.0",
1023
+ "optional": true,
1024
+ "os": [
1025
+ "android"
1026
+ ],
1027
+ "engines": {
1028
+ "node": ">= 12.0.0"
1029
+ },
1030
+ "funding": {
1031
+ "type": "opencollective",
1032
+ "url": "https://opencollective.com/parcel"
1033
+ }
1034
+ },
1035
+ "node_modules/lightningcss-darwin-arm64": {
1036
+ "version": "1.32.0",
1037
+ "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz",
1038
+ "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==",
1039
+ "cpu": [
1040
+ "arm64"
1041
+ ],
1042
+ "dev": true,
1043
+ "license": "MPL-2.0",
1044
+ "optional": true,
1045
+ "os": [
1046
+ "darwin"
1047
+ ],
1048
+ "engines": {
1049
+ "node": ">= 12.0.0"
1050
+ },
1051
+ "funding": {
1052
+ "type": "opencollective",
1053
+ "url": "https://opencollective.com/parcel"
1054
+ }
1055
+ },
1056
+ "node_modules/lightningcss-darwin-x64": {
1057
+ "version": "1.32.0",
1058
+ "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz",
1059
+ "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==",
1060
+ "cpu": [
1061
+ "x64"
1062
+ ],
1063
+ "dev": true,
1064
+ "license": "MPL-2.0",
1065
+ "optional": true,
1066
+ "os": [
1067
+ "darwin"
1068
+ ],
1069
+ "engines": {
1070
+ "node": ">= 12.0.0"
1071
+ },
1072
+ "funding": {
1073
+ "type": "opencollective",
1074
+ "url": "https://opencollective.com/parcel"
1075
+ }
1076
+ },
1077
+ "node_modules/lightningcss-freebsd-x64": {
1078
+ "version": "1.32.0",
1079
+ "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz",
1080
+ "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==",
1081
+ "cpu": [
1082
+ "x64"
1083
+ ],
1084
+ "dev": true,
1085
+ "license": "MPL-2.0",
1086
+ "optional": true,
1087
+ "os": [
1088
+ "freebsd"
1089
+ ],
1090
+ "engines": {
1091
+ "node": ">= 12.0.0"
1092
+ },
1093
+ "funding": {
1094
+ "type": "opencollective",
1095
+ "url": "https://opencollective.com/parcel"
1096
+ }
1097
+ },
1098
+ "node_modules/lightningcss-linux-arm-gnueabihf": {
1099
+ "version": "1.32.0",
1100
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz",
1101
+ "integrity": "sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==",
1102
+ "cpu": [
1103
+ "arm"
1104
+ ],
1105
+ "dev": true,
1106
+ "license": "MPL-2.0",
1107
+ "optional": true,
1108
+ "os": [
1109
+ "linux"
1110
+ ],
1111
+ "engines": {
1112
+ "node": ">= 12.0.0"
1113
+ },
1114
+ "funding": {
1115
+ "type": "opencollective",
1116
+ "url": "https://opencollective.com/parcel"
1117
+ }
1118
+ },
1119
+ "node_modules/lightningcss-linux-arm64-gnu": {
1120
+ "version": "1.32.0",
1121
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz",
1122
+ "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==",
1123
+ "cpu": [
1124
+ "arm64"
1125
+ ],
1126
+ "dev": true,
1127
+ "license": "MPL-2.0",
1128
+ "optional": true,
1129
+ "os": [
1130
+ "linux"
1131
+ ],
1132
+ "engines": {
1133
+ "node": ">= 12.0.0"
1134
+ },
1135
+ "funding": {
1136
+ "type": "opencollective",
1137
+ "url": "https://opencollective.com/parcel"
1138
+ }
1139
+ },
1140
+ "node_modules/lightningcss-linux-arm64-musl": {
1141
+ "version": "1.32.0",
1142
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz",
1143
+ "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==",
1144
+ "cpu": [
1145
+ "arm64"
1146
+ ],
1147
+ "dev": true,
1148
+ "license": "MPL-2.0",
1149
+ "optional": true,
1150
+ "os": [
1151
+ "linux"
1152
+ ],
1153
+ "engines": {
1154
+ "node": ">= 12.0.0"
1155
+ },
1156
+ "funding": {
1157
+ "type": "opencollective",
1158
+ "url": "https://opencollective.com/parcel"
1159
+ }
1160
+ },
1161
+ "node_modules/lightningcss-linux-x64-gnu": {
1162
+ "version": "1.32.0",
1163
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz",
1164
+ "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==",
1165
+ "cpu": [
1166
+ "x64"
1167
+ ],
1168
+ "dev": true,
1169
+ "license": "MPL-2.0",
1170
+ "optional": true,
1171
+ "os": [
1172
+ "linux"
1173
+ ],
1174
+ "engines": {
1175
+ "node": ">= 12.0.0"
1176
+ },
1177
+ "funding": {
1178
+ "type": "opencollective",
1179
+ "url": "https://opencollective.com/parcel"
1180
+ }
1181
+ },
1182
+ "node_modules/lightningcss-linux-x64-musl": {
1183
+ "version": "1.32.0",
1184
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz",
1185
+ "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==",
1186
+ "cpu": [
1187
+ "x64"
1188
+ ],
1189
+ "dev": true,
1190
+ "license": "MPL-2.0",
1191
+ "optional": true,
1192
+ "os": [
1193
+ "linux"
1194
+ ],
1195
+ "engines": {
1196
+ "node": ">= 12.0.0"
1197
+ },
1198
+ "funding": {
1199
+ "type": "opencollective",
1200
+ "url": "https://opencollective.com/parcel"
1201
+ }
1202
+ },
1203
+ "node_modules/lightningcss-win32-arm64-msvc": {
1204
+ "version": "1.32.0",
1205
+ "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz",
1206
+ "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==",
1207
+ "cpu": [
1208
+ "arm64"
1209
+ ],
1210
+ "dev": true,
1211
+ "license": "MPL-2.0",
1212
+ "optional": true,
1213
+ "os": [
1214
+ "win32"
1215
+ ],
1216
+ "engines": {
1217
+ "node": ">= 12.0.0"
1218
+ },
1219
+ "funding": {
1220
+ "type": "opencollective",
1221
+ "url": "https://opencollective.com/parcel"
1222
+ }
1223
+ },
1224
+ "node_modules/lightningcss-win32-x64-msvc": {
1225
+ "version": "1.32.0",
1226
+ "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz",
1227
+ "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==",
1228
+ "cpu": [
1229
+ "x64"
1230
+ ],
1231
+ "dev": true,
1232
+ "license": "MPL-2.0",
1233
+ "optional": true,
1234
+ "os": [
1235
+ "win32"
1236
+ ],
1237
+ "engines": {
1238
+ "node": ">= 12.0.0"
1239
+ },
1240
+ "funding": {
1241
+ "type": "opencollective",
1242
+ "url": "https://opencollective.com/parcel"
1243
+ }
1244
+ },
1245
+ "node_modules/lilconfig": {
1246
+ "version": "3.1.3",
1247
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
1248
+ "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
1249
+ "dev": true,
1250
+ "license": "MIT",
1251
+ "engines": {
1252
+ "node": ">=14"
1253
+ },
1254
+ "funding": {
1255
+ "url": "https://github.com/sponsors/antonk52"
1256
+ }
1257
+ },
1258
+ "node_modules/lines-and-columns": {
1259
+ "version": "1.2.4",
1260
+ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
1261
+ "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
1262
+ "dev": true,
1263
+ "license": "MIT"
1264
+ },
1265
+ "node_modules/loose-envify": {
1266
+ "version": "1.4.0",
1267
+ "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
1268
+ "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
1269
+ "license": "MIT",
1270
+ "dependencies": {
1271
+ "js-tokens": "^3.0.0 || ^4.0.0"
1272
+ },
1273
+ "bin": {
1274
+ "loose-envify": "cli.js"
1275
+ }
1276
+ },
1277
+ "node_modules/merge2": {
1278
+ "version": "1.4.1",
1279
+ "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
1280
+ "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==",
1281
+ "dev": true,
1282
+ "license": "MIT",
1283
+ "engines": {
1284
+ "node": ">= 8"
1285
+ }
1286
+ },
1287
+ "node_modules/micromatch": {
1288
+ "version": "4.0.8",
1289
+ "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
1290
+ "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
1291
+ "dev": true,
1292
+ "license": "MIT",
1293
+ "dependencies": {
1294
+ "braces": "^3.0.3",
1295
+ "picomatch": "^2.3.1"
1296
+ },
1297
+ "engines": {
1298
+ "node": ">=8.6"
1299
+ }
1300
+ },
1301
+ "node_modules/micromatch/node_modules/picomatch": {
1302
+ "version": "2.3.2",
1303
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
1304
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
1305
+ "dev": true,
1306
+ "license": "MIT",
1307
+ "engines": {
1308
+ "node": ">=8.6"
1309
+ },
1310
+ "funding": {
1311
+ "url": "https://github.com/sponsors/jonschlinkert"
1312
+ }
1313
+ },
1314
+ "node_modules/mz": {
1315
+ "version": "2.7.0",
1316
+ "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz",
1317
+ "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==",
1318
+ "dev": true,
1319
+ "license": "MIT",
1320
+ "dependencies": {
1321
+ "any-promise": "^1.0.0",
1322
+ "object-assign": "^4.0.1",
1323
+ "thenify-all": "^1.0.0"
1324
+ }
1325
+ },
1326
+ "node_modules/nanoid": {
1327
+ "version": "3.3.11",
1328
+ "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
1329
+ "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
1330
+ "dev": true,
1331
+ "funding": [
1332
+ {
1333
+ "type": "github",
1334
+ "url": "https://github.com/sponsors/ai"
1335
+ }
1336
+ ],
1337
+ "license": "MIT",
1338
+ "bin": {
1339
+ "nanoid": "bin/nanoid.cjs"
1340
+ },
1341
+ "engines": {
1342
+ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
1343
+ }
1344
+ },
1345
+ "node_modules/node-releases": {
1346
+ "version": "2.0.38",
1347
+ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz",
1348
+ "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==",
1349
+ "dev": true,
1350
+ "license": "MIT"
1351
+ },
1352
+ "node_modules/normalize-path": {
1353
+ "version": "3.0.0",
1354
+ "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
1355
+ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
1356
+ "dev": true,
1357
+ "license": "MIT",
1358
+ "engines": {
1359
+ "node": ">=0.10.0"
1360
+ }
1361
+ },
1362
+ "node_modules/object-assign": {
1363
+ "version": "4.1.1",
1364
+ "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
1365
+ "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
1366
+ "dev": true,
1367
+ "license": "MIT",
1368
+ "engines": {
1369
+ "node": ">=0.10.0"
1370
+ }
1371
+ },
1372
+ "node_modules/object-hash": {
1373
+ "version": "3.0.0",
1374
+ "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
1375
+ "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
1376
+ "dev": true,
1377
+ "license": "MIT",
1378
+ "engines": {
1379
+ "node": ">= 6"
1380
+ }
1381
+ },
1382
+ "node_modules/path-parse": {
1383
+ "version": "1.0.7",
1384
+ "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
1385
+ "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
1386
+ "dev": true,
1387
+ "license": "MIT"
1388
+ },
1389
+ "node_modules/picocolors": {
1390
+ "version": "1.1.1",
1391
+ "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
1392
+ "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
1393
+ "dev": true,
1394
+ "license": "ISC"
1395
+ },
1396
+ "node_modules/picomatch": {
1397
+ "version": "4.0.4",
1398
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
1399
+ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
1400
+ "dev": true,
1401
+ "license": "MIT",
1402
+ "engines": {
1403
+ "node": ">=12"
1404
+ },
1405
+ "funding": {
1406
+ "url": "https://github.com/sponsors/jonschlinkert"
1407
+ }
1408
+ },
1409
+ "node_modules/pify": {
1410
+ "version": "2.3.0",
1411
+ "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz",
1412
+ "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==",
1413
+ "dev": true,
1414
+ "license": "MIT",
1415
+ "engines": {
1416
+ "node": ">=0.10.0"
1417
+ }
1418
+ },
1419
+ "node_modules/pirates": {
1420
+ "version": "4.0.7",
1421
+ "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz",
1422
+ "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==",
1423
+ "dev": true,
1424
+ "license": "MIT",
1425
+ "engines": {
1426
+ "node": ">= 6"
1427
+ }
1428
+ },
1429
+ "node_modules/postcss": {
1430
+ "version": "8.5.10",
1431
+ "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz",
1432
+ "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==",
1433
+ "dev": true,
1434
+ "funding": [
1435
+ {
1436
+ "type": "opencollective",
1437
+ "url": "https://opencollective.com/postcss/"
1438
+ },
1439
+ {
1440
+ "type": "tidelift",
1441
+ "url": "https://tidelift.com/funding/github/npm/postcss"
1442
+ },
1443
+ {
1444
+ "type": "github",
1445
+ "url": "https://github.com/sponsors/ai"
1446
+ }
1447
+ ],
1448
+ "license": "MIT",
1449
+ "dependencies": {
1450
+ "nanoid": "^3.3.11",
1451
+ "picocolors": "^1.1.1",
1452
+ "source-map-js": "^1.2.1"
1453
+ },
1454
+ "engines": {
1455
+ "node": "^10 || ^12 || >=14"
1456
+ }
1457
+ },
1458
+ "node_modules/postcss-import": {
1459
+ "version": "15.1.0",
1460
+ "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz",
1461
+ "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==",
1462
+ "dev": true,
1463
+ "license": "MIT",
1464
+ "dependencies": {
1465
+ "postcss-value-parser": "^4.0.0",
1466
+ "read-cache": "^1.0.0",
1467
+ "resolve": "^1.1.7"
1468
+ },
1469
+ "engines": {
1470
+ "node": ">=14.0.0"
1471
+ },
1472
+ "peerDependencies": {
1473
+ "postcss": "^8.0.0"
1474
+ }
1475
+ },
1476
+ "node_modules/postcss-js": {
1477
+ "version": "4.1.0",
1478
+ "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.1.0.tgz",
1479
+ "integrity": "sha512-oIAOTqgIo7q2EOwbhb8UalYePMvYoIeRY2YKntdpFQXNosSu3vLrniGgmH9OKs/qAkfoj5oB3le/7mINW1LCfw==",
1480
+ "dev": true,
1481
+ "funding": [
1482
+ {
1483
+ "type": "opencollective",
1484
+ "url": "https://opencollective.com/postcss/"
1485
+ },
1486
+ {
1487
+ "type": "github",
1488
+ "url": "https://github.com/sponsors/ai"
1489
+ }
1490
+ ],
1491
+ "license": "MIT",
1492
+ "dependencies": {
1493
+ "camelcase-css": "^2.0.1"
1494
+ },
1495
+ "engines": {
1496
+ "node": "^12 || ^14 || >= 16"
1497
+ },
1498
+ "peerDependencies": {
1499
+ "postcss": "^8.4.21"
1500
+ }
1501
+ },
1502
+ "node_modules/postcss-load-config": {
1503
+ "version": "6.0.1",
1504
+ "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz",
1505
+ "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==",
1506
+ "dev": true,
1507
+ "funding": [
1508
+ {
1509
+ "type": "opencollective",
1510
+ "url": "https://opencollective.com/postcss/"
1511
+ },
1512
+ {
1513
+ "type": "github",
1514
+ "url": "https://github.com/sponsors/ai"
1515
+ }
1516
+ ],
1517
+ "license": "MIT",
1518
+ "dependencies": {
1519
+ "lilconfig": "^3.1.1"
1520
+ },
1521
+ "engines": {
1522
+ "node": ">= 18"
1523
+ },
1524
+ "peerDependencies": {
1525
+ "jiti": ">=1.21.0",
1526
+ "postcss": ">=8.0.9",
1527
+ "tsx": "^4.8.1",
1528
+ "yaml": "^2.4.2"
1529
+ },
1530
+ "peerDependenciesMeta": {
1531
+ "jiti": {
1532
+ "optional": true
1533
+ },
1534
+ "postcss": {
1535
+ "optional": true
1536
+ },
1537
+ "tsx": {
1538
+ "optional": true
1539
+ },
1540
+ "yaml": {
1541
+ "optional": true
1542
+ }
1543
+ }
1544
+ },
1545
+ "node_modules/postcss-nested": {
1546
+ "version": "6.2.0",
1547
+ "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
1548
+ "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==",
1549
+ "dev": true,
1550
+ "funding": [
1551
+ {
1552
+ "type": "opencollective",
1553
+ "url": "https://opencollective.com/postcss/"
1554
+ },
1555
+ {
1556
+ "type": "github",
1557
+ "url": "https://github.com/sponsors/ai"
1558
+ }
1559
+ ],
1560
+ "license": "MIT",
1561
+ "dependencies": {
1562
+ "postcss-selector-parser": "^6.1.1"
1563
+ },
1564
+ "engines": {
1565
+ "node": ">=12.0"
1566
+ },
1567
+ "peerDependencies": {
1568
+ "postcss": "^8.2.14"
1569
+ }
1570
+ },
1571
+ "node_modules/postcss-selector-parser": {
1572
+ "version": "6.1.2",
1573
+ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz",
1574
+ "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==",
1575
+ "dev": true,
1576
+ "license": "MIT",
1577
+ "dependencies": {
1578
+ "cssesc": "^3.0.0",
1579
+ "util-deprecate": "^1.0.2"
1580
+ },
1581
+ "engines": {
1582
+ "node": ">=4"
1583
+ }
1584
+ },
1585
+ "node_modules/postcss-value-parser": {
1586
+ "version": "4.2.0",
1587
+ "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
1588
+ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
1589
+ "dev": true,
1590
+ "license": "MIT"
1591
+ },
1592
+ "node_modules/queue-microtask": {
1593
+ "version": "1.2.3",
1594
+ "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
1595
+ "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==",
1596
+ "dev": true,
1597
+ "funding": [
1598
+ {
1599
+ "type": "github",
1600
+ "url": "https://github.com/sponsors/feross"
1601
+ },
1602
+ {
1603
+ "type": "patreon",
1604
+ "url": "https://www.patreon.com/feross"
1605
+ },
1606
+ {
1607
+ "type": "consulting",
1608
+ "url": "https://feross.org/support"
1609
+ }
1610
+ ],
1611
+ "license": "MIT"
1612
+ },
1613
+ "node_modules/react": {
1614
+ "version": "18.3.1",
1615
+ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
1616
+ "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
1617
+ "license": "MIT",
1618
+ "dependencies": {
1619
+ "loose-envify": "^1.1.0"
1620
+ },
1621
+ "engines": {
1622
+ "node": ">=0.10.0"
1623
+ }
1624
+ },
1625
+ "node_modules/react-dom": {
1626
+ "version": "18.3.1",
1627
+ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
1628
+ "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
1629
+ "license": "MIT",
1630
+ "dependencies": {
1631
+ "loose-envify": "^1.1.0",
1632
+ "scheduler": "^0.23.2"
1633
+ },
1634
+ "peerDependencies": {
1635
+ "react": "^18.3.1"
1636
+ }
1637
+ },
1638
+ "node_modules/read-cache": {
1639
+ "version": "1.0.0",
1640
+ "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
1641
+ "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==",
1642
+ "dev": true,
1643
+ "license": "MIT",
1644
+ "dependencies": {
1645
+ "pify": "^2.3.0"
1646
+ }
1647
+ },
1648
+ "node_modules/readdirp": {
1649
+ "version": "3.6.0",
1650
+ "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
1651
+ "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
1652
+ "dev": true,
1653
+ "license": "MIT",
1654
+ "dependencies": {
1655
+ "picomatch": "^2.2.1"
1656
+ },
1657
+ "engines": {
1658
+ "node": ">=8.10.0"
1659
+ }
1660
+ },
1661
+ "node_modules/readdirp/node_modules/picomatch": {
1662
+ "version": "2.3.2",
1663
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
1664
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
1665
+ "dev": true,
1666
+ "license": "MIT",
1667
+ "engines": {
1668
+ "node": ">=8.6"
1669
+ },
1670
+ "funding": {
1671
+ "url": "https://github.com/sponsors/jonschlinkert"
1672
+ }
1673
+ },
1674
+ "node_modules/resolve": {
1675
+ "version": "1.22.12",
1676
+ "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.12.tgz",
1677
+ "integrity": "sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA==",
1678
+ "dev": true,
1679
+ "license": "MIT",
1680
+ "dependencies": {
1681
+ "es-errors": "^1.3.0",
1682
+ "is-core-module": "^2.16.1",
1683
+ "path-parse": "^1.0.7",
1684
+ "supports-preserve-symlinks-flag": "^1.0.0"
1685
+ },
1686
+ "bin": {
1687
+ "resolve": "bin/resolve"
1688
+ },
1689
+ "engines": {
1690
+ "node": ">= 0.4"
1691
+ },
1692
+ "funding": {
1693
+ "url": "https://github.com/sponsors/ljharb"
1694
+ }
1695
+ },
1696
+ "node_modules/reusify": {
1697
+ "version": "1.1.0",
1698
+ "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz",
1699
+ "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==",
1700
+ "dev": true,
1701
+ "license": "MIT",
1702
+ "engines": {
1703
+ "iojs": ">=1.0.0",
1704
+ "node": ">=0.10.0"
1705
+ }
1706
+ },
1707
+ "node_modules/rolldown": {
1708
+ "version": "1.0.0-rc.13",
1709
+ "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.13.tgz",
1710
+ "integrity": "sha512-bvVj8YJmf0rq4pSFmH7laLa6pYrhghv3PRzrCdRAr23g66zOKVJ4wkvFtgohtPLWmthgg8/rkaqRHrpUEh0Zbw==",
1711
+ "dev": true,
1712
+ "license": "MIT",
1713
+ "dependencies": {
1714
+ "@oxc-project/types": "=0.123.0",
1715
+ "@rolldown/pluginutils": "1.0.0-rc.13"
1716
+ },
1717
+ "bin": {
1718
+ "rolldown": "bin/cli.mjs"
1719
+ },
1720
+ "engines": {
1721
+ "node": "^20.19.0 || >=22.12.0"
1722
+ },
1723
+ "optionalDependencies": {
1724
+ "@rolldown/binding-android-arm64": "1.0.0-rc.13",
1725
+ "@rolldown/binding-darwin-arm64": "1.0.0-rc.13",
1726
+ "@rolldown/binding-darwin-x64": "1.0.0-rc.13",
1727
+ "@rolldown/binding-freebsd-x64": "1.0.0-rc.13",
1728
+ "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.13",
1729
+ "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.13",
1730
+ "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.13",
1731
+ "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.13",
1732
+ "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.13",
1733
+ "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.13",
1734
+ "@rolldown/binding-linux-x64-musl": "1.0.0-rc.13",
1735
+ "@rolldown/binding-openharmony-arm64": "1.0.0-rc.13",
1736
+ "@rolldown/binding-wasm32-wasi": "1.0.0-rc.13",
1737
+ "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.13",
1738
+ "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.13"
1739
+ }
1740
+ },
1741
+ "node_modules/rolldown/node_modules/@rolldown/pluginutils": {
1742
+ "version": "1.0.0-rc.13",
1743
+ "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.13.tgz",
1744
+ "integrity": "sha512-3ngTAv6F/Py35BsYbeeLeecvhMKdsKm4AoOETVhAA+Qc8nrA2I0kF7oa93mE9qnIurngOSpMnQ0x2nQY2FPviA==",
1745
+ "dev": true,
1746
+ "license": "MIT"
1747
+ },
1748
+ "node_modules/run-parallel": {
1749
+ "version": "1.2.0",
1750
+ "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
1751
+ "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==",
1752
+ "dev": true,
1753
+ "funding": [
1754
+ {
1755
+ "type": "github",
1756
+ "url": "https://github.com/sponsors/feross"
1757
+ },
1758
+ {
1759
+ "type": "patreon",
1760
+ "url": "https://www.patreon.com/feross"
1761
+ },
1762
+ {
1763
+ "type": "consulting",
1764
+ "url": "https://feross.org/support"
1765
+ }
1766
+ ],
1767
+ "license": "MIT",
1768
+ "dependencies": {
1769
+ "queue-microtask": "^1.2.2"
1770
+ }
1771
+ },
1772
+ "node_modules/scheduler": {
1773
+ "version": "0.23.2",
1774
+ "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
1775
+ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
1776
+ "license": "MIT",
1777
+ "dependencies": {
1778
+ "loose-envify": "^1.1.0"
1779
+ }
1780
+ },
1781
+ "node_modules/source-map-js": {
1782
+ "version": "1.2.1",
1783
+ "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
1784
+ "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
1785
+ "dev": true,
1786
+ "license": "BSD-3-Clause",
1787
+ "engines": {
1788
+ "node": ">=0.10.0"
1789
+ }
1790
+ },
1791
+ "node_modules/sucrase": {
1792
+ "version": "3.35.1",
1793
+ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz",
1794
+ "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==",
1795
+ "dev": true,
1796
+ "license": "MIT",
1797
+ "dependencies": {
1798
+ "@jridgewell/gen-mapping": "^0.3.2",
1799
+ "commander": "^4.0.0",
1800
+ "lines-and-columns": "^1.1.6",
1801
+ "mz": "^2.7.0",
1802
+ "pirates": "^4.0.1",
1803
+ "tinyglobby": "^0.2.11",
1804
+ "ts-interface-checker": "^0.1.9"
1805
+ },
1806
+ "bin": {
1807
+ "sucrase": "bin/sucrase",
1808
+ "sucrase-node": "bin/sucrase-node"
1809
+ },
1810
+ "engines": {
1811
+ "node": ">=16 || 14 >=14.17"
1812
+ }
1813
+ },
1814
+ "node_modules/supports-preserve-symlinks-flag": {
1815
+ "version": "1.0.0",
1816
+ "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
1817
+ "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
1818
+ "dev": true,
1819
+ "license": "MIT",
1820
+ "engines": {
1821
+ "node": ">= 0.4"
1822
+ },
1823
+ "funding": {
1824
+ "url": "https://github.com/sponsors/ljharb"
1825
+ }
1826
+ },
1827
+ "node_modules/tailwindcss": {
1828
+ "version": "3.4.19",
1829
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz",
1830
+ "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==",
1831
+ "dev": true,
1832
+ "license": "MIT",
1833
+ "dependencies": {
1834
+ "@alloc/quick-lru": "^5.2.0",
1835
+ "arg": "^5.0.2",
1836
+ "chokidar": "^3.6.0",
1837
+ "didyoumean": "^1.2.2",
1838
+ "dlv": "^1.1.3",
1839
+ "fast-glob": "^3.3.2",
1840
+ "glob-parent": "^6.0.2",
1841
+ "is-glob": "^4.0.3",
1842
+ "jiti": "^1.21.7",
1843
+ "lilconfig": "^3.1.3",
1844
+ "micromatch": "^4.0.8",
1845
+ "normalize-path": "^3.0.0",
1846
+ "object-hash": "^3.0.0",
1847
+ "picocolors": "^1.1.1",
1848
+ "postcss": "^8.4.47",
1849
+ "postcss-import": "^15.1.0",
1850
+ "postcss-js": "^4.0.1",
1851
+ "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0",
1852
+ "postcss-nested": "^6.2.0",
1853
+ "postcss-selector-parser": "^6.1.2",
1854
+ "resolve": "^1.22.8",
1855
+ "sucrase": "^3.35.0"
1856
+ },
1857
+ "bin": {
1858
+ "tailwind": "lib/cli.js",
1859
+ "tailwindcss": "lib/cli.js"
1860
+ },
1861
+ "engines": {
1862
+ "node": ">=14.0.0"
1863
+ }
1864
+ },
1865
+ "node_modules/thenify": {
1866
+ "version": "3.3.1",
1867
+ "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
1868
+ "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==",
1869
+ "dev": true,
1870
+ "license": "MIT",
1871
+ "dependencies": {
1872
+ "any-promise": "^1.0.0"
1873
+ }
1874
+ },
1875
+ "node_modules/thenify-all": {
1876
+ "version": "1.6.0",
1877
+ "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz",
1878
+ "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==",
1879
+ "dev": true,
1880
+ "license": "MIT",
1881
+ "dependencies": {
1882
+ "thenify": ">= 3.1.0 < 4"
1883
+ },
1884
+ "engines": {
1885
+ "node": ">=0.8"
1886
+ }
1887
+ },
1888
+ "node_modules/tinyglobby": {
1889
+ "version": "0.2.16",
1890
+ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
1891
+ "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
1892
+ "dev": true,
1893
+ "license": "MIT",
1894
+ "dependencies": {
1895
+ "fdir": "^6.5.0",
1896
+ "picomatch": "^4.0.4"
1897
+ },
1898
+ "engines": {
1899
+ "node": ">=12.0.0"
1900
+ },
1901
+ "funding": {
1902
+ "url": "https://github.com/sponsors/SuperchupuDev"
1903
+ }
1904
+ },
1905
+ "node_modules/to-regex-range": {
1906
+ "version": "5.0.1",
1907
+ "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
1908
+ "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==",
1909
+ "dev": true,
1910
+ "license": "MIT",
1911
+ "dependencies": {
1912
+ "is-number": "^7.0.0"
1913
+ },
1914
+ "engines": {
1915
+ "node": ">=8.0"
1916
+ }
1917
+ },
1918
+ "node_modules/ts-interface-checker": {
1919
+ "version": "0.1.13",
1920
+ "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
1921
+ "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==",
1922
+ "dev": true,
1923
+ "license": "Apache-2.0"
1924
+ },
1925
+ "node_modules/tslib": {
1926
+ "version": "2.8.1",
1927
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
1928
+ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
1929
+ "dev": true,
1930
+ "license": "0BSD",
1931
+ "optional": true
1932
+ },
1933
+ "node_modules/update-browserslist-db": {
1934
+ "version": "1.2.3",
1935
+ "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
1936
+ "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==",
1937
+ "dev": true,
1938
+ "funding": [
1939
+ {
1940
+ "type": "opencollective",
1941
+ "url": "https://opencollective.com/browserslist"
1942
+ },
1943
+ {
1944
+ "type": "tidelift",
1945
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
1946
+ },
1947
+ {
1948
+ "type": "github",
1949
+ "url": "https://github.com/sponsors/ai"
1950
+ }
1951
+ ],
1952
+ "license": "MIT",
1953
+ "dependencies": {
1954
+ "escalade": "^3.2.0",
1955
+ "picocolors": "^1.1.1"
1956
+ },
1957
+ "bin": {
1958
+ "update-browserslist-db": "cli.js"
1959
+ },
1960
+ "peerDependencies": {
1961
+ "browserslist": ">= 4.21.0"
1962
+ }
1963
+ },
1964
+ "node_modules/util-deprecate": {
1965
+ "version": "1.0.2",
1966
+ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
1967
+ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
1968
+ "dev": true,
1969
+ "license": "MIT"
1970
+ },
1971
+ "node_modules/vite": {
1972
+ "version": "8.0.7",
1973
+ "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.7.tgz",
1974
+ "integrity": "sha512-P1PbweD+2/udplnThz3btF4cf6AgPky7kk23RtHUkJIU5BIxwPprhRGmOAHs6FTI7UiGbTNrgNP6jSYD6JaRnw==",
1975
+ "dev": true,
1976
+ "license": "MIT",
1977
+ "dependencies": {
1978
+ "lightningcss": "^1.32.0",
1979
+ "picomatch": "^4.0.4",
1980
+ "postcss": "^8.5.8",
1981
+ "rolldown": "1.0.0-rc.13",
1982
+ "tinyglobby": "^0.2.15"
1983
+ },
1984
+ "bin": {
1985
+ "vite": "bin/vite.js"
1986
+ },
1987
+ "engines": {
1988
+ "node": "^20.19.0 || >=22.12.0"
1989
+ },
1990
+ "funding": {
1991
+ "url": "https://github.com/vitejs/vite?sponsor=1"
1992
+ },
1993
+ "optionalDependencies": {
1994
+ "fsevents": "~2.3.3"
1995
+ },
1996
+ "peerDependencies": {
1997
+ "@types/node": "^20.19.0 || >=22.12.0",
1998
+ "@vitejs/devtools": "^0.1.0",
1999
+ "esbuild": "^0.27.0 || ^0.28.0",
2000
+ "jiti": ">=1.21.0",
2001
+ "less": "^4.0.0",
2002
+ "sass": "^1.70.0",
2003
+ "sass-embedded": "^1.70.0",
2004
+ "stylus": ">=0.54.8",
2005
+ "sugarss": "^5.0.0",
2006
+ "terser": "^5.16.0",
2007
+ "tsx": "^4.8.1",
2008
+ "yaml": "^2.4.2"
2009
+ },
2010
+ "peerDependenciesMeta": {
2011
+ "@types/node": {
2012
+ "optional": true
2013
+ },
2014
+ "@vitejs/devtools": {
2015
+ "optional": true
2016
+ },
2017
+ "esbuild": {
2018
+ "optional": true
2019
+ },
2020
+ "jiti": {
2021
+ "optional": true
2022
+ },
2023
+ "less": {
2024
+ "optional": true
2025
+ },
2026
+ "sass": {
2027
+ "optional": true
2028
+ },
2029
+ "sass-embedded": {
2030
+ "optional": true
2031
+ },
2032
+ "stylus": {
2033
+ "optional": true
2034
+ },
2035
+ "sugarss": {
2036
+ "optional": true
2037
+ },
2038
+ "terser": {
2039
+ "optional": true
2040
+ },
2041
+ "tsx": {
2042
+ "optional": true
2043
+ },
2044
+ "yaml": {
2045
+ "optional": true
2046
+ }
2047
+ }
2048
+ }
2049
+ }
2050
+ }
frontend/react/package.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "openenv-rl-frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite --configLoader native || vite",
8
+ "build": "vite build --configLoader native || vite build",
9
+ "preview": "vite preview --configLoader native --host 0.0.0.0 --port 4173 || vite preview --host 0.0.0.0 --port 4173"
10
+ },
11
+ "dependencies": {
12
+ "react": "^18.3.1",
13
+ "react-dom": "^18.3.1"
14
+ },
15
+ "devDependencies": {
16
+ "@vitejs/plugin-react": "^6.0.1",
17
+ "autoprefixer": "^10.5.0",
18
+ "postcss": "^8.5.10",
19
+ "tailwindcss": "^3.4.19",
20
+ "vite": "^8.0.7"
21
+ }
22
+ }
frontend/react/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
/**
 * PostCSS configuration: registers the `tailwindcss` and `autoprefixer`
 * plugins, each with an empty options object.
 */
const postcssConfig = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};

export default postcssConfig;
frontend/react/src/App.jsx ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect } from "react";
2
+ import { api } from "./api/client";
3
+ import { Dashboard } from "./components/story-ui/Dashboard";
4
+
5
+ export default function App() {
6
+ const [tasks, setTasks] = useState([]);
7
+
8
+ useEffect(() => {
9
+ const boot = async () => {
10
+ try {
11
+ const taskRes = await api("/tasks");
12
+ setTasks(taskRes.tasks || []);
13
+ } catch (err) {
14
+ console.error("Failed to load tasks", err);
15
+ }
16
+ };
17
+ boot();
18
+ }, []);
19
+
20
+ return <Dashboard tasks={tasks} />;
21
+ }
frontend/react/src/api/client.js ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Address quoted in the connection-failure message raised by api().
const DEFAULT_LOCAL_API = "http://127.0.0.1:7860";
// Backend ports and hostnames that are addressed directly when the page is
// served from localhost/127.0.0.1 on port 5173.
const LOCAL_PORTS = ["7860"];
const LOCAL_HOSTS = ["127.0.0.1", "localhost"];
// Route families that are additionally tried without any /api prefix.
const COMPAT_NO_API_PREFIXES = [
  "/simulation/",
  "/training/",
  "/rl/",
  "/openenv/",
  "/benchmark",
  "/history/",
];

/**
 * Build the ordered, de-duplicated list of URLs to try for an API path.
 *
 * Ordering:
 *   - `/training/*` is only ever requested un-prefixed (direct backend URLs in
 *     local dev, the bare path otherwise).
 *   - `/rl/models` is only ever requested un-prefixed.
 *   - everything else is tried as `/api<path>`, then `/api/v1<path>`, then
 *     (for the compat route families) the bare path.
 *   - in local dev the direct backend URLs come before the same-origin ones.
 *
 * @param {string} path - API path such as "/tasks". A missing leading "/" is added.
 * @returns {string[]} Candidate URLs, most preferred first, without duplicates.
 * @throws {TypeError} If `path` is not a string.
 */
function candidates(path) {
  if (typeof path !== "string") {
    throw new TypeError(`API path must be a string, got ${typeof path}`);
  }
  // Without this, "tasks" would be glued onto the prefix as "/apitasks".
  const route = path.startsWith("/") ? path : `/${path}`;

  const isRootOnly = route === "/rl/models";
  const allowsNoApiPrefix = COMPAT_NO_API_PREFIXES.some((prefix) => route.startsWith(prefix));

  let isLocalDev5173 = false;
  if (typeof window !== "undefined") {
    const { hostname, port } = window.location;
    const isLocalHost = hostname === "localhost" || hostname === "127.0.0.1";
    isLocalDev5173 = isLocalHost && port === "5173";
  }

  // Direct backend origins, port-major / host-minor; empty outside local dev.
  const localOrigins = isLocalDev5173
    ? LOCAL_PORTS.flatMap((port) => LOCAL_HOSTS.map((host) => `http://${host}:${port}`))
    : [];

  // Training story endpoints are mounted at /training/* (not /api/training/*).
  // Skipping the known-bad prefixes avoids noisy 404 logs in the browser console.
  if (route.startsWith("/training/")) {
    const trainingUrls = isLocalDev5173
      ? localOrigins.map((origin) => `${origin}${route}`)
      : [route];
    return [...new Set(trainingUrls)];
  }

  // URL variants for one base ("" means same-origin), in preference order.
  const variantsFor = (base) => {
    if (isRootOnly) {
      return [`${base}${route}`];
    }
    const variants = [`${base}/api${route}`, `${base}/api/v1${route}`];
    if (allowsNoApiPrefix) {
      variants.push(`${base}${route}`);
    }
    return variants;
  };

  // In local dev, direct backend URLs go first to avoid Vite proxy
  // connection-refused spam when the backend is temporarily down.
  const urls = [
    ...localOrigins.flatMap((origin) => variantsFor(origin)),
    ...variantsFor(""),
  ];

  return [...new Set(urls)];
}
77
+
78
/**
 * Turn an error payload into the text appended to an "API ... failed" message.
 * Falls back to the HTTP status when the payload has no truthy `detail`;
 * non-string details (objects, arrays) are JSON-encoded so they stay readable.
 */
function describeFailure(payload, status) {
  const detail = payload?.detail;
  if (!detail) {
    return `${status}`;
  }
  if (typeof detail === "string") {
    return detail;
  }
  try {
    return JSON.stringify(detail) ?? String(detail);
  } catch {
    return String(detail);
  }
}

/**
 * Call the backend, trying each URL from `candidates(path)` in order and
 * returning the first successful response. Every candidate is attempted
 * sequentially for all HTTP methods; a failed attempt (network error or
 * non-2xx status) moves on to the next URL.
 *
 * @param {string} path - API path, e.g. "/tasks".
 * @param {object} [options] - `fetch` init options. `method` defaults to GET.
 *   `headers` may be a plain object, an array of pairs, or a `Headers` instance.
 *   For methods other than GET/HEAD a JSON Content-Type is added unless the
 *   caller already set one (in any letter case). `body` is dropped for GET/HEAD.
 * @returns {Promise<any>} Parsed JSON body of the first OK response, or `null`
 *   when that response has no parseable JSON body.
 * @throws {Error} The first "API <path> failed on <url>: <detail>" error if any
 *   candidate answered with a non-OK status; otherwise a "connection failed"
 *   error whose `cause` is the last underlying error.
 */
export async function api(path, options = {}) {
  const method = String(options.method || "GET").toUpperCase();
  const mayCarryBody = method !== "GET" && method !== "HEAD";

  // Headers copies entries from plain objects and Headers instances alike and
  // matches names case-insensitively, so a caller-supplied "content-type" is
  // respected instead of being duplicated.
  const headers = new Headers(options.headers || {});
  if (mayCarryBody && !headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const requestOptions = { ...options, method, headers };
  if (!mayCarryBody) {
    delete requestOptions.body;
  }

  const errors = [];
  for (const url of candidates(path)) {
    try {
      const res = await fetch(url, requestOptions);
      let payload = null;
      try {
        payload = await res.json();
      } catch {
        // Empty or non-JSON body: treated as "no payload".
        payload = null;
      }
      if (!res.ok) {
        throw new Error(`API ${path} failed on ${url}: ${describeFailure(payload, res.status)}`);
      }
      return payload;
    } catch (err) {
      errors.push(err);
    }
  }

  // A server that answered (even with an error status) is more informative
  // than a connection failure, so surface the first such error if there is one.
  const firstApiError = errors.find(
    (e) => e instanceof Error && e.message.startsWith(`API ${path} failed`)
  );
  if (firstApiError) {
    throw firstApiError;
  }

  const lastError = errors.length
    ? errors[errors.length - 1]
    : new Error("Unknown request failure.");
  const lastMessage = lastError instanceof Error ? lastError.message : String(lastError);

  throw new Error(
    `API ${path} connection failed. Start backend on ${DEFAULT_LOCAL_API}. Last error: ${lastMessage}`,
    { cause: lastError }
  );
}
127
+
128
/**
 * Format a value as a fixed-decimal number for display.
 *
 * @param {*} value - Number or numeric string to format.
 * @param {number} [digits=2] - Number of digits after the decimal point.
 * @returns {string} The formatted number, or "-" when the value is null,
 *   undefined, a blank string, not numeric, or not finite.
 */
export function fmt(value, digits = 2) {
  if (value == null) return "-";
  // Number("") and Number("   ") are 0, which would render a missing value as "0.00".
  if (typeof value === "string" && value.trim() === "") return "-";

  const numeric = Number(value);
  // Covers NaN as well as +/-Infinity, which toFixed would print as "Infinity".
  if (!Number.isFinite(numeric)) return "-";

  return numeric.toFixed(digits);
}