"""
main.py — Gov Workflow OpenEnv: FastAPI HTTP wrapper.
Session model
─────────────
Every POST /reset creates a new session identified by a UUID.
All subsequent calls (step, state, grade) carry that session_id in the
request body. Sessions are kept in a thread-safe in-memory OrderedDict.
When the store reaches max_sessions capacity, the oldest session is evicted
automatically (FIFO).
IMPORTANT: the in-memory store is NOT shared across multiple OS processes.
Run with workers=1 (the default from ServerSettings) to keep this correct.
Endpoint map
────────────
GET /health server + session health
POST /reset create session, returns session_id + obs
POST /step advance one simulation tick
POST /state (GET /state) full episode state, action_history optional
POST /grade task-specific deterministic grader
GET /sessions list active session IDs
DELETE /sessions/{id} remove a session
POST /api/auto_step policy selects action, then steps
POST /api/benchmark run multiple baseline episodes
GET /api/openenv_compliance OpenEnv interface compliance check
GET /docs Swagger UI (FastAPI auto-generated)
GET /redoc ReDoc UI (FastAPI auto-generated)
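Example client loop
───────────────────
Illustrative sketch only; it assumes the server is reachable at
http://localhost:8000 and that the `requests` package is installed (neither is
required by this module):

    import requests
    base = "http://localhost:8000"
    session_id = requests.post(f"{base}/reset", json={}).json()["session_id"]
    done = False
    while not done:
        out = requests.post(f"{base}/step", json={
            "session_id": session_id,
            "action": {"action_type": "advance_time"},
        }).json()
        done = out["done"]
    score = requests.post(f"{base}/grade", json={"session_id": session_id}).json()["score"]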
"""
from __future__ import annotations
from collections import OrderedDict
import json
import math
import os
from pathlib import Path
import shutil
import subprocess
from threading import Lock
import time
from typing import Any, Literal
from uuid import uuid4
from fastapi import APIRouter, Body, FastAPI, File, HTTPException, Query, UploadFile, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.routing import APIRoute
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from app.baselines import POLICIES, run_policy_episode
from app.config import env_settings, server_settings
from app.env import GovWorkflowEnv
from app.graders import grade_episode
from app.models import (
ActionModel,
EpisodeStateModel,
GraderResult,
ObservationModel,
ServiceType,
StepInfoModel,
)
from app.persistence import PersistenceStore
from app.simulator import LiveSimulationSession, SimulationAgentMode, run_simulation
from app.tasks import TASKS, get_task, list_benchmark_tasks, list_tasks
from app.training_jobs import TrainingJobManager
from app.sector_profiles import get_sector_profile
from app.story_router import router as story_router
from rl.action_mask import ActionMaskComputer
from rl.feature_builder import ACTION_DECODE_TABLE, N_ACTIONS
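# Server-sent events support: prefer sse_starlette when it is installed; otherwise fall
# back to a minimal StreamingResponse subclass that only sets the text/event-stream
# media type and no-cache/keep-alive headers.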
try:
from sse_starlette.sse import EventSourceResponse
except Exception:
class EventSourceResponse(StreamingResponse): # type: ignore[misc]
def __init__(self, content: Any, status_code: int = 200, headers: dict[str, str] | None = None):
merged_headers = {"Cache-Control": "no-cache", "Connection": "keep-alive"}
if headers:
merged_headers.update(headers)
super().__init__(
content=content,
status_code=status_code,
media_type="text/event-stream",
headers=merged_headers,
)
# ─────────────────────────────────────────────────────────────────────────────
# SESSION STORE
# ─────────────────────────────────────────────────────────────────────────────
class SessionStore:
"""
Thread-safe in-memory session registry.
Design decisions:
- Uses threading.Lock — safe for Uvicorn's single-worker async+thread model.
- Uses OrderedDict so eviction is always oldest-first in O(1) via popitem.
- Never imports from FastAPI. HTTP concerns (404 conversion) stay in endpoints.
- KeyError propagates upward and is converted to 404 there.
"""
def __init__(self, max_sessions: int | None) -> None:
self.store: OrderedDict[str, GovWorkflowEnv] = OrderedDict()
self.lock = Lock()
self.max = max_sessions
def create(
self,
task_id: str,
seed: int | None = None,
options: dict[str, Any] | None = None,
) -> tuple[str, ObservationModel, dict[str, Any]]:
env = GovWorkflowEnv(task_id=task_id)
obs, info = env.reset(seed=seed, options=options)
session_id = str(uuid4())
with self.lock:
if self.max and len(self.store) >= self.max:
self.store.popitem(last=False) # evict oldest
self.store[session_id] = env
return session_id, obs, info
def get(self, session_id: str) -> GovWorkflowEnv:
with self.lock:
env = self.store.get(session_id)
if env is None:
raise KeyError(session_id)
return env
def delete(self, session_id: str) -> bool:
with self.lock:
return self.store.pop(session_id, None) is not None
def active_count(self) -> int:
with self.lock:
return len(self.store)
def list_ids(self) -> list[str]:
with self.lock:
return list(self.store.keys())
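# Registry for live simulation runs (used by the SSE/live endpoints). Mirrors
# SessionStore, but evicted or deleted runs are closed best-effort so their
# underlying resources are released.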
class SimulationRunStore:
def __init__(self, max_runs: int | None = None) -> None:
self.store: OrderedDict[str, LiveSimulationSession] = OrderedDict()
self.lock = Lock()
self.max = max_runs
def create(self, run: LiveSimulationSession) -> str:
run_id = str(uuid4())
with self.lock:
if self.max and len(self.store) >= self.max:
_, evicted = self.store.popitem(last=False)
try:
evicted.close()
except Exception:
pass
self.store[run_id] = run
return run_id
def get(self, run_id: str) -> LiveSimulationSession:
with self.lock:
run = self.store.get(run_id)
if run is None:
raise KeyError(run_id)
return run
def delete(self, run_id: str) -> bool:
with self.lock:
run = self.store.pop(run_id, None)
if run is None:
return False
try:
run.close()
except Exception:
pass
return True
def list_ids(self) -> list[str]:
with self.lock:
return list(self.store.keys())
# ─────────────────────────────────────────────────────────────────────────────
# GLOBALS
# ─────────────────────────────────────────────────────────────────────────────
REPO_ROOT = Path(__file__).resolve().parent.parent
persistence = PersistenceStore(repo_root=REPO_ROOT)
sessions = SessionStore(max_sessions=env_settings.max_sessions)
model_cache: dict[tuple[str, str], Any] = {}
model_cache_lock = Lock()
training_jobs = TrainingJobManager(repo_root=REPO_ROOT, persistence=persistence)
sim_runs = SimulationRunStore(max_runs=max(env_settings.max_sessions, 50))
session_meta: dict[str, dict[str, Any]] = {}
session_meta_lock = Lock()
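# Lightweight per-session metadata (task_id, seed, step trace) kept beside the session
# store; guarded by its own lock so trace appends never contend with environment access.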
def _set_session_meta(session_id: str, **kwargs: Any) -> None:
with session_meta_lock:
meta = session_meta.setdefault(session_id, {})
meta.update(kwargs)
def _get_session_meta(session_id: str) -> dict[str, Any]:
with session_meta_lock:
return dict(session_meta.get(session_id, {}))
def _append_session_trace(session_id: str, row: dict[str, Any]) -> None:
with session_meta_lock:
meta = session_meta.setdefault(session_id, {})
trace = meta.setdefault("step_trace", [])
if isinstance(trace, list):
trace.append(row)
else:
meta["step_trace"] = [row]
def _pop_session_meta(session_id: str) -> None:
with session_meta_lock:
session_meta.pop(session_id, None)
# ─────────────────────────────────────────────────────────────────────────────
# DEPENDENCY HELPERS
# ─────────────────────────────────────────────────────────────────────────────
def get_or_404(session_id: str) -> GovWorkflowEnv:
"""Fetch a session env by ID or raise HTTP 404."""
try:
return sessions.get(session_id)
except KeyError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Session '{session_id}' not found. Call POST /reset to create a new session.",
)
def _get_session_or_404(session_id: str) -> GovWorkflowEnv:
return get_or_404(session_id)
def get_sim_or_404(run_id: str) -> LiveSimulationSession:
try:
return sim_runs.get(run_id)
except KeyError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Simulation run '{run_id}' not found. Call POST /api/simulation/live/start to create a live run.",
)
def resolve_policy_or_422(policy_name: str):
policy = POLICIES.get(policy_name)
if policy is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown agent/policy '{policy_name}'. Available: {sorted(POLICIES.keys())}",
)
return policy
def resolve_model_path_or_422(model_path: str) -> Path:
path = Path(model_path)
if not path.suffix:
path = path.with_suffix(".zip")
if not path.is_absolute():
path = (REPO_ROOT / path).resolve()
if not path.exists():
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Model checkpoint not found: {path}",
)
return path
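# Checkpoints are loaded once per (path, model_type) pair and cached for later requests.
# A missing RL runtime maps to 503; a checkpoint that fails to load maps to 422.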
def load_model_cached_or_503(model_path: Path, model_type: str):
cache_key = (str(model_path), model_type)
with model_cache_lock:
cached = model_cache.get(cache_key)
if cached is not None:
return cached
try:
if model_type == "maskable":
try:
from sb3_contrib import MaskablePPO # type: ignore[import-not-found]
except ModuleNotFoundError:
from sb3contrib import MaskablePPO # type: ignore[import-not-found]
model = MaskablePPO.load(str(model_path))
else:
try:
from sb3_contrib import RecurrentPPO # type: ignore[import-not-found]
except ModuleNotFoundError:
from sb3contrib import RecurrentPPO # type: ignore[import-not-found]
model = RecurrentPPO.load(str(model_path))
except ModuleNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="RL runtime dependencies are not available. Install requirements-rl.txt.",
) from exc
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Failed to load {model_type} model from {model_path}: {exc}",
) from exc
with model_cache_lock:
model_cache[cache_key] = model
return model
def decode_action_index(action_idx: int) -> str:
try:
from rl.feature_builder import ACTION_DECODE_TABLE
except ModuleNotFoundError:
return f"action={action_idx}"
row = ACTION_DECODE_TABLE.get(action_idx)
if row is None:
return f"action={action_idx}"
action_type, service, priority_mode, delta = row
extras = []
if service is not None:
extras.append(f"service={service}")
if priority_mode is not None:
extras.append(f"mode={priority_mode}")
if delta is not None:
extras.append(f"delta={delta}")
if extras:
return f"{action_type}[{', '.join(extras)}]"
return action_type
def _validate_task_id_or_422(task_id: str) -> str:
tasks = list_tasks()
if task_id not in set(tasks):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id '{task_id}'. Available: {tasks}",
)
return task_id
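# Mean probability for a task config: prefer the per-service override dict when present,
# otherwise average the named default attribute across the sector profiles of the task's
# enabled services; returns 0.0 when nothing is available.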
def _task_prob_mean(task_cfg: Any, field_name: str, default_getter: str) -> float:
override = getattr(task_cfg, field_name, None) or {}
if isinstance(override, dict) and override:
values = [float(v) for v in override.values()]
return float(sum(values) / max(len(values), 1))
probs: list[float] = []
for service in getattr(task_cfg, "enabled_services", []):
try:
profile = get_sector_profile(service)
probs.append(float(getattr(profile, default_getter)))
except Exception:
continue
if not probs:
return 0.0
return float(sum(probs) / len(probs))
def _task_summary_dict(task_id: str) -> dict[str, Any]:
cfg = get_task(task_id)
services = [s.value if hasattr(s, "value") else str(s) for s in getattr(cfg, "enabled_services", [])]
pool = getattr(cfg, "initial_officer_pool", None)
officer_pool_total = int(getattr(pool, "total_officers", 0) or 0) if pool is not None else 0
reserve_officers = int(getattr(pool, "idle_officers", 0) or 0) if pool is not None else 0
return {
"task_id": str(task_id),
"seed": int(getattr(cfg, "seed", 0) or 0),
"max_days": int(getattr(cfg, "max_days", 0) or 0),
"services": services,
"officer_pool_total": officer_pool_total,
"reserve_officers": reserve_officers,
"escalation_budget": int(getattr(cfg, "escalation_budget", 0) or 0),
"missing_docs_probability": _task_prob_mean(cfg, "missing_docs_probability_override", "missing_docs_probability"),
"field_verification_probability": _task_prob_mean(
cfg,
"field_verification_probability_override",
"field_verification_probability",
),
"scenario_mode": str(getattr(getattr(cfg, "scenario_mode", "normal"), "value", getattr(cfg, "scenario_mode", "normal"))),
"fairness_threshold": getattr(cfg, "fairness_threshold", None),
}
def _action_service_hint(action: ActionModel) -> str | None:
for attr in ("service", "service_target", "escalation_target"):
value = getattr(action, attr, None)
if value is None:
continue
return value.value if hasattr(value, "value") else str(value)
if getattr(action, "capacity_assignment", None):
keys = list((action.capacity_assignment or {}).keys())
if keys:
key = keys[0]
return key.value if hasattr(key, "value") else str(key)
if getattr(action, "reallocation_delta", None):
for key, delta in (action.reallocation_delta or {}).items():
if int(delta) < 0:
return key.value if hasattr(key, "value") else str(key)
return None
def _result_value(result: Any, key: str, default: Any = None) -> Any:
"""Read from dict-like or attribute-like result payloads."""
if isinstance(result, dict):
return result.get(key, default)
return getattr(result, key, default)
def _log_line_text(value: Any) -> str:
"""Normalize live-simulation log payloads to plain text."""
if isinstance(value, str):
return value
if isinstance(value, dict):
raw = value.get("log")
if isinstance(raw, str):
return raw
try:
return json.dumps(value, separators=(",", ":"))
except Exception:
return str(value)
if value is None:
return ""
return str(value)
def _phase_model_dirs() -> list[Path]:
"""
Discover model directories from multiple roots.
Priority:
1) Explicit OPENENV_MODEL_SEARCH_DIRS (CSV of absolute/relative paths)
2) Persistent storage root OPENENV_DATA_DIR (HF bucket mount recommended)
3) Repo-local results/best_model
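Example (illustrative values): OPENENV_MODEL_SEARCH_DIRS="/data/models,extra/phase_models"
resolves the second, relative entry against the repo root.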
"""
configured_dirs = (os.getenv("OPENENV_MODEL_SEARCH_DIRS") or "").strip()
configured: list[Path] = []
if configured_dirs:
for raw in configured_dirs.split(","):
s = raw.strip()
if not s:
continue
p = Path(s)
if not p.is_absolute():
p = (REPO_ROOT / p).resolve()
configured.append(p)
data_root_raw = (os.getenv("OPENENV_DATA_DIR") or "").strip()
data_root = Path(data_root_raw) if data_root_raw else None
if data_root is not None and not data_root.is_absolute():
data_root = (REPO_ROOT / data_root).resolve()
persistence_root = getattr(persistence, "data_dir", None)
if isinstance(persistence_root, Path):
persistence_root = persistence_root.resolve()
repo_base = REPO_ROOT / "results" / "best_model"
candidates = [
*configured,
repo_base / "phase1",
repo_base / "phase2",
]
if data_root is not None:
candidates.extend(
[
data_root / "results" / "best_model" / "phase1",
data_root / "results" / "best_model" / "phase2",
data_root / "best_model" / "phase1",
data_root / "best_model" / "phase2",
]
)
if persistence_root is not None:
candidates.extend(
[
persistence_root / "results" / "best_model" / "phase1",
persistence_root / "results" / "best_model" / "phase2",
persistence_root / "best_model" / "phase1",
persistence_root / "best_model" / "phase2",
]
)
# Preserve order, remove duplicates.
deduped: list[Path] = []
seen: set[str] = set()
for p in candidates:
key = str(p.resolve()) if p.exists() else str(p)
if key in seen:
continue
seen.add(key)
deduped.append(p)
return deduped
def _discover_phase12_zip_models() -> list[Path]:
discovered: list[Path] = []
for model_dir in _phase_model_dirs():
if not model_dir.exists():
continue
for file_path in sorted(model_dir.glob("*.zip")):
if file_path.is_file():
discovered.append(file_path.resolve())
unique = sorted({p for p in discovered if p.exists()})
return unique
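# Writable storage root for uploaded checkpoints. Candidate roots are tried in order and
# the first one whose results/best_model directory can be created wins: OPENENV_DATA_DIR,
# the persistence store's data_dir, repo-local outputs/persist, then /tmp/openenv_rl.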
def _model_storage_base_dir() -> Path:
candidate_roots: list[Path] = []
configured_root = (os.getenv("OPENENV_DATA_DIR") or "").strip()
if configured_root:
p = Path(configured_root)
if not p.is_absolute():
p = (REPO_ROOT / p).resolve()
candidate_roots.append(p)
persistence_root = getattr(persistence, "data_dir", None)
if isinstance(persistence_root, Path):
candidate_roots.append(persistence_root.resolve())
candidate_roots.extend(
[
(REPO_ROOT / "outputs" / "persist").resolve(),
Path("/tmp/openenv_rl").resolve(),
]
)
seen: set[str] = set()
unique_roots: list[Path] = []
for root in candidate_roots:
key = str(root)
if key in seen:
continue
seen.add(key)
unique_roots.append(root)
last_exc: Exception | None = None
for root in unique_roots:
try:
base_dir = root / "results" / "best_model"
base_dir.mkdir(parents=True, exist_ok=True)
return base_dir
except OSError as exc:
last_exc = exc
continue
raise RuntimeError(f"No writable model storage directory found. last_error={last_exc!r}")
def _phase_from_model_path(path: Path) -> int:
parent = path.parent.name.lower()
if parent == "phase1":
return 1
if parent == "phase2":
return 2
name = path.name.lower()
if "phase1" in name:
return 1
if "phase2" in name:
return 2
return 0
# ─────────────────────────────────────────────────────────────────────────────
# API REQUEST / RESPONSE SCHEMAS
# ─────────────────────────────────────────────────────────────────────────────
class HealthResponse(BaseModel):
status: str
version: str
phase: str | None = None
detail: str | None = None
active_sessions: int
available_tasks: list[str]
class ResetRequest(BaseModel):
task_id: str = Field(
default=env_settings.default_task_id,
description="Task to run. One of the three benchmark task IDs.",
)
seed: int | None = Field(
default=None,
description=(
"RNG seed. Omit to use the task's built-in deterministic seed. "
"Pass an explicit integer to replay the same episode."
),
)
options: dict[str, Any] | None = Field(
default=None,
description=(
"Optional overrides forwarded verbatim to env.reset(options=...). "
"Supported key: 'task_id' to switch tasks inside an existing session."
),
)
class ResetResponse(BaseModel):
session_id: str
task_id: str | None = None
seed: int | None = None
observation: ObservationModel
info: dict[str, Any]
class StepRequest(BaseModel):
session_id: str = Field(description="Session ID returned by POST /reset.")
action: ActionModel
class StepResponse(BaseModel):
session_id: str
observation: ObservationModel
reward: float
done: bool
terminated: bool
truncated: bool
info: StepInfoModel
class StateRequest(BaseModel):
session_id: str = Field(description="Session ID returned by POST /reset.")
include_action_history: bool = Field(
default=False,
description=(
"When False (default) the action_history list is stripped to keep payloads small. "
"Set True to receive the full step-by-step action log."
),
)
class StateResponse(BaseModel):
session_id: str
state: EpisodeStateModel
class GradeRequest(BaseModel):
session_id: str = Field(description="Session ID returned by POST /reset.")
class GradeResponse(BaseModel):
session_id: str
task_id: str | None = None
score: float = Field(ge=0.0, le=1.0, description="Episode score in [0.0, 1.0].")
grader_name: str
metrics: dict[str, float]
class SessionListResponse(BaseModel):
active_sessions: int
session_ids: list[str]
class DeleteSessionResponse(BaseModel):
deleted: str
class TaskListResponse(BaseModel):
tasks: list[str]
class TaskSummary(BaseModel):
task_id: str
seed: int
max_days: int
services: list[str]
officer_pool_total: int
reserve_officers: int
escalation_budget: int
missing_docs_probability: float
field_verification_probability: float
scenario_mode: str
fairness_threshold: float | None = None
class ActionMaskRequest(BaseModel):
session_id: str
class ActionMaskResponse(BaseModel):
session_id: str
action_mask: list[bool]
valid_action_indices: list[int]
valid_action_labels: list[str]
total_valid: int
total_actions: int
class RLRunV2Request(BaseModel):
task_id: str
model_path: str
seed: int = 42
max_steps: int = Field(default=80, ge=1, le=2000)
n_episodes: int = Field(default=1, ge=1, le=100)
class RLRunV2Response(BaseModel):
task_id: str
model_path: str
seed: int
n_episodes: int
mean_score: float
mean_reward: float
mean_completed: int
mean_sla_breaches: int
episodes: list[dict[str, Any]]
class ModelInfo(BaseModel):
model_path: str
task_id: str
phase: int
size_mb: float
exists: bool
class SimulateRequest(BaseModel):
task_id: str = "district_backlog_easy"
agent_mode: str = "baseline_policy"
max_steps: int = Field(default=40, ge=1, le=500)
seed: int = 42
policy_name: str | None = "backlog_clearance"
model_path: str | None = None
class AutoStepRequest(BaseModel):
session_id: str = Field(description="Session ID returned by POST /reset.")
agent_policy: str = Field(
default="backlog_clearance",
description="Policy name from app.baselines.POLICIES.",
)
class AutoStepResponse(BaseModel):
session_id: str
agent_policy: str
action: ActionModel
observation: ObservationModel
reward: float
done: bool
terminated: bool
truncated: bool
info: StepInfoModel
class BenchmarkRequest(BaseModel):
task_id: str = Field(default=env_settings.default_task_id)
agent_policies: list[str] = Field(
default_factory=lambda: ["urgent_first", "oldest_first", "backlog_clearance"]
)
runs: int = Field(default=5, ge=1, le=30)
max_steps: int = Field(default=500, ge=1, le=2000)
seed_base: int | None = Field(
default=100,
description="Base seed — each run uses seed_base + run_index.",
)
class BenchmarkAgentRun(BaseModel):
run_index: int
seed: int | None
score: float
reward_sum: float
completed: int
backlog: int
steps: int
class BenchmarkAgentSummary(BaseModel):
agent_policy: str
average_score: float
min_score: float
max_score: float
runs: list[BenchmarkAgentRun]
class BenchmarkResponse(BaseModel):
task_id: str
requested_runs: int
agent_results: list[BenchmarkAgentSummary]
class WorkflowComponentStatus(BaseModel):
component: str
description: str
available: bool
command: str | None = None
notes: str | None = None
class WorkflowComponentsResponse(BaseModel):
components: list[WorkflowComponentStatus]
class OpenEnvComplianceItem(BaseModel):
key: str
label: str
status: Literal["pass", "fail", "unknown"]
detail: str
class OpenEnvComplianceResponse(BaseModel):
checked_at: float
items: list[OpenEnvComplianceItem]
openenv_validate_exit_code: int | None = None
openenv_validate_stdout_tail: str | None = None
openenv_validate_stderr_tail: str | None = None
class WorkflowRunRequest(BaseModel):
workflow_id: Literal["baseline_openai", "inference", "phase2_eval"]
timeout_seconds: int = Field(default=180, ge=10, le=1200)
max_steps: int = Field(default=40, ge=1, le=500)
episodes: int = Field(default=3, ge=1, le=20)
model_path: str = Field(default="results/best_model/phase2_final.zip")
model_type: Literal["maskable", "recurrent"] = Field(default="maskable")
class WorkflowRunResponse(BaseModel):
workflow_id: str
command: list[str]
exit_code: int
duration_seconds: float
stdout: str
stderr: str
timed_out: bool
class RLModelInfo(BaseModel):
label: str
path: str
exists: bool
model_type: Literal["maskable", "recurrent"]
class RLModelsResponse(BaseModel):
models: list[RLModelInfo]
class RLRunRequest(BaseModel):
task_id: str = Field(default=env_settings.default_task_id)
model_path: str = Field(default="results/best_model/phase2_final.zip")
model_type: Literal["maskable", "recurrent"] = Field(default="maskable")
max_steps: int = Field(default=80, ge=1, le=1000)
seed: int | None = Field(default=None)
class RLRunStep(BaseModel):
step: int
action_index: int
action_label: str
reward: float
backlog: int
completed: int
sla_breaches: int
fairness_gap: float
done: bool
class RLRunResponse(BaseModel):
model_path: str
model_type: Literal["maskable", "recurrent"]
task_id: str
seed: int
total_steps: int
total_reward: float
grader_score: float
grader_name: str
trace: list[RLRunStep]
class RLEvaluateRequest(BaseModel):
model_path: str = Field(default="results/best_model/phase2_final.zip")
model_type: Literal["auto", "maskable", "recurrent"] = Field(default="auto")
episodes: int = Field(default=3, ge=1, le=20)
task_ids: list[str] = Field(default_factory=list)
class RLEvaluateTaskResult(BaseModel):
task_id: str
grader_score: float
total_reward: float
total_steps: int
total_completed: int
total_sla_breaches: int
fairness_gap: float
class RLEvaluateResponse(BaseModel):
model_path: str
model_type: Literal["auto", "maskable", "recurrent"]
episodes: int
average_grader_score: float
results: list[RLEvaluateTaskResult]
class SimulationRequest(BaseModel):
task_id: str = Field(default=env_settings.default_task_id)
agent_mode: SimulationAgentMode = Field(default=SimulationAgentMode.BASELINE_POLICY)
max_steps: int = Field(default=80, ge=1, le=500)
seed: int | None = Field(default=None)
policy_name: str = Field(default="backlog_clearance")
model_path: str | None = Field(default=None)
model_type: Literal["maskable", "recurrent"] = Field(default="maskable")
class SimulationStep(BaseModel):
step: int
day: int
action_type: str
action_payload: dict[str, Any]
reward: float
done: bool
backlog: int
completed: int
sla_breaches: int
fairness_gap: float
escalation_budget_remaining: int
invalid_action: bool
last_action_error: str | None = None
queue_rows: list[dict[str, Any]]
action_index: int | None = None
decision_source: str | None = None
provider: str | None = None
model_used: str | None = None
llm_attempts: int | None = None
llm_error: str | None = None
llm_key_label: str | None = None
repair_note: str | None = None
switch_note: str | None = None
class SimulationResponse(BaseModel):
task_id: str
agent_mode: SimulationAgentMode
seed: int
total_reward: float
score: float
grader_name: str
summary: dict[str, Any]
trace: list[SimulationStep]
class SimulationLiveStartRequest(SimulationRequest):
pass
class SimulationLiveStartResponse(BaseModel):
run_id: str
task_id: str
agent_mode: SimulationAgentMode
seed: int
max_steps: int
start_log: str
route_plan: list[str] = Field(default_factory=list)
class SimulationLiveStepRequest(BaseModel):
run_id: str
class SimulationLiveStepResponse(BaseModel):
run_id: str
done: bool
step: SimulationStep | None = None
step_log: str | None = None
end_log: str | None = None
total_reward: float
score: float | None = None
grader_name: str | None = None
summary: dict[str, Any] | None = None
class SimulationLiveStateResponse(BaseModel):
run_id: str
state: dict[str, Any]
class TrainingJobStartRequest(BaseModel):
phase: Literal[1, 2] = Field(default=2)
timesteps: int = Field(default=120_000, ge=10_000, le=2_000_000)
n_envs: int = Field(default=4, ge=1, le=16)
seed: int | None = Field(
default=None,
description="When omitted, a time-based seed is auto-generated.",
)
config_path: str | None = Field(default=None)
class TrainingJobStopResponse(BaseModel):
stopped: bool
job_id: str
status: str
class TrainingJobDeleteResponse(BaseModel):
deleted: bool
job_id: str
class TrainingJobsListResponse(BaseModel):
jobs: list[dict[str, Any]]
class SimulationHistoryListResponse(BaseModel):
runs: list[dict[str, Any]]
class ComparisonHistoryCreateRequest(BaseModel):
task_id: str
baseline_policy: str
model_path: str
model_type: str
include_llm: bool = True
runs: int
steps: int
episodes: int
seed_base: int
result: dict[str, Any]
class ComparisonHistoryCreateResponse(BaseModel):
comparison_id: str
class ComparisonHistoryListResponse(BaseModel):
comparisons: list[dict[str, Any]]
class HistoryClearResponse(BaseModel):
cleared: bool
deleted_rows: int
scope: str
class ComparisonHistoryRepairResponse(BaseModel):
comparison_id: str
repaired: bool
detail: str
# ─────────────────────────────────────────────────────────────────────────────
# APPLICATION
# ─────────────────────────────────────────────────────────────────────────────
app = FastAPI(
title="Gov Workflow OpenEnv",
summary="Government-service workflow control — OpenEnv-compatible HTTP API",
description=(
"A real-world OpenEnv-style environment where an AI agent reduces avoidable "
"administrative delay in government-service workflows through queue prioritisation, "
"missing-document handling, officer allocation, escalation control, SLA routing, "
"and fairness management.\n\n"
"**Quick start**\n"
"1. `POST /reset` → get `session_id`\n"
"2. `POST /step` with `session_id` + `action` repeatedly\n"
"3. `POST /grade` to get the deterministic episode score\n"
"4. `DELETE /sessions/{session_id}` to clean up"
),
version="0.3.0",
docs_url="/docs",
redoc_url="/redoc",
)
app.include_router(story_router)
app.include_router(story_router, prefix="/api", include_in_schema=False)
app.include_router(story_router, prefix="/api/v1", include_in_schema=False)
app.add_middleware(
CORSMiddleware,
allow_origins=server_settings.cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ── Static UI (optional Vite build) ─────────────────────────────────────────
REPO_ROOT = Path(__file__).resolve().parent.parent
WEB_DIR = Path(__file__).resolve().parent / "web"
VITE_WEB_DIRS = [
WEB_DIR / "vite_dist", # Docker image copy target
WEB_DIR / "vite-dist", # legacy/migrated target
REPO_ROOT / "frontend" / "react" / "dist", # local dev build
]
UI_INDEX_FILE: Path | None = None
UI_ASSETS_DIR: Path | None = None
for _ui_dir in VITE_WEB_DIRS:
if _ui_dir.joinpath("index.html").exists():
UI_INDEX_FILE = _ui_dir / "index.html"
UI_ASSETS_DIR = _ui_dir / "assets"
break
if UI_ASSETS_DIR is not None and UI_ASSETS_DIR.exists():
app.mount("/ui/assets", StaticFiles(directory=str(UI_ASSETS_DIR)), name="ui-assets")
@app.get("/", include_in_schema=False)
def root_redirect() -> RedirectResponse:
if UI_INDEX_FILE is None:
return RedirectResponse(url="/docs", status_code=status.HTTP_307_TEMPORARY_REDIRECT)
return RedirectResponse(url="/ui", status_code=status.HTTP_307_TEMPORARY_REDIRECT)
@app.get("/ui", include_in_schema=False)
def ui_index() -> FileResponse:
if UI_INDEX_FILE is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="UI bundle not found. Build frontend/react with Vite first.",
)
return FileResponse(
UI_INDEX_FILE,
headers={
# Always revalidate HTML shell so users pick up the latest hashed bundle.
"Cache-Control": "no-store, no-cache, must-revalidate",
"Pragma": "no-cache",
"Expires": "0",
},
)
# ─────────────────────────────────────────────────────────────────────────────
# CORE OpenEnv ENDPOINTS
# ─────────────────────────────────────────────────────────────────────────────
@app.get("/health", response_model=HealthResponse, tags=["meta"], summary="Server and session health")
def health() -> HealthResponse:
"""Returns server status, version, active session count, and task list."""
detail = None
health_status = "ok"
try:
from app.env import GovWorkflowEnv as _EnvHealthCheck # noqa: F401
except ImportError as exc:
health_status = "degraded"
detail = str(exc)
return HealthResponse(
status=health_status,
version="2.0.0",
phase="3_rl_training",
detail=detail,
active_sessions=sessions.active_count(),
available_tasks=list_tasks(),
)
@app.post(
"/reset",
response_model=ResetResponse,
status_code=status.HTTP_200_OK,
tags=["env"],
summary="Create a new session and return the initial observation",
)
def reset(body: ResetRequest | None = Body(default=None)) -> ResetResponse:
"""
Creates a fresh GovWorkflowEnv episode, registers it in the session store,
and returns a unique session_id with the initial observation.
Use seed for reproducible episodes.
"""
req = body or ResetRequest()
_validate_task_id_or_422(req.task_id)
session_id, obs, info = sessions.create(
task_id=req.task_id,
seed=req.seed,
options=req.options,
)
_set_session_meta(
session_id,
task_id=req.task_id,
seed=req.seed,
step_trace=[],
)
return ResetResponse(
session_id=session_id,
task_id=req.task_id,
seed=req.seed,
observation=obs,
info=info,
)
@app.post(
"/step",
response_model=StepResponse,
tags=["env"],
summary="Advance the simulation by one tick",
)
def step(body: StepRequest) -> StepResponse:
"""
Applies one ActionModel to the session's environment and returns the next
observation, reward, termination flags, and step info.
Returns 409 Conflict if the episode has already ended.
"""
env = get_or_404(body.session_id)
if env.terminated or env.truncated:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="Episode has already ended (terminated or truncated). Call POST /reset to start a new episode.",
)
obs, reward, terminated, truncated, info = env.step(body.action)
trace = _get_session_meta(body.session_id).get("step_trace", [])
_append_session_trace(
body.session_id,
{
"step": len(trace) + 1,
"day": int(getattr(obs, "day", 0) or 0),
"action_type": str(
getattr(
getattr(body.action, "action_type", ""),
"value",
getattr(body.action, "action_type", ""),
)
),
"service": _action_service_hint(body.action),
"reward": round(float(reward), 4),
"total_backlog": int(getattr(obs, "total_backlog", 0) or 0),
"total_completed": int(getattr(obs, "total_completed", 0) or 0),
"total_sla_breaches": int(getattr(obs, "total_sla_breaches", 0) or 0),
"last_action_valid": bool(getattr(obs, "last_action_valid", True)),
"notes": str(getattr(info, "action_explanation", "")),
},
)
return StepResponse(
session_id=body.session_id,
observation=obs,
reward=reward,
done=terminated or truncated,
terminated=terminated,
truncated=truncated,
info=info,
)
@app.post(
"/state",
response_model=StateResponse,
tags=["env"],
summary="Return the full internal episode state",
)
def state_post(body: StateRequest) -> StateResponse:
"""
Returns the complete EpisodeStateModel for the given session.
Set include_action_history=true to receive the full step-by-step log.
Default is false to keep response payloads small during agent loops.
"""
env = get_or_404(body.session_id)
episode_state = env.state()
if not body.include_action_history:
episode_state = episode_state.model_copy(update={"action_history": None})
return StateResponse(session_id=body.session_id, state=episode_state)
@app.get(
"/state",
response_model=StateResponse,
tags=["env"],
summary="Return the full internal episode state (GET variant)",
)
def state_get(
session_id: str = Query(description="Session ID returned by POST /reset."),
include_action_history: bool = Query(
default=False,
description="When False (default) the action_history list is stripped.",
),
) -> StateResponse:
"""GET variant of /state β€” accepts session_id as a query parameter."""
env = get_or_404(session_id)
episode_state = env.state()
if not include_action_history:
episode_state = episode_state.model_copy(update={"action_history": None})
return StateResponse(session_id=session_id, state=episode_state)
@app.post(
"/grade",
response_model=GradeResponse,
tags=["env"],
summary="Run the deterministic task grader for the current episode",
)
def grade(body: GradeRequest) -> GradeResponse:
"""
Runs the task-specific deterministic grader against the current episode state
and returns a score in [0.0, 1.0] plus per-metric breakdowns.
Can be called at any point - not only at termination.
GraderResult fields used:
result.score -> episode score [0.0, 1.0]
result.grader_name -> "easy" | "medium" | "hard"
result.metrics -> dict of named metric floats (property on GraderResult)
"""
env = get_or_404(body.session_id)
task_id = _get_session_meta(body.session_id).get(
"task_id",
getattr(env, "task_id", env_settings.default_task_id),
)
try:
episode_state = env.get_episode_state()
except AttributeError:
episode_state = env.state()
result: GraderResult = grade_episode(episode_state)
return GradeResponse(
session_id=body.session_id,
task_id=str(task_id),
score=result.score,
grader_name=result.grader_name,
metrics=result.metrics,
)
@app.get(
"/sessions",
response_model=SessionListResponse,
tags=["meta"],
summary="List all active session IDs",
)
def list_sessions() -> SessionListResponse:
"""Returns the count and IDs of all currently active sessions."""
return SessionListResponse(
active_sessions=sessions.active_count(),
session_ids=sessions.list_ids(),
)
@app.delete(
"/sessions/{session_id}",
response_model=DeleteSessionResponse,
tags=["meta"],
summary="Delete a session and free its memory",
)
def delete_session(session_id: str) -> DeleteSessionResponse:
"""Removes the session from the store and releases its GovWorkflowEnv instance."""
deleted = sessions.delete(session_id)
if not deleted:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Session '{session_id}' not found.",
)
_pop_session_meta(session_id)
return DeleteSessionResponse(deleted=session_id)
# ─────────────────────────────────────────────────────────────────────────────
# /api ROUTER β€” frontend + extended API
# ─────────────────────────────────────────────────────────────────────────────
@app.get("/tasks", response_model=list[TaskSummary], tags=["Tasks"], summary="List benchmark task configurations")
def tasks_list() -> list[TaskSummary]:
task_rows: list[TaskSummary] = []
for task_id in list_benchmark_tasks():
task_rows.append(TaskSummary(**_task_summary_dict(task_id)))
return task_rows
@app.get("/tasks/{task_id}", response_model=TaskSummary, tags=["Tasks"], summary="Get one benchmark task configuration")
def task_get(task_id: str) -> TaskSummary:
available = list_benchmark_tasks()
if task_id not in set(available):
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Task '{task_id}' not found. Available: {available}",
)
return TaskSummary(**_task_summary_dict(task_id))
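# Action masks are recomputed from the session's current observation; valid indices map
# into rl.feature_builder.ACTION_DECODE_TABLE, so the labels mirror the flat discrete
# action space defined there.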
@app.post("/action-masks", response_model=ActionMaskResponse, tags=["Environment"], summary="Get valid actions for current session state")
def action_masks(body: ActionMaskRequest) -> ActionMaskResponse:
env = _get_session_or_404(body.session_id)
obs = env._build_observation()
priority_mode = getattr(env, "priority_mode", "balanced")
priority_mode_str = priority_mode.value if hasattr(priority_mode, "value") else str(priority_mode)
computer = ActionMaskComputer()
mask_array = computer.compute(obs, priority_mode_str)
mask_list = [bool(v) for v in mask_array.tolist()]
valid_action_indices = [i for i, v in enumerate(mask_list) if v]
valid_action_labels: list[str] = []
for idx in valid_action_indices:
decode = ACTION_DECODE_TABLE.get(idx, ())
action_type = decode[0] if decode else f"action_{idx}"
service = ""
if len(decode) > 1 and decode[1]:
service = str(decode[1])
elif len(decode) > 2 and decode[2]:
service = str(decode[2])
label = f"{action_type}({service})" if service else str(action_type)
valid_action_labels.append(label)
return ActionMaskResponse(
session_id=body.session_id,
action_mask=mask_list,
valid_action_indices=valid_action_indices,
valid_action_labels=valid_action_labels,
total_valid=len(valid_action_indices),
total_actions=int(N_ACTIONS),
)
@app.get("/rl/models", response_model=list[ModelInfo], tags=["RL"], summary="List discovered RL model checkpoints")
def rl_models_v2() -> list[ModelInfo]:
unique_paths = _discover_phase12_zip_models()
if not unique_paths:
return [ModelInfo(model_path="none", task_id="none", phase=0, size_mb=0.0, exists=False)]
rows: list[ModelInfo] = []
for path in unique_paths:
phase = _phase_from_model_path(path)
stem = path.stem.lower()
if "medium" in stem:
task_id = "mixed_urgency_medium"
else:
task_id = "district_backlog_easy"
rows.append(
ModelInfo(
model_path=str(path.with_suffix("")),
task_id=task_id,
phase=phase,
size_mb=round(float(path.stat().st_size) / (1024 * 1024), 3),
exists=True,
)
)
return rows
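# Each episode builds a fresh Gym wrapper with hard action masking, rolls the loaded
# MaskablePPO policy deterministically, then grades the terminal state with the
# deterministic task grader before the wrapper is closed.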
@app.post("/rl/run", response_model=RLRunV2Response, tags=["RL"], summary="Run trained MaskablePPO model for N episodes")
def rl_run_v2(body: RLRunV2Request) -> RLRunV2Response:
_validate_task_id_or_422(body.task_id)
raw_path = Path(body.model_path)
zip_path = raw_path.with_suffix(".zip") if raw_path.suffix != ".zip" else raw_path
if not zip_path.is_absolute():
zip_path = (REPO_ROOT / zip_path).resolve()
if not zip_path.exists():
requested = str(zip_path.with_suffix(""))
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Model not found at '{requested}.zip'",
)
try:
from sb3_contrib import MaskablePPO # type: ignore[import-not-found]
from rl.gov_workflow_env import GovWorkflowGymEnv
except ImportError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"RL dependencies not available: {exc}",
) from exc
try:
model = MaskablePPO.load(str(zip_path.with_suffix("")))
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Failed to load model from '{zip_path}': {exc}",
) from exc
episode_results: list[dict[str, Any]] = []
for ep in range(body.n_episodes):
env = GovWorkflowGymEnv(task_id=body.task_id, seed=body.seed + ep, hard_action_mask=True)
try:
obs, _ = env.reset(seed=body.seed + ep)
done = False
total_reward = 0.0
steps = 0
while not done and steps < body.max_steps:
masks = env.action_masks()
action, _ = model.predict(obs, action_masks=masks, deterministic=True)
obs, reward, terminated, truncated, _ = env.step(int(action))
total_reward += float(reward)
done = bool(terminated or truncated)
steps += 1
episode_state = env.core_env.state()
grade_result = grade_episode(episode_state)
episode_results.append(
{
"episode": ep,
"seed": body.seed + ep,
"score": float(grade_result.score),
"total_reward": round(float(total_reward), 4),
"total_completed": int(episode_state.total_completed),
"total_sla_breaches": int(episode_state.total_sla_breaches),
"total_backlog": int(episode_state.total_backlog),
"steps": int(steps),
"grader_metrics": grade_result.metrics,
}
)
finally:
env.close()
mean_score = float(sum(x["score"] for x in episode_results) / max(len(episode_results), 1))
mean_reward = float(sum(x["total_reward"] for x in episode_results) / max(len(episode_results), 1))
mean_completed = int(sum(x["total_completed"] for x in episode_results) / max(len(episode_results), 1))
mean_breaches = int(sum(x["total_sla_breaches"] for x in episode_results) / max(len(episode_results), 1))
return RLRunV2Response(
task_id=body.task_id,
model_path=str(zip_path.with_suffix("")),
seed=body.seed,
n_episodes=body.n_episodes,
mean_score=mean_score,
mean_reward=mean_reward,
mean_completed=mean_completed,
mean_sla_breaches=mean_breaches,
episodes=episode_results,
)
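# Live simulation stream: each SSE message is one JSON-encoded step row; a final
# {"done": true, "session_id": <run_id>} message is sent before the run is closed.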
@app.post("/simulate", tags=["Simulation"], summary="Run a live simulation stream (SSE)")
def simulate_stream(body: SimulateRequest) -> EventSourceResponse:
_validate_task_id_or_422(body.task_id)
mode_map = {
"baseline_policy": SimulationAgentMode.BASELINE_POLICY,
"llm_inference": SimulationAgentMode.LLM_INFERENCE,
"trained_rl": SimulationAgentMode.TRAINED_RL,
}
enum_mode = mode_map.get(str(body.agent_mode))
if enum_mode is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="Invalid agent_mode",
)
try:
run = LiveSimulationSession(
task_id=body.task_id,
agent_mode=enum_mode,
max_steps=body.max_steps,
seed=body.seed,
policy_name=body.policy_name,
model_path=body.model_path,
)
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=str(exc),
) from exc
run_id = sim_runs.create(run)
async def event_generator():
try:
while True:
row, _, done = run.step_once()
yield json.dumps(row, default=str)
if done:
yield json.dumps({"done": True, "session_id": run_id})
break
finally:
run.close()
return EventSourceResponse(event_generator())
@app.get("/simulate/{session_id}/snapshot", tags=["Simulation"], summary="Get simulation/session snapshot")
def simulate_snapshot(session_id: str) -> dict[str, Any]:
try:
run = sim_runs.get(session_id)
return run.snapshot()
except KeyError:
pass
env = _get_session_or_404(session_id)
obs = env._build_observation()
meta = _get_session_meta(session_id)
return {
"session_id": session_id,
"task_id": str(meta.get("task_id", getattr(env, "task_id", env_settings.default_task_id))),
"seed": meta.get("seed"),
"terminated": bool(getattr(env, "terminated", False)),
"truncated": bool(getattr(env, "truncated", False)),
"step_trace_len": len(meta.get("step_trace", [])),
"observation": obs.model_dump(mode="json"),
}
@app.post("/simulate/{session_id}/cancel", tags=["Simulation"], summary="Cancel/close a simulation session")
def simulate_cancel(session_id: str) -> dict[str, str]:
if sim_runs.delete(session_id):
return {"session_id": session_id, "status": "cancelled"}
if sessions.delete(session_id):
_pop_session_meta(session_id)
return {"session_id": session_id, "status": "cancelled"}
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Session '{session_id}' not found or already closed.",
)
@app.get("/simulate/{session_id}/trace", tags=["Simulation"], summary="Get paginated trace for a simulation/session")
def simulate_trace(
session_id: str,
page: int = Query(default=1, ge=1),
page_size: int = Query(default=20, ge=1, le=500),
) -> dict[str, Any]:
trace: list[dict[str, Any]] | None = None
meta = _get_session_meta(session_id)
if isinstance(meta.get("step_trace"), list):
trace = list(meta.get("step_trace", []))
else:
try:
run = sim_runs.get(session_id)
trace = list(run.trace)
except KeyError:
trace = None
if trace is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Session '{session_id}' not found. Call POST /reset first.",
)
total = len(trace)
start = (page - 1) * page_size
end = start + page_size
items = trace[start:end]
total_pages = max(1, math.ceil(total / max(page_size, 1)))
return {
"session_id": session_id,
"total_steps": total,
"page": page,
"page_size": page_size,
"total_pages": total_pages,
"steps": items,
}
@app.get("/actions/schema", tags=["Environment"], summary="Self-describing action schema")
def actions_schema() -> dict[str, Any]:
return {
"total_action_types": 6,
"valid_services": [svc.value for svc in ServiceType],
"valid_priority_modes": [
"urgent_first",
"oldest_first",
"balanced",
"backlog_clearance",
],
"actions": [
{
"action_type": "set_priority_mode",
"description": "Change how the queue is sorted for all services.",
"required_fields": ["action_type", "priority_mode"],
"optional_fields": [],
"notes": "Does not advance time. Call advance_time after.",
"example": {"action_type": "set_priority_mode", "priority_mode": "urgent_first"},
},
{
"action_type": "assign_capacity",
"description": "Deploy one reserve officer to a service queue.",
"required_fields": ["action_type", "service", "officer_delta"],
"optional_fields": [],
"notes": "Blocked if reserve_officers = 0. officer_delta must be 1.",
"example": {"action_type": "assign_capacity", "service": "passport", "officer_delta": 1},
},
{
"action_type": "request_missing_documents",
"description": "Unblock applications waiting for missing documents.",
"required_fields": ["action_type", "service"],
"optional_fields": [],
"notes": "Blocked if blocked_missing_docs = 0 for that service.",
"example": {"action_type": "request_missing_documents", "service": "driving_license"},
},
{
"action_type": "escalate_service",
"description": "Mark one urgent case as emergency priority.",
"required_fields": ["action_type", "service"],
"optional_fields": [],
"notes": "Uses 1 escalation_budget_remaining. Blocked if budget=0.",
"example": {"action_type": "escalate_service", "service": "income_certificate"},
},
{
"action_type": "reallocate_officers",
"description": "Move one officer from source service to target service.",
"required_fields": ["action_type", "service", "target_service", "officer_delta"],
"optional_fields": [],
"notes": "Source must have >= 2 officers. officer_delta must be 1.",
"example": {
"action_type": "reallocate_officers",
"service": "birth_certificate",
"target_service": "passport",
"officer_delta": 1,
},
},
{
"action_type": "advance_time",
"description": "Simulate one working day. THE ONLY action that processes applications.",
"required_fields": ["action_type"],
"optional_fields": [],
"notes": "Always valid. Call this every turn after admin actions.",
"example": {"action_type": "advance_time"},
},
],
}
@app.get("/metrics", tags=["Health"], summary="Operational API metrics")
def metrics() -> dict[str, Any]:
try:
tasks = list_benchmark_tasks()
except Exception:
tasks = []
return {
"active_sessions": sessions.active_count(),
"tasks_available": tasks,
"total_tasks": len(tasks),
"uptime_status": "ok",
"endpoints_total": 16,
"version": "2.0.0",
"phase": "3_rl_training",
"session_ids_active": sessions.list_ids(),
}
api = APIRouter(prefix="/api", tags=["frontend"])
@api.get("/health", response_model=HealthResponse, summary="Health — frontend alias")
def api_health() -> HealthResponse:
return health()
@api.get("/tasks", response_model=TaskListResponse, summary="List available tasks")
def api_tasks() -> TaskListResponse:
return TaskListResponse(tasks=list_tasks())
@api.get("/agents", response_model=list[str], summary="List baseline agent policies")
def api_agents() -> list[str]:
return sorted(POLICIES.keys())
@api.post("/reset", response_model=ResetResponse, summary="Reset episode — frontend alias")
def api_reset(body: ResetRequest | None = Body(default=None)) -> ResetResponse:
return reset(body)
@api.post("/step", response_model=StepResponse, summary="Step episode — frontend alias")
def api_step(body: StepRequest) -> StepResponse:
return step(body)
@api.post("/auto_step", response_model=AutoStepResponse, summary="Compute policy action and step once")
def api_auto_step(body: AutoStepRequest) -> AutoStepResponse:
env = get_or_404(body.session_id)
if env.terminated or env.truncated:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="Episode has already ended. Call /api/reset first.",
)
policy = resolve_policy_or_422(body.agent_policy)
obs = env._build_observation()
action = policy(obs)
next_obs, reward, terminated, truncated, info = env.step(action)
return AutoStepResponse(
session_id=body.session_id,
agent_policy=body.agent_policy,
action=action,
observation=next_obs,
reward=reward,
done=terminated or truncated,
terminated=terminated,
truncated=truncated,
info=info,
)
@api.post("/state", response_model=StateResponse, summary="State — frontend alias")
def api_state(body: StateRequest) -> StateResponse:
return state_post(body)
@api.post("/action-masks", response_model=ActionMaskResponse, summary="Action masks - frontend alias")
def api_action_masks(body: ActionMaskRequest) -> ActionMaskResponse:
return action_masks(body)
@api.get("/actions/schema", summary="Action schema - frontend alias")
def api_actions_schema() -> dict[str, Any]:
return actions_schema()
@api.post("/grade", response_model=GradeResponse, summary="Grade — frontend alias")
def api_grade(body: GradeRequest) -> GradeResponse:
return grade(body)
@api.get("/sessions", response_model=SessionListResponse, summary="List sessions — frontend alias")
def api_sessions() -> SessionListResponse:
return list_sessions()
@api.delete("/sessions/{session_id}", response_model=DeleteSessionResponse, summary="Delete session — frontend alias")
def api_delete_session(session_id: str) -> DeleteSessionResponse:
return delete_session(session_id)
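# Example request body (illustrative values): {"task_id": "district_backlog_easy",
# "agent_policies": ["urgent_first", "backlog_clearance"], "runs": 3, "max_steps": 200,
# "seed_base": 100}. Each policy is replayed `runs` times with seeds seed_base + run_index
# (or unseeded when seed_base is null).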
@api.post("/benchmark", response_model=BenchmarkResponse, summary="Run multiple baseline episodes")
def api_benchmark(body: BenchmarkRequest) -> BenchmarkResponse:
valid_tasks = set(list_tasks())
if body.task_id not in valid_tasks:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id '{body.task_id}'.",
)
if not body.agent_policies:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="agent_policies must contain at least one policy.",
)
agent_results = []
for policy_name in body.agent_policies:
resolve_policy_or_422(policy_name)
run_rows = []
for run_idx in range(body.runs):
seed = None if body.seed_base is None else body.seed_base + run_idx
result = run_policy_episode(
task_id=body.task_id,
policy_name=policy_name,
seed=seed,
max_steps=body.max_steps,
)
run_rows.append(BenchmarkAgentRun(
run_index=run_idx + 1,
seed=seed,
score=float(_result_value(result, "score", 0.0)),
reward_sum=float(_result_value(result, "reward_sum", 0.0)),
completed=int(_result_value(result, "completed", 0)),
backlog=int(_result_value(result, "backlog", 0)),
steps=int(_result_value(result, "steps", 0)),
))
scores = [r.score for r in run_rows]
agent_results.append(BenchmarkAgentSummary(
agent_policy=policy_name,
average_score=float(sum(scores) / len(scores)),
min_score=float(min(scores)),
max_score=float(max(scores)),
runs=run_rows,
))
return BenchmarkResponse(
task_id=body.task_id,
requested_runs=body.runs,
agent_results=agent_results,
)
@api.get("/workflows/components", response_model=WorkflowComponentsResponse, summary="Describe visible workflow components")
def api_workflow_components() -> WorkflowComponentsResponse:
repo_root = REPO_ROOT
baseline_f = repo_root / "baseline_openai.py"
inference_f = repo_root / "inference.py"
phase2_model = next((p for p in _discover_phase12_zip_models() if _phase_from_model_path(p) == 2), None)
components = [
WorkflowComponentStatus(
component="baseline_openai.py",
description="CLI baseline runner using OpenAI-compatible/NVIDIA endpoints.",
available=baseline_f.exists(),
command=r".\.venv\3.11\Scripts\python.exe baseline_openai.py --task district_backlog_easy",
notes="Uses API keys from environment variables.",
),
WorkflowComponentStatus(
component="inference.py",
description="Submission-style inference runner with strict START/STEP/END logging.",
available=inference_f.exists(),
command=r".\.venv\3.11\Scripts\python.exe inference.py",
notes="Reads HF/OpenAI-compatible credentials from environment variables.",
),
WorkflowComponentStatus(
component="phase2_final.zip",
description="Trained Phase 2 PPO checkpoint used for local RL evaluation/execution.",
available=phase2_model is not None,
command=(
f".\\.venv\\3.11\\Scripts\\python.exe -m rl.evaluate --model {phase2_model} --episodes 3 --model-type maskable"
if phase2_model is not None
else r".\.venv\3.11\Scripts\python.exe -m rl.evaluate --model results/best_model/phase2_final.zip --episodes 3 --model-type maskable"
),
),
WorkflowComponentStatus(
component="openenv-api",
description="Standard environment API exposed through reset/step/state/grade.",
available=True,
command="POST /reset, POST /step, GET+POST /state, POST /grade",
),
]
return WorkflowComponentsResponse(components=components)
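# Workflow runs execute the selected repo script as a subprocess from the repo root with a
# hard timeout; stdout/stderr are returned verbatim so the frontend can render raw logs.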
@api.post("/workflows/run", response_model=WorkflowRunResponse, summary="Execute a workflow component as a subprocess")
def api_workflow_run(body: WorkflowRunRequest) -> WorkflowRunResponse:
repo_root = REPO_ROOT
python_bin = shutil.which("python") or "python"
cmd = []
if body.workflow_id == "baseline_openai":
cmd = [python_bin, "baseline_openai.py", "--task", "district_backlog_easy"]
elif body.workflow_id == "inference":
cmd = [python_bin, "inference.py", "--max-steps", str(body.max_steps)]
elif body.workflow_id == "phase2_eval":
cmd = [python_bin, "-m", "rl.evaluate", "--model", body.model_path, "--episodes", str(body.episodes), "--model-type", body.model_type]
start_t = time.time()
try:
proc = subprocess.run(
cmd,
cwd=str(repo_root),
capture_output=True,
text=True,
timeout=body.timeout_seconds,
check=False,
)
duration = time.time() - start_t
return WorkflowRunResponse(
workflow_id=body.workflow_id,
command=cmd,
exit_code=proc.returncode,
duration_seconds=round(duration, 3),
stdout=proc.stdout or "",
stderr=proc.stderr or "",
timed_out=False,
)
except subprocess.TimeoutExpired as exc:
duration = time.time() - start_t
return WorkflowRunResponse(
workflow_id=body.workflow_id,
command=cmd,
exit_code=-1,
duration_seconds=round(duration, 3),
stdout=exc.stdout or "",
stderr=exc.stderr or "",
timed_out=True,
)
@api.get("/openenv_compliance", response_model=OpenEnvComplianceResponse, summary="Check OpenEnv interface compliance")
def api_openenv_compliance(
run_validate: bool = Query(default=False)
) -> OpenEnvComplianceResponse:
repo_root = REPO_ROOT
openenv_yaml = repo_root / "openenv.yaml"
route_paths = {getattr(r, "path", "") for r in app.routes}
def has_path(path: str) -> bool:
return path in route_paths
items = [
OpenEnvComplianceItem(
key="typed_action_model",
label="Typed Action model (Pydantic)",
status="pass" if issubclass(ActionModel, BaseModel) else "fail",
detail=f"ActionModel type={ActionModel.__name__}",
),
OpenEnvComplianceItem(
key="typed_observation_model",
label="Typed Observation model (Pydantic)",
status="pass" if issubclass(ObservationModel, BaseModel) else "fail",
detail=f"ObservationModel type={ObservationModel.__name__}",
),
OpenEnvComplianceItem(
key="typed_step_info_model",
label="Typed step info model (Pydantic)",
status="pass" if issubclass(StepInfoModel, BaseModel) else "fail",
detail=f"StepInfoModel type={StepInfoModel.__name__}",
),
OpenEnvComplianceItem(
key="api_step_reset_state",
label="step/reset/state API exposed",
status="pass" if (has_path("/reset") and has_path("/step") and has_path("/state")) else "fail",
detail="Expected endpoints: POST /reset, POST /step, GET+POST /state",
),
OpenEnvComplianceItem(
key="openenv_yaml",
label="openenv.yaml metadata file",
status="pass" if openenv_yaml.exists() else "fail",
detail=str(openenv_yaml),
),
]
validate_rc = validate_out = validate_err = None
if run_validate:
openenv_bin = shutil.which("openenv")
if openenv_bin is None:
items.append(OpenEnvComplianceItem(
key="openenv_validate",
label="openenv validate execution",
status="unknown",
detail="openenv CLI not found in runtime PATH.",
))
else:
try:
proc = subprocess.run(
[openenv_bin, "validate"],
cwd=str(repo_root),
capture_output=True,
text=True,
timeout=120,
check=False,
)
except subprocess.TimeoutExpired:
# A hung CLI should degrade to a failed check, not an unhandled 500.
items.append(OpenEnvComplianceItem(
key="openenv_validate",
label="openenv validate execution",
status="fail",
detail="openenv validate timed out after 120 seconds.",
))
else:
validate_rc = int(proc.returncode)
validate_out = (proc.stdout or "")[-4000:]
validate_err = (proc.stderr or "")[-2000:]
items.append(OpenEnvComplianceItem(
key="openenv_validate",
label="openenv validate execution",
status="pass" if proc.returncode == 0 else "fail",
detail=f"Exit code: {proc.returncode}",
))
else:
items.append(OpenEnvComplianceItem(
key="openenv_validate",
label="openenv validate execution",
status="unknown",
detail="Not executed in this check. Pass run_validate=true to execute.",
))
return OpenEnvComplianceResponse(
checked_at=time.time(),
items=items,
openenv_validate_exit_code=validate_rc,
openenv_validate_stdout_tail=validate_out,
openenv_validate_stderr_tail=validate_err,
)
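# Usage note (illustrative): GET /api/openenv_compliance runs only the static checks;
# adding ?run_validate=true additionally shells out to the `openenv validate` CLI when
# it is on PATH, otherwise that item is reported with status "unknown".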
@api.get("/rl_models", response_model=RLModelsResponse, summary="List available trained RL model checkpoints")
def api_rl_models() -> RLModelsResponse:
models: list[RLModelInfo] = []
for path in _discover_phase12_zip_models():
phase = _phase_from_model_path(path)
model_type: Literal["maskable", "recurrent"] = (
"recurrent" if "recurrent" in path.name.lower() else "maskable"
)
label = f"Phase {phase} - {path.stem.replace('_', ' ')}"
models.append(
RLModelInfo(
label=label,
path=str(path),
exists=True,
model_type=model_type,
)
)
return RLModelsResponse(models=models)
@api.post("/rl_models/upload", summary="Upload RL checkpoint zip to persistent storage")
async def api_rl_model_upload(
phase: int = Query(..., ge=1, le=2, description="Model phase bucket (1 or 2)"),
file: UploadFile = File(..., description="Checkpoint zip file"),
) -> dict[str, Any]:
name = (file.filename or "").strip()
if not name:
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, detail="Missing filename.")
if not name.lower().endswith(".zip"):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="Only .zip checkpoint files are accepted.",
)
safe_name = Path(name).name
try:
base_dir = _model_storage_base_dir()
except RuntimeError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=str(exc),
) from exc
target_dir = base_dir / f"phase{phase}"
try:
target_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"Failed to initialize upload directory: {exc}",
) from exc
target_path = target_dir / safe_name
total = 0
with target_path.open("wb") as out:
while True:
chunk = await file.read(1024 * 1024)
if not chunk:
break
out.write(chunk)
total += len(chunk)
await file.close()
return {
"saved": True,
"phase": phase,
"filename": safe_name,
"size_bytes": total,
"path": str(target_path),
}
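# Illustrative upload call (multipart field name "file", phase as query parameter;
# host and port depend on ServerSettings):
#   curl -X POST "http://<host>:<port>/api/rl_models/upload?phase=2" -F "file=@checkpoint.zip"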
@api.get(
"/rl/models",
response_model=list[ModelInfo],
summary="List discovered RL model checkpoints (V2 slash alias)",
)
def api_rl_models_v2() -> list[ModelInfo]:
"""
Slash-path alias for frontend clients that call `/api/rl/models`.
Returns the same V2 payload shape as root `/rl/models`.
"""
return rl_models_v2()
@api.post("/rl_run", response_model=RLRunResponse, summary="Run one episode with a trained RL checkpoint")
def api_rl_run(body: RLRunRequest) -> RLRunResponse:
if body.task_id not in set(list_tasks()):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id '{body.task_id}'.",
)
model_path = resolve_model_path_or_422(body.model_path)
model = load_model_cached_or_503(model_path, body.model_type)
try:
import numpy as np
from rl.gov_workflow_env import GovWorkflowGymEnv
except ModuleNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="RL runtime dependencies are not available. Install requirements-rl.txt.",
) from exc
seed = body.seed if body.seed is not None else int(TASKS[body.task_id].seed)
env = GovWorkflowGymEnv(task_id=body.task_id, seed=seed, hard_action_mask=True)
obs, _ = env.reset(seed=seed)
trace: list[RLRunStep] = []
total_reward = 0.0
done = False
lstm_state: Any = None
episode_start = np.array([True], dtype=bool)
for idx in range(1, body.max_steps + 1):
masks = env.action_masks()
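# Recurrent checkpoints carry LSTM state across steps, so the previous state and an
# episode_start flag are threaded through predict(); maskable checkpoints instead
# receive the current action mask so invalid actions are never sampled.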
if body.model_type == "recurrent":
action, lstm_state = model.predict(
obs, state=lstm_state, episode_start=episode_start, deterministic=True
)
else:
try:
from sb3_contrib.common.maskable.utils import get_action_masks # type: ignore[import-not-found]
except ModuleNotFoundError:
from sb3contrib.common.maskable.utils import get_action_masks # type: ignore[import-not-found]
action, _ = model.predict(obs, action_masks=get_action_masks(env), deterministic=True)
action_idx = int(action.item()) if hasattr(action, "item") else int(action)
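# Guard against a prediction that is out of range or masked out: fall back to the
# first valid action, or to index 18 (assumed here to be a safe no-op) when the mask
# admits nothing.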
if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
valid = np.flatnonzero(masks)
action_idx = int(valid[0]) if valid.size > 0 else 18
obs, reward, terminated, truncated, info = env.step(action_idx)
done = bool(terminated or truncated)
total_reward += float(reward)
core_obs = env.core_env.build_observation()
trace.append(RLRunStep(
step=idx,
action_index=action_idx,
action_label=decode_action_index(action_idx),
reward=float(reward),
backlog=int(core_obs.total_backlog),
completed=int(core_obs.total_completed),
sla_breaches=int(core_obs.total_sla_breaches),
fairness_gap=float(core_obs.fairness_gap),
done=done,
))
if body.model_type == "recurrent":
episode_start = np.array([done], dtype=bool)
if done:
break
final_state = env.core_env.state()
grade_result = grade_episode(final_state)
return RLRunResponse(
model_path=str(model_path),
model_type=body.model_type,
task_id=body.task_id,
seed=seed,
total_steps=int(final_state.total_steps),
total_reward=float(total_reward),
grader_score=float(grade_result.score),
grader_name=grade_result.grader_name,
trace=trace,
)
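# Illustrative request body for POST /api/rl_run (fields from the RLRunRequest usage
# above; paths and values are placeholders):
#   {"task_id": "district_backlog_easy", "model_path": "models/phase1/example.zip",
#    "model_type": "maskable", "max_steps": 80, "seed": 42}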
@api.post("/rl_evaluate", response_model=RLEvaluateResponse, summary="Evaluate trained model across tasks")
def api_rl_evaluate(body: RLEvaluateRequest) -> RLEvaluateResponse:
model_path = resolve_model_path_or_422(body.model_path)
task_ids = body.task_ids or list_tasks()
valid_tasks = set(list_tasks())
unknown = [t for t in task_ids if t not in valid_tasks]
if unknown:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id values: {unknown}",
)
try:
from rl.evaluate import evaluate_model
except ModuleNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="RL evaluation dependencies are not available. Install requirements-rl.txt.",
) from exc
try:
eval_rows = evaluate_model(
model_path=str(model_path),
task_ids=task_ids,
n_episodes=body.episodes,
verbose=False,
model_type=body.model_type,
)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, detail=str(exc)) from exc
results = [
RLEvaluateTaskResult(
task_id=row.task_id,
grader_score=float(row.grader_score),
total_reward=float(row.total_reward),
total_steps=int(row.total_steps),
total_completed=int(row.total_completed),
total_sla_breaches=int(row.total_sla_breaches),
fairness_gap=float(row.fairness_gap),
)
for row in eval_rows
]
avg_score = float(sum(x.grader_score for x in results) / max(len(results), 1))
return RLEvaluateResponse(
model_path=str(model_path),
model_type=body.model_type,
episodes=body.episodes,
average_grader_score=avg_score,
results=results,
)
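# Illustrative request body for POST /api/rl_evaluate (task_ids defaults to every
# registered task when omitted; values are placeholders):
#   {"model_path": "models/phase2/example.zip", "model_type": "maskable",
#    "episodes": 3, "task_ids": ["district_backlog_easy"]}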
@api.post("/simulation/run", response_model=SimulationResponse, summary="Run a workflow simulation")
def api_simulation_run(body: SimulationRequest) -> SimulationResponse:
if body.task_id not in set(list_tasks()):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id '{body.task_id}'.",
)
if body.agent_mode == SimulationAgentMode.BASELINE_POLICY and body.policy_name not in POLICIES:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown policy_name '{body.policy_name}'. Available: {sorted(POLICIES.keys())}",
)
try:
run = run_simulation(
task_id=body.task_id,
agent_mode=body.agent_mode,
max_steps=body.max_steps,
seed=body.seed,
policy_name=body.policy_name,
model_path=body.model_path,
model_type=body.model_type,
)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, detail=str(exc)) from exc
except ModuleNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="RL runtime dependencies are unavailable. Install requirements-rl.txt.",
) from exc
run_id = str(uuid4())
if persistence.enabled:
persistence.upsert_simulation_run(
run_id=run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
status="completed",
payload={
"task_id": run.task_id,
"agent_mode": run.agent_mode,
"seed": run.seed,
"total_reward": run.total_reward,
"score": run.score,
"grader_name": run.grader_name,
"summary": run.summary,
"trace": run.trace,
},
)
return SimulationResponse(
task_id=run.task_id,
agent_mode=run.agent_mode,
seed=run.seed,
total_reward=run.total_reward,
score=run.score,
grader_name=run.grader_name,
summary=run.summary,
trace=[SimulationStep(**row) for row in run.trace],
)
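# Illustrative request body for POST /api/simulation/run (the agent_mode string is
# assumed to match SimulationAgentMode's serialized form):
#   {"task_id": "district_backlog_easy", "agent_mode": "baseline_policy",
#    "policy_name": "backlog_clearance", "max_steps": 80, "seed": 42}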
@api.post("/simulation/live/start", response_model=SimulationLiveStartResponse, summary="Start a live step-by-step simulation")
def api_simulation_live_start(body: SimulationLiveStartRequest) -> SimulationLiveStartResponse:
if body.task_id not in set(list_tasks()):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown task_id '{body.task_id}'.",
)
if body.agent_mode == SimulationAgentMode.BASELINE_POLICY and body.policy_name not in POLICIES:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail=f"Unknown policy_name '{body.policy_name}'. Available: {sorted(POLICIES.keys())}",
)
try:
run = LiveSimulationSession(
task_id=body.task_id,
agent_mode=body.agent_mode,
max_steps=body.max_steps,
seed=body.seed,
policy_name=body.policy_name,
model_path=body.model_path,
model_type=body.model_type,
)
except (ValueError, ModuleNotFoundError) as exc:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT
if isinstance(exc, ValueError) else status.HTTP_503_SERVICE_UNAVAILABLE,
detail=str(exc),
) from exc
run_id = sim_runs.create(run)
if persistence.enabled:
persistence.upsert_simulation_run(
run_id=run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
status="running",
payload={
"task_id": run.task_id,
"agent_mode": run.agent_mode,
"seed": run.seed,
"max_steps": run.max_steps,
"summary": None,
"trace_len": 0,
"route_plan": list(run.llm_route),
},
)
return SimulationLiveStartResponse(
run_id=run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
seed=run.seed,
max_steps=run.max_steps,
start_log=_log_line_text(run.start_line()),
route_plan=list(run.llm_route),
)
@api.post("/simulation/live/step", response_model=SimulationLiveStepResponse, summary="Execute one step for a live simulation")
def api_simulation_live_step(body: SimulationLiveStepRequest) -> SimulationLiveStepResponse:
run = get_sim_or_404(body.run_id)
if run.done:
if persistence.enabled:
persistence.upsert_simulation_run(
run_id=body.run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
status="completed",
payload={
"task_id": run.task_id,
"agent_mode": run.agent_mode,
"seed": run.seed,
"max_steps": run.max_steps,
"total_reward": float(run.total_reward),
"score": run.score,
"grader_name": run.grader_name,
"summary": run.summary,
"trace": list(run.trace),
},
)
return SimulationLiveStepResponse(
run_id=body.run_id,
done=True,
total_reward=float(run.total_reward),
score=run.score,
grader_name=run.grader_name,
summary=run.summary,
end_log=_log_line_text(run.end_line()),
)
try:
row, step_log, done = run.step_once()
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Simulation step failed: {exc}",
) from exc
if persistence.enabled:
persistence.upsert_simulation_run(
run_id=body.run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
status="completed" if done else "running",
payload={
"task_id": run.task_id,
"agent_mode": run.agent_mode,
"seed": run.seed,
"max_steps": run.max_steps,
"total_reward": float(run.total_reward),
"score": run.score,
"grader_name": run.grader_name,
"summary": run.summary,
"trace": list(run.trace) if done else [],
"trace_len": len(run.trace),
},
)
return SimulationLiveStepResponse(
run_id=body.run_id,
done=done,
step=SimulationStep(**row),
step_log=_log_line_text(step_log) if step_log is not None else None,
end_log=_log_line_text(run.end_line()) if done else None,
total_reward=float(run.total_reward),
score=run.score,
grader_name=run.grader_name,
summary=run.summary,
)
@api.get("/simulation/live/{run_id}", response_model=SimulationLiveStateResponse, summary="Get live simulation state")
def api_simulation_live_state(run_id: str) -> SimulationLiveStateResponse:
run = get_sim_or_404(run_id)
return SimulationLiveStateResponse(run_id=run_id, state=run.snapshot())
@api.post("/simulation/live/{run_id}/stop", response_model=dict, summary="Stop and remove a live simulation run")
def api_simulation_live_stop(run_id: str) -> dict[str, Any]:
run: LiveSimulationSession | None = None
try:
run = sim_runs.get(run_id)
except Exception:
run = None
deleted = sim_runs.delete(run_id)
if not deleted:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Simulation run '{run_id}' not found.",
)
if persistence.enabled and run is not None:
persistence.upsert_simulation_run(
run_id=run_id,
task_id=run.task_id,
agent_mode=run.agent_mode,
status="stopped",
payload={
"task_id": run.task_id,
"agent_mode": run.agent_mode,
"seed": run.seed,
"max_steps": run.max_steps,
"total_reward": float(run.total_reward),
"score": run.score,
"grader_name": run.grader_name,
"summary": run.summary,
"trace_len": len(run.trace),
},
)
return {"run_id": run_id, "stopped": True}
@api.get("/training_jobs", response_model=TrainingJobsListResponse, summary="List all background RL training jobs")
def api_training_jobs() -> TrainingJobsListResponse:
return TrainingJobsListResponse(jobs=training_jobs.list_jobs())
@api.get("/training_jobs/list", response_model=TrainingJobsListResponse, summary="List training jobs β€” stable alias")
def api_training_jobs_list() -> TrainingJobsListResponse:
return api_training_jobs()
@api.get("/training_jobs/{job_id}", response_model=dict, summary="Get one background RL training job")
def api_training_job(job_id: str) -> dict[str, Any]:
job = training_jobs.get_job(job_id)
if job is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Training job '{job_id}' not found.")
return job
@api.post("/training_jobs", response_model=dict, summary="Start RL training in a background process")
def api_training_start(body: TrainingJobStartRequest) -> dict[str, Any]:
try:
import stable_baselines3 # noqa: F401
try:
import sb3_contrib # noqa: F401
except ModuleNotFoundError:
import sb3contrib # noqa: F401
import gymnasium # noqa: F401
except ModuleNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="RL training dependencies are unavailable. Install requirements-rl.txt.",
) from exc
cfg = (
body.config_path
or ("rl/configs/curriculum.yaml" if body.phase == 2 else "rl/configs/ppo_easy.yaml")
)
return training_jobs.start_job(
phase=body.phase,
timesteps=body.timesteps,
n_envs=body.n_envs,
seed=body.seed,
config_path=cfg,
)
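# Illustrative request body for POST /api/training_jobs (config_path is optional and
# defaults per phase as shown above; values are placeholders):
#   {"phase": 2, "timesteps": 200000, "n_envs": 4, "seed": 42,
#    "config_path": "rl/configs/curriculum.yaml"}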
@api.post("/training_jobs/{job_id}/stop", response_model=TrainingJobStopResponse, summary="Stop a background training job")
def api_training_stop(job_id: str) -> TrainingJobStopResponse:
job = training_jobs.stop_job(job_id)
if job is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Training job '{job_id}' not found.")
return TrainingJobStopResponse(stopped=True, job_id=job_id, status=str(job.get("status", "unknown")))
@api.delete("/training_jobs/{job_id}", response_model=TrainingJobDeleteResponse, summary="Delete one training job from history")
def api_training_job_delete(job_id: str, clear_artifacts: bool = Query(default=False)) -> TrainingJobDeleteResponse:
deleted = training_jobs.delete_job(job_id, clear_artifacts=clear_artifacts)
if not deleted:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Training job '{job_id}' not found.")
return TrainingJobDeleteResponse(deleted=True, job_id=job_id)
@api.delete("/training_jobs", response_model=HistoryClearResponse, summary="Clear persisted training job history")
def api_training_jobs_clear(clear_artifacts: bool = Query(default=False)) -> HistoryClearResponse:
deleted = training_jobs.clear_jobs(clear_artifacts=clear_artifacts)
return HistoryClearResponse(cleared=True, deleted_rows=int(deleted), scope="training_jobs")
@api.get("/history/simulations", response_model=SimulationHistoryListResponse, summary="List persisted simulation runs")
def api_history_simulations(limit: int = Query(default=20, ge=1, le=500)) -> SimulationHistoryListResponse:
if not persistence.enabled:
return SimulationHistoryListResponse(runs=[])
return SimulationHistoryListResponse(runs=persistence.list_simulation_runs(limit=limit))
@api.delete("/history/simulations", response_model=HistoryClearResponse, summary="Clear persisted simulation history")
def api_history_simulations_clear() -> HistoryClearResponse:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
deleted = persistence.clear_simulation_runs()
return HistoryClearResponse(cleared=True, deleted_rows=int(deleted), scope="simulation_history")
@api.get("/history/simulations/{run_id}", response_model=dict, summary="Get one persisted simulation run")
def api_history_simulation(run_id: str) -> dict[str, Any]:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
row = persistence.get_simulation_run(run_id)
if row is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Simulation history '{run_id}' not found.")
return row
@api.post("/history/comparisons", response_model=ComparisonHistoryCreateResponse, summary="Persist a model-comparison result snapshot")
def api_history_comparison_create(body: ComparisonHistoryCreateRequest) -> ComparisonHistoryCreateResponse:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
payload = body.model_dump(mode="json")
comparison_id = persistence.create_comparison_run(payload)
if comparison_id is None:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to persist comparison result.")
return ComparisonHistoryCreateResponse(comparison_id=comparison_id)
@api.get("/history/comparisons", response_model=ComparisonHistoryListResponse, summary="List persisted model-comparison snapshots")
def api_history_comparisons(limit: int = Query(default=20, ge=1, le=500)) -> ComparisonHistoryListResponse:
if not persistence.enabled:
return ComparisonHistoryListResponse(comparisons=[])
return ComparisonHistoryListResponse(comparisons=persistence.list_comparison_runs(limit=limit))
@api.get("/history/comparisons/{comparison_id}", response_model=dict, summary="Get one persisted model-comparison snapshot")
def api_history_comparison(comparison_id: str) -> dict[str, Any]:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
row = persistence.get_comparison_run(comparison_id)
if row is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Comparison history '{comparison_id}' not found.")
return row
@api.delete("/history/comparisons", response_model=HistoryClearResponse, summary="Clear persisted comparison history")
def api_history_comparisons_clear() -> HistoryClearResponse:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
deleted = persistence.clear_comparison_runs()
return HistoryClearResponse(cleared=True, deleted_rows=int(deleted), scope="comparison_history")
@api.post("/history/comparisons/{comparison_id}/repair", response_model=ComparisonHistoryRepairResponse, summary="Repair legacy comparison snapshot")
def api_history_comparison_repair(comparison_id: str) -> ComparisonHistoryRepairResponse:
if not persistence.enabled:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Persistence is disabled.")
row = persistence.get_comparison_run(comparison_id)
if row is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Comparison history '{comparison_id}' not found.")
result = row.get("result") if isinstance(row.get("result"), dict) else {}
include_llm = bool(row.get("include_llm", True))
has_baseline = isinstance(result.get("baselineRuns"), list) and len(result["baselineRuns"]) > 0
has_llm = not include_llm or (isinstance(result.get("llmRuns"), list) and len(result["llmRuns"]) > 0)
if has_baseline and has_llm:
return ComparisonHistoryRepairResponse(
comparison_id=comparison_id,
repaired=False,
detail="No repair needed. Snapshot already contains per-run rows.",
)
task_id = str(row.get("task_id") or env_settings.default_task_id)
baseline_policy = str(row.get("baseline_policy") or "backlog_clearance")
runs = max(1, int(row.get("runs") or 1))
steps = max(1, int(row.get("steps") or 80))
seed_base = int(row.get("seed_base") or 100)
baseline_runs: list[dict[str, Any]] = []
for i in range(runs):
seed = seed_base + i
rr = run_policy_episode(task_id=task_id, policy_name=baseline_policy, seed=seed, max_steps=steps)
baseline_runs.append({
"run_index": i + 1,
"seed": int(rr.seed),
"score": float(rr.score),
"reward_sum": float(rr.reward_sum),
"completed": int(rr.completed),
"backlog": int(rr.backlog),
})
llm_runs: list[dict[str, Any]] = []
llm_error: str | None = None
if include_llm:
try:
for i in range(runs):
seed = seed_base + i
sim = run_simulation(task_id=task_id, agent_mode=SimulationAgentMode.LLM_INFERENCE,
max_steps=steps, seed=seed, policy_name="backlog_clearance")
llm_runs.append({
"run_index": i + 1,
"seed": int(sim.seed),
"score": float(sim.score),
"reward_sum": float(sim.total_reward),
"completed": int(sim.summary.get("total_completed", 0)),
"backlog": int(sim.summary.get("total_backlog", 0)),
})
except Exception as exc:
llm_error = str(exc)
baseline_score = float(sum(float(x["score"]) for x in baseline_runs) / max(1, len(baseline_runs)))
llm_score = float(sum(float(x["score"]) for x in llm_runs) / max(1, len(llm_runs))) if llm_runs else result.get("llmScore")
repaired_result = dict(result)
repaired_result["baselineScore"] = baseline_score
repaired_result["baselineRuns"] = baseline_runs
repaired_result["llmRuns"] = llm_runs
repaired_result["llmScore"] = llm_score
if llm_error:
repaired_result["llmError"] = llm_error
updated = dict(row)
updated["result"] = repaired_result
updated["updated_at"] = time.time()
saved_id = persistence.create_comparison_run(updated)
if saved_id is None:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to persist repaired comparison snapshot.")
return ComparisonHistoryRepairResponse(
comparison_id=comparison_id,
repaired=True,
detail="Repaired legacy snapshot by backfilling per-run baseline/LLM rows.",
)
# ─────────────────────────────────────────────────────────────────────────────
# ROUTER MOUNTING & COMPATIBILITY ALIASES (versioned /api/v1 mirrors plus prefix-free paths for clients that don't route through /api)
# ─────────────────────────────────────────────────────────────────────────────
app.include_router(api)
def _normalize_api_prefix(prefix: str) -> str:
p = (prefix or "").strip()
if not p:
return ""
if not p.startswith("/"):
p = "/" + p
return p.rstrip("/")
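# Illustrative behaviour: _normalize_api_prefix("api/v1/") -> "/api/v1";
# _normalize_api_prefix("") -> "" (an empty prefix disables the alias mounting below).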
def _mount_versioned_api_aliases(
application: FastAPI,
*,
source_prefix: str,
target_prefix: str,
) -> None:
"""Mirror source API routes into a versioned target prefix."""
source_prefix = _normalize_api_prefix(source_prefix)
target_prefix = _normalize_api_prefix(target_prefix)
if not source_prefix or not target_prefix or source_prefix == target_prefix:
return
existing_keys: set[tuple[str, tuple[str, ...]]] = set()
for route in application.routes:
if isinstance(route, APIRoute):
methods = tuple(sorted(m for m in (route.methods or set()) if m not in {"HEAD", "OPTIONS"}))
existing_keys.add((route.path, methods))
for route in list(application.routes):
if not isinstance(route, APIRoute):
continue
if not route.path.startswith(f"{source_prefix}/"):
continue
if route.path.startswith(f"{target_prefix}/"):
continue
methods = sorted(m for m in (route.methods or set()) if m not in {"HEAD", "OPTIONS"})
if not methods:
continue
suffix = route.path[len(source_prefix):]
versioned_path = f"{target_prefix}{suffix}"
route_key = (versioned_path, tuple(methods))
if route_key in existing_keys:
continue
base_op = route.operation_id or route.name or "operation"
path_token = versioned_path.strip("/").replace("/", "_").replace("{", "").replace("}", "")
versioned_operation_id = f"{base_op}__v1__{path_token}"
application.add_api_route(
path=versioned_path,
endpoint=route.endpoint,
methods=methods,
response_model=route.response_model,
status_code=route.status_code,
tags=list(route.tags or []),
dependencies=list(route.dependencies),
summary=route.summary,
description=route.description,
response_description=route.response_description,
responses=dict(route.responses),
deprecated=route.deprecated,
operation_id=versioned_operation_id,
response_class=route.response_class,
include_in_schema=route.include_in_schema,
)
existing_keys.add(route_key)
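# Illustrative effect: with the defaults below, an existing route such as
# POST /api/rl_run is mirrored as POST /api/v1/rl_run, reusing the same endpoint
# function and response model but registered under a distinct operation_id.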
enable_structured_v1_api = os.getenv("ENABLE_STRUCTURED_V1_API", "1").strip().lower() in {
"1",
"true",
"yes",
"on",
}
structured_source_prefix = os.getenv("OPENENV_API_SOURCE_PREFIX", "/api")
structured_target_prefix = os.getenv("OPENENV_API_V1_PREFIX", "/api/v1")
if enable_structured_v1_api:
_mount_versioned_api_aliases(
app,
source_prefix=structured_source_prefix,
target_prefix=structured_target_prefix,
)
def _route_exists(application: FastAPI, path: str, method: str) -> bool:
needle = method.upper()
for route in application.routes:
if not isinstance(route, APIRoute):
continue
if route.path != path:
continue
if needle in (route.methods or set()):
return True
return False
for _v1_alias, _endpoint, _method, _model in [
("/api/v1/agents", api_agents, "GET", list[str]),
("/api/v1/rl_models", api_rl_models, "GET", RLModelsResponse),
("/api/v1/rl/models", api_rl_models_v2, "GET", list[ModelInfo]),
]:
if _route_exists(app, _v1_alias, _method):
continue
if _method == "GET":
app.get(_v1_alias, response_model=_model, include_in_schema=False)(_endpoint)
else:
app.post(_v1_alias, response_model=_model, include_in_schema=False)(_endpoint)
# OpenEnv-native routes under /openenv so both contracts are visible
# in a single Swagger UI without colliding with existing root endpoints.
try:
from server.app import app as _openenv_app
app.include_router(_openenv_app.router, prefix="/openenv")
except Exception:
# Keep primary app startup resilient even if optional OpenEnv adapter
# dependencies are unavailable in a minimal runtime.
pass
# Direct top-level aliases for selected /api/* routes (prefix-free compatibility paths)
for _alias, _endpoint, _method, _model in [
("/simulation/run", api_simulation_run, "POST", SimulationResponse),
("/simulation/live/start", api_simulation_live_start, "POST", SimulationLiveStartResponse),
("/simulation/live/step", api_simulation_live_step, "POST", SimulationLiveStepResponse),
("/rl_models", api_rl_models, "GET", RLModelsResponse),
("/rl_run", api_rl_run, "POST", RLRunResponse),
("/rl_evaluate", api_rl_evaluate, "POST", RLEvaluateResponse),
("/openenv_compliance", api_openenv_compliance, "GET", OpenEnvComplianceResponse),
("/training_jobs", api_training_jobs, "GET", TrainingJobsListResponse),
("/history/simulations", api_history_simulations, "GET", SimulationHistoryListResponse),
("/history/comparisons", api_history_comparisons, "GET", ComparisonHistoryListResponse),
("/workflows/run", api_workflow_run, "POST", WorkflowRunResponse),
]:
if _method == "GET":
app.get(_alias, response_model=_model, include_in_schema=False)(_endpoint)
else:
app.post(_alias, response_model=_model, include_in_schema=False)(_endpoint)
# ─────────────────────────────────────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host=server_settings.host,
port=server_settings.port,
log_level=server_settings.log_level,
workers=server_settings.workers, # always 1 for in-memory sessions
reload=False,
)
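# Running directly (illustrative; actual host, port, and log level come from ServerSettings):
#   python -m app.main
# or via the uvicorn CLI, e.g.:
#   uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 1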