| |
| """Evaluate baseline policies on one sampled case.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| import os |
| from pathlib import Path |
|
|
| import sys |
|
|
# Put the parent of this file's directory on the import path before the
# ``app.*`` imports that follow, so they can resolve when this file is run
# directly as a script.
ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
|
|
| from app.api.service import APIService |
| from app.env.env_core import PolyGuardEnv |
|
|
|
|
| def _evaluate_no_change_baseline(episodes: int = 8) -> dict[str, float | int | str]: |
| rewards: list[float] = [] |
| legal: list[float] = [] |
| for idx in range(episodes): |
| env = PolyGuardEnv() |
| env.reset(seed=99 + idx, difficulty="medium") |
| candidates = env.get_candidate_actions() |
| action = next((item for item in candidates if item.get("candidate_id") == "cand_01"), None) |
| if action is None and candidates: |
| action = candidates[0] |
| if action is None: |
| rewards.append(0.001) |
| legal.append(0.0) |
| continue |
| _, reward, _, info = env.step(action) |
| rewards.append(float(reward)) |
| legal.append(1.0 if bool(info.get("safety_report", {}).get("legal")) else 0.0) |
|
|
| return { |
| "baseline_policy": "no_change_candidate", |
| "episodes": episodes, |
| "avg_reward": round(sum(rewards) / len(rewards), 6) if rewards else 0.0, |
| "legality_rate": round(sum(legal) / len(legal), 6) if legal else 0.0, |
| "success_rate": 0.0, |
| } |
|
|
|
|
def main() -> None:
    """Run the baseline and policy-stack evaluations and write a JSON report.

    Steps, in order:

    1. Reset an ``APIService`` (seed 99, medium difficulty) and collect the
       output of ``run_baselines()``.
    2. Merge in the no-change baseline summary; its keys overwrite any
       same-named keys from step 1.
    3. For each policy stack, set ``POLYGUARD_POLICY_STACK``, reset the
       service and take up to three ``orchestrate()`` steps (stopping early
       when a step reports ``done``), recording mean reward, legality rate
       and step count.
    4. Write the combined result to ``outputs/reports/baselines.json`` under
       the parent of this file's directory and print a completion marker.

    ``POLYGUARD_POLICY_STACK`` is restored to its previous value (or removed
    if it was unset) before the report is written, even if a rollout raises,
    so the ablation loop does not leak process-global state to later code.
    """
    env_key = "POLYGUARD_POLICY_STACK"

    service = APIService()
    service.reset(seed=99, difficulty="medium")
    out = service.run_baselines()
    out.update(_evaluate_no_change_baseline())

    ablations: dict[str, dict[str, float]] = {}
    previous_stack = os.environ.get(env_key)
    try:
        for stack in ("bandit-only", "llm-only", "llm+bandit"):
            # NOTE(review): presumably the service reads this variable to pick
            # its policy stack -- confirm where it is consumed.
            os.environ[env_key] = stack
            service.reset(seed=99, difficulty="medium")
            rollout_rewards: list[float] = []
            legal: list[float] = []
            for _ in range(3):
                step = service.orchestrate()
                rollout_rewards.append(float(step.get("reward", 0.0)))
                legal.append(1.0 if bool(step.get("critic", {}).get("legal")) else 0.0)
                if step.get("done"):
                    break
            ablations[stack] = {
                "avg_reward": (sum(rollout_rewards) / len(rollout_rewards)) if rollout_rewards else 0.0,
                "legality_rate": (sum(legal) / len(legal)) if legal else 0.0,
                "steps": float(len(rollout_rewards)),
            }
    finally:
        # Put the environment back exactly as it was found.
        if previous_stack is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_stack
    out["policy_stack_ablations"] = ablations

    root = Path(__file__).resolve().parents[1]
    report = root / "outputs" / "reports"
    report.mkdir(parents=True, exist_ok=True)
    (report / "baselines.json").write_text(json.dumps(out, ensure_ascii=True, indent=2), encoding="utf-8")
    print("baseline_eval_done")
|
|
|
|
# Run the evaluation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
|
|