| import json |
| import os |
| import subprocess |
| import sys |
| import tempfile |
|
|
|
|
def test_quick_mode_runs(tmp_path):
    """Smoke-test ``scripts/eval_compare.py --quick`` end to end.

    Runs the script in a subprocess with ``--output`` pointing at a JSON file
    under ``tmp_path``, then checks that the file was created and that both
    the ``"random"`` and ``"heuristic"`` entries contain an ``"easy"`` tier
    with a non-null ``"total_reward"``.

    The script path is relative and no ``cwd`` is passed, so the test must be
    started from the directory that contains ``scripts/``.

    Args:
        tmp_path: Directory in which the output file is created (path-like;
            presumably pytest's built-in ``tmp_path`` fixture).
    """
    agents = ("random", "heuristic")
    output = tmp_path / "eval_results.json"

    command = [
        sys.executable,  # same interpreter as the test run
        "scripts/eval_compare.py",
        "--quick",
        "--output",
        str(output),
    ]
    result = subprocess.run(command, capture_output=True, text=True)

    assert result.returncode == 0, f"Script failed:\n{result.stderr}"
    assert output.exists(), "Output JSON not created"

    # Decode explicitly as UTF-8 rather than relying on the locale default.
    with open(output, encoding="utf-8") as f:
        data = json.load(f)

    # Check that every agent is present before inspecting any tier data.
    for agent in agents:
        assert agent in data, f"{agent} agent missing from results"

    for agent in agents:
        tier_data = data[agent].get("easy")
        assert tier_data is not None, f"{agent} easy tier is None"
        # .get() turns a missing key into a readable assertion failure
        # instead of a bare KeyError.
        assert (
            tier_data.get("total_reward") is not None
        ), f"{agent} easy tier has no total_reward"
|
|