Spaces:
Running
Running
| from __future__ import annotations | |
| import unittest | |
| from environment import SentinelEnv | |
class EnvironmentTests(unittest.TestCase):
    """Exercise ``SentinelEnv`` through its ``reset`` / ``step`` dict interface.

    Every test builds a fresh environment, so no state is shared between
    test methods.
    """

    # Generous cap for run-to-completion loops: a regression that never sets
    # ``done`` should fail the test rather than hang the whole run.
    _MAX_EPISODE_STEPS = 1000

    def _public_slot_for(self, env, internal_id: str) -> str:
        """Return the public specialist id that ``env`` maps to ``internal_id``.

        Scans ``env._pool.internal_profile()`` and returns the first public id
        whose mapped value equals ``internal_id``. Fails the current test with
        an explicit message when no slot matches, instead of surfacing a bare
        ``StopIteration``.
        """
        for public_id, mapped_id in env._pool.internal_profile().items():
            if mapped_id == internal_id:
                return public_id
        self.fail(f"no public slot maps to internal profile {internal_id!r}")

    def test_reset_observation_has_integer_subtask_index(self) -> None:
        """A fresh task3 episode starts at integer subtask 0, score 0.0, not done."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=3)

        subtask_index = result["observation"]["subtask_index"]
        # assertEqual alone would also accept 0.0 or False; pin the type that
        # the test name promises.
        self.assertIsInstance(subtask_index, int)
        self.assertEqual(subtask_index, 0)
        self.assertEqual(result["info"]["score"], 0.0)
        self.assertFalse(result["done"])

    def test_accurate_slow_public_slot_costs_two_steps(self) -> None:
        """One ``delegate`` to the slot backed by internal "S0" reports step_count 2.

        NOTE(review): "S0" is presumably the accurate-but-slow specialist, as
        the test name suggests -- confirm against the pool definition.
        """
        env = SentinelEnv()
        result = env.reset(task_type="task1", seed=11)
        slow_slot = self._public_slot_for(env, "S0")

        result = env.step({
            "session_id": result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "delegate",
            "specialist_id": slow_slot,
        })

        self.assertEqual(result["info"]["step_count"], 2)

    def test_verify_accurate_slow_costs_specialist_plus_verify_step(self) -> None:
        """One ``verify`` on the slot backed by internal "S0" reports step_count 3."""
        env = SentinelEnv()
        result = env.reset(task_type="task1", seed=11)
        slow_slot = self._public_slot_for(env, "S0")

        result = env.step({
            "session_id": result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "verify",
            "specialist_id": slow_slot,
        })

        self.assertEqual(result["info"]["step_count"], 3)

    def test_self_solve_finishes_long_task_with_normalized_score(self) -> None:
        """Self-solving every step of task3 completes it with a score in [0, 1]."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=5)

        steps_taken = 0
        while not result["done"]:
            self.assertLess(
                steps_taken,
                self._MAX_EPISODE_STEPS,
                "episode did not reach done within the step cap",
            )
            obs = result["observation"]
            result = env.step({
                "session_id": obs["session_id"],
                "task_type": obs["task_type"],
                "action_type": "solve_independently",
                "subtask_response": "SELF_SOLVED",
            })
            steps_taken += 1

        info = result["info"]
        self.assertEqual(info["completion_rate"], 1.0)
        self.assertGreater(info["step_count"], 2)
        self.assertGreaterEqual(info["score"], 0.0)
        self.assertLessEqual(info["score"], 1.0)
        self.assertIn("reward_report", info)
        self.assertGreater(info["reward_report"]["reward_events"], 0)

    def test_reward_report_tracks_process_components(self) -> None:
        """A single ``delegate`` step yields one reward event with component averages."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=42)
        obs = result["observation"]

        # The step result itself is not inspected; only the report is.
        env.step({
            "session_id": obs["session_id"],
            "task_type": "task3",
            "action_type": "delegate",
            "specialist_id": "S0",
        })

        report = env.reward_report()
        self.assertEqual(report["reward_events"], 1)
        self.assertIn("confidence_alignment", report["component_averages"])
        self.assertIn("domain_routing", report["component_averages"])
        self.assertEqual(report["events"][0]["action_type"], "delegate")
if __name__ == "__main__":
    # Allow running this file directly as a script, in addition to discovery
    # by a test runner.
    unittest.main()