Spaces:

XcodeAddy
/

sentinel-env

Running

App Files Files Community

sentinel-env / tests /test_environment.py

XcodeAddy

Add process-aware reward engine reports

b3b9bbd 18 days ago

raw

history blame contribute delete

3.28 kB

	from __future__ import annotations

	import unittest

	from environment import SentinelEnv


	class EnvironmentTests(unittest.TestCase):
	    """Exercise ``SentinelEnv`` episodes through its ``reset``/``step`` API.

	    Each test builds its own environment with a fixed seed and inspects the
	    dict returned by ``reset``/``step`` (keys ``observation``, ``info`` and
	    ``done``) or the separate ``reward_report()`` summary.
	    """

	    # Safety net for loops that run an episode to completion: a regression
	    # that never sets ``done`` should fail the test instead of hanging the run.
	    # NOTE(review): assumed to be far above any real episode length — confirm
	    # against the environment's own step budget.
	    MAX_EPISODE_STEPS = 1_000

	    def _public_slot_for(self, env: SentinelEnv, internal_id: str):
	        """Return the public specialist id that ``env`` maps to ``internal_id``.

	        The mapping is read from ``env._pool.internal_profile()``, whose items
	        are ``(public_id, internal_id)`` pairs. If no public slot maps to the
	        requested internal id, the calling test is failed with a message that
	        includes the whole mapping, instead of leaking a bare ``StopIteration``.
	        """
	        profile = env._pool.internal_profile()
	        for public_id, mapped_id in profile.items():
	            if mapped_id == internal_id:
	                return public_id
	        self.fail(
	            f"no public slot maps to internal specialist {internal_id!r}: {profile!r}"
	        )

	    def test_reset_observation_has_integer_subtask_index(self) -> None:
	        """A freshly reset ``task3`` episode starts at subtask 0, unscored, not done."""
	        env = SentinelEnv()
	        result = env.reset(task_type="task3", seed=3)

	        self.assertEqual(result["observation"]["subtask_index"], 0)
	        self.assertEqual(result["info"]["score"], 0.0)
	        self.assertFalse(result["done"])

	    def test_accurate_slow_public_slot_costs_two_steps(self) -> None:
	        """Delegating to the slot backed by internal ``S0`` reports a step count of 2."""
	        env = SentinelEnv()
	        result = env.reset(task_type="task1", seed=11)
	        slow_slot = self._public_slot_for(env, "S0")

	        result = env.step({
	            "session_id": result["observation"]["session_id"],
	            "task_type": "task1",
	            "action_type": "delegate",
	            "specialist_id": slow_slot,
	        })

	        self.assertEqual(result["info"]["step_count"], 2)

	    def test_verify_accurate_slow_costs_specialist_plus_verify_step(self) -> None:
	        """Verifying via the slot backed by internal ``S0`` reports a step count of 3."""
	        env = SentinelEnv()
	        result = env.reset(task_type="task1", seed=11)
	        slow_slot = self._public_slot_for(env, "S0")

	        result = env.step({
	            "session_id": result["observation"]["session_id"],
	            "task_type": "task1",
	            "action_type": "verify",
	            "specialist_id": slow_slot,
	        })

	        self.assertEqual(result["info"]["step_count"], 3)

	    def test_self_solve_finishes_long_task_with_normalized_score(self) -> None:
	        """Solving every ``task3`` subtask independently completes the episode.

	        The final ``info`` must show full completion, more than two steps, a
	        score within ``[0.0, 1.0]`` and a reward report with at least one event.
	        """
	        env = SentinelEnv()
	        result = env.reset(task_type="task3", seed=5)

	        steps_taken = 0
	        while not result["done"]:
	            if steps_taken >= self.MAX_EPISODE_STEPS:
	                self.fail(
	                    f"episode still not done after {steps_taken} self-solve steps"
	                )
	            obs = result["observation"]
	            result = env.step({
	                "session_id": obs["session_id"],
	                "task_type": obs["task_type"],
	                "action_type": "solve_independently",
	                "subtask_response": "SELF_SOLVED",
	            })
	            steps_taken += 1

	        info = result["info"]
	        self.assertEqual(info["completion_rate"], 1.0)
	        self.assertGreater(info["step_count"], 2)
	        self.assertGreaterEqual(info["score"], 0.0)
	        self.assertLessEqual(info["score"], 1.0)
	        self.assertIn("reward_report", info)
	        self.assertGreater(info["reward_report"]["reward_events"], 0)

	    def test_reward_report_tracks_process_components(self) -> None:
	        """One delegate step yields one reward event with per-component averages."""
	        env = SentinelEnv()
	        result = env.reset(task_type="task3", seed=42)
	        obs = result["observation"]

	        # NOTE(review): "S0" is passed directly as the public specialist id
	        # here, whereas the task1 tests resolve the public slot that maps to
	        # internal "S0". Presumably any valid public slot works for this
	        # assertion — confirm "S0" is always a valid public id.
	        result = env.step({
	            "session_id": obs["session_id"],
	            "task_type": "task3",
	            "action_type": "delegate",
	            "specialist_id": "S0",
	        })

	        report = env.reward_report()

	        self.assertEqual(report["reward_events"], 1)
	        self.assertIn("confidence_alignment", report["component_averages"])
	        self.assertIn("domain_routing", report["component_averages"])
	        self.assertEqual(report["events"][0]["action_type"], "delegate")


	if __name__ == "__main__":
	    # Executed as a script (not imported): hand control to unittest's
	    # command-line runner, which discovers and runs the tests in this module.
	    unittest.main()