Spaces:
Running
Running
| """API 14 — REST session isolation tests. | |
| Proves that two REST users with different session_ids do not share | |
| or corrupt each other's env state, episode IDs, protocol, or history. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from fastapi.testclient import TestClient | |
| from server.app import app | |
@pytest.fixture()
def client() -> TestClient:
    """Provide a ``TestClient`` bound to the FastAPI ``app``.

    Registered as a pytest fixture so that test methods declaring a
    ``client`` parameter receive it by injection. Without the decorator
    pytest cannot resolve that parameter and reports
    "fixture 'client' not found". The default (function) scope is used,
    so each test gets its own client instance.
    """
    return TestClient(app)
def _reset(client: TestClient, *, seed: int = 42, session_id: str | None = None, **kw) -> dict:
    """POST to ``/reset`` and return the decoded JSON body.

    The request body starts from ``seed`` plus the default scenario
    ``"math_reasoning"`` and difficulty ``"easy"``. ``session_id`` is only
    included when it is not ``None``. Any extra keyword arguments are
    merged last, so they override the defaults or add new fields.

    Fails with ``AssertionError`` unless the response status is 200.
    """
    base_fields: dict = {"seed": seed, "scenario": "math_reasoning", "difficulty": "easy"}
    session_fields: dict = {} if session_id is None else {"session_id": session_id}
    # Later mappings win, matching the original "defaults, then session, then kw" order.
    body: dict = {**base_fields, **session_fields, **kw}
    response = client.post("/reset", json=body)
    assert response.status_code == 200
    return response.json()
def _good_action(client: TestClient) -> dict:
    """Build a valid propose_protocol action from a fresh scenario.

    The scenario is generated locally with seed 42, the ``math_reasoning``
    template and ``easy`` difficulty. ``client`` is accepted but not used
    by this helper.
    """
    from replicalab.scenarios import generate_scenario

    scenario = generate_scenario(seed=42, template="math_reasoning", difficulty="easy")
    lab = scenario.lab_manager_observation

    # Use the first required element as the technique, with a fixed fallback.
    if scenario.hidden_reference_spec.required_elements:
        technique = scenario.hidden_reference_spec.required_elements[0]
    else:
        technique = "algebraic_proof"

    # Request at most one item of each resource, and only if the lab lists any.
    equipment: list = []
    if lab.equipment_available:
        equipment = list(lab.equipment_available[:1])
    reagents: list = []
    if lab.reagents_in_stock:
        reagents = list(lab.reagents_in_stock[:1])

    return {
        "action_type": "propose_protocol",
        "sample_size": 3,
        "controls": ["baseline"],
        "technique": technique,
        "duration_days": min(1, lab.time_limit_days),
        "required_equipment": equipment,
        "required_reagents": reagents,
        "questions": [],
        "rationale": "Test protocol for isolation check.",
    }
| # --------------------------------------------------------------------------- | |
| # Two resets produce isolated sessions | |
| # --------------------------------------------------------------------------- | |
class TestSessionIsolation:
    """Two REST users with different session_ids must not share state."""

    @staticmethod
    def _step(client: TestClient, session_id: str, action: dict) -> dict:
        """POST ``action`` to ``/step`` for ``session_id`` and return the JSON body.

        Asserts HTTP 200 so that a rejected step fails at the call site
        rather than surfacing later as a misleading comparison failure or
        ``KeyError``.
        """
        resp = client.post("/step", json={"session_id": session_id, "action": action})
        assert resp.status_code == 200, resp.text
        return resp.json()

    def test_two_resets_produce_different_sessions(self, client: TestClient) -> None:
        """Resets without an explicit session_id get distinct session and episode IDs."""
        d1 = _reset(client, seed=1)
        d2 = _reset(client, seed=2)
        assert d1["session_id"] != d2["session_id"]
        assert d1["episode_id"] != d2["episode_id"]

    def test_two_sessions_have_independent_observations(self, client: TestClient) -> None:
        """Different scenarios → different observations, proving separate envs."""
        d1 = _reset(client, seed=100, scenario="math_reasoning", difficulty="easy")
        d2 = _reset(client, seed=200, scenario="ml_benchmark", difficulty="hard")
        obs1 = d1["observation"]
        obs2 = d2["observation"]
        # Different scenarios produce different paper titles
        assert obs1["scientist"]["paper_title"] != obs2["scientist"]["paper_title"]

    def test_stepping_one_session_does_not_mutate_other(self, client: TestClient) -> None:
        """Step session A, then verify session B's next step is unaffected."""
        sid_a = "isolation-a"
        sid_b = "isolation-b"
        d_a = _reset(client, seed=42, session_id=sid_a)
        d_b = _reset(client, seed=42, session_id=sid_b)
        # Both start with the same observation (same seed/scenario/difficulty)
        assert d_a["observation"] == d_b["observation"]

        action = _good_action(client)

        # Step session A
        step_a = self._step(client, sid_a, action)
        assert step_a["done"] is False

        # Step session B with the same action — should produce the same result
        # because it started from the same state and hasn't been touched
        step_b = self._step(client, sid_b, action)

        # Both should have the same reward and observation since they saw the
        # same state + same action
        assert step_a["reward"] == step_b["reward"]
        assert step_a["observation"] == step_b["observation"]

    def test_sessions_have_independent_round_counts(self, client: TestClient) -> None:
        """Advancing one session by two steps doesn't advance the other."""
        sid_a = "rounds-a"
        sid_b = "rounds-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)

        # Step session A twice; both steps must succeed or the comparison
        # below would be meaningless.
        self._step(client, sid_a, action)
        obs_a = self._step(client, sid_a, action)["observation"]

        # Step session B once
        obs_b = self._step(client, sid_b, action)["observation"]

        # Session A has taken two steps, session B only one, so B must be
        # strictly behind A (this also implies the two counts differ).
        assert obs_b["scientist"]["round_number"] < obs_a["scientist"]["round_number"]

    def test_terminal_one_session_does_not_affect_other(self, client: TestClient) -> None:
        """Completing session A (accept) doesn't terminate session B."""
        sid_a = "term-a"
        sid_b = "term-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)

        # Step A with propose, then accept
        self._step(client, sid_a, action)
        accept = {
            "action_type": "accept",
            "sample_size": 0,
            "controls": [],
            "technique": "",
            "duration_days": 0,
            "required_equipment": [],
            "required_reagents": [],
            "questions": [],
            "rationale": "",
        }
        assert self._step(client, sid_a, accept)["done"] is True

        # Session B should still be alive and non-terminal
        assert self._step(client, sid_b, action)["done"] is False
| # --------------------------------------------------------------------------- | |
| # Session ID reuse | |
| # --------------------------------------------------------------------------- | |
class TestSessionReuse:
    """Reusing a session_id should close the old env and start fresh."""

    @staticmethod
    def _step(client: TestClient, session_id: str, action: dict) -> dict:
        """POST ``action`` to ``/step`` for ``session_id`` and return the JSON body.

        Asserts HTTP 200 so a rejected step fails immediately with the
        response text instead of silently skewing later assertions.
        """
        resp = client.post("/step", json={"session_id": session_id, "action": action})
        assert resp.status_code == 200, resp.text
        return resp.json()

    def test_reuse_session_id_creates_new_episode(self, client: TestClient) -> None:
        """Resetting twice with one session_id keeps the ID but changes the episode."""
        sid = "reuse-test"
        d1 = _reset(client, seed=10, session_id=sid)
        d2 = _reset(client, seed=20, session_id=sid)
        assert d1["session_id"] == d2["session_id"] == sid
        assert d1["episode_id"] != d2["episode_id"]

    def test_reuse_session_id_resets_round_counter(self, client: TestClient) -> None:
        """After stepping session, reusing the ID should start at round 0."""
        sid = "reuse-rounds"
        _reset(client, seed=42, session_id=sid)
        action = _good_action(client)
        # The step must actually succeed, otherwise a round counter of 0
        # after the second reset would prove nothing.
        self._step(client, sid, action)

        # Now reset with the same session_id
        d2 = _reset(client, seed=99, session_id=sid)
        obs = d2["observation"]
        assert obs["scientist"]["round_number"] == 0

    def test_reuse_does_not_affect_other_sessions(self, client: TestClient) -> None:
        """Resetting session A with reuse doesn't touch session B."""
        sid_a = "reuse-a"
        sid_b = "reuse-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)
        first_b = self._step(client, sid_b, action)

        # Reuse session A with a new seed
        _reset(client, seed=99, session_id=sid_a)

        # Session B should keep the progress from the step taken before A
        # was reset: it is still alive and its round counter keeps advancing
        # rather than starting over.
        second_b = self._step(client, sid_b, action)
        assert second_b["done"] is False
        round_before = first_b["observation"]["scientist"]["round_number"]
        round_after = second_b["observation"]["scientist"]["round_number"]
        assert round_after > round_before
| # --------------------------------------------------------------------------- | |
| # Invalid session handling | |
| # --------------------------------------------------------------------------- | |
class TestInvalidSession:
    """Invalid or missing session_id should return clean errors."""

    @staticmethod
    def _accept_action() -> dict:
        """Return a fresh ``accept`` action payload with all other fields empty."""
        return {
            "action_type": "accept",
            "sample_size": 0,
            "controls": [],
            "technique": "",
            "duration_days": 0,
            "required_equipment": [],
            "required_reagents": [],
            "questions": [],
            "rationale": "",
        }

    @staticmethod
    def _step(client: TestClient, session_id: str, action: dict) -> dict:
        """POST ``action`` to ``/step`` for ``session_id`` and return the JSON body.

        Asserts HTTP 200; use ``client.post`` directly for requests that
        are expected to fail.
        """
        resp = client.post("/step", json={"session_id": session_id, "action": action})
        assert resp.status_code == 200, resp.text
        return resp.json()

    def test_step_nonexistent_session_returns_404(self, client: TestClient) -> None:
        """Stepping an unknown session_id yields 404 with a "Session not found" detail."""
        action = _good_action(client)
        resp = client.post("/step", json={
            "session_id": "does-not-exist",
            "action": action,
        })
        assert resp.status_code == 404
        assert "Session not found" in resp.json()["detail"]

    def test_step_after_terminal_session_raises(self, client: TestClient) -> None:
        """A terminated session's env is closed; stepping again should error."""
        sid = "terminal-step"
        _reset(client, seed=42, session_id=sid)
        action = _good_action(client)
        self._step(client, sid, action)
        assert self._step(client, sid, self._accept_action())["done"] is True

        # Stepping a done env should return an error (env raises or returns error)
        resp_again = client.post("/step", json={"session_id": sid, "action": action})
        # The server should still return a response (200 with error or 500),
        # not crash silently
        assert resp_again.status_code in (200, 500)

    def test_replay_isolation_between_sessions(self, client: TestClient) -> None:
        """Each session's terminal episode gets its own replay entry."""
        sid_a = "replay-a"
        sid_b = "replay-b"
        d_a = _reset(client, seed=10, session_id=sid_a)
        d_b = _reset(client, seed=20, session_id=sid_b)
        action = _good_action(client)
        accept = self._accept_action()

        # Complete both episodes; every step must succeed, otherwise a
        # missing replay would be blamed on isolation rather than setup.
        for sid in (sid_a, sid_b):
            self._step(client, sid, action)
            self._step(client, sid, accept)

        # Both replays should exist independently
        ep_a = d_a["episode_id"]
        ep_b = d_b["episode_id"]
        resp_a = client.get(f"/replay/{ep_a}")
        resp_b = client.get(f"/replay/{ep_b}")
        assert resp_a.status_code == 200
        assert resp_b.status_code == 200
        # Different seeds → different episode content
        assert resp_a.json()["seed"] != resp_b.json()["seed"]