Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Run a single ReplicaLab episode locally and dump logs. | |
| OBS 07 — Quick local smoke-test script. Resets the environment with a | |
| given seed/scenario/difficulty, runs a baseline propose→accept sequence, | |
| and writes the episode replay JSON + reward CSV/JSONL to the default | |
| output directories. | |
| Usage: | |
| python -m scripts.run_episode | |
| python -m scripts.run_episode --seed 42 --scenario ml_benchmark --difficulty hard | |
| python -m scripts.run_episode --max-rounds 3 | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import sys | |
| from pathlib import Path | |
| # Ensure project root is importable when run as a script | |
| _PROJECT_ROOT = Path(__file__).resolve().parents[1] | |
| if str(_PROJECT_ROOT) not in sys.path: | |
| sys.path.insert(0, str(_PROJECT_ROOT)) | |
| from replicalab.config import DEFAULT_DIFFICULTY, DEFAULT_SCENARIO_TEMPLATE | |
| from replicalab.env.replicalab_env import ReplicaLabEnv | |
| from replicalab.models import ScientistAction | |
| from replicalab.scenarios import generate_scenario | |
| from replicalab.utils.logging import log_episode_reward, write_episode_log | |
| def _build_propose_action(env: ReplicaLabEnv, seed: int, scenario: str, difficulty: str) -> ScientistAction: | |
| """Build a baseline propose_protocol action from the scenario pack.""" | |
| pack = generate_scenario(seed=seed, template=scenario, difficulty=difficulty) | |
| lab = pack.lab_manager_observation | |
| spec = pack.hidden_reference_spec | |
| return ScientistAction( | |
| action_type="propose_protocol", | |
| sample_size=10, | |
| controls=["baseline", "ablation"], | |
| technique=spec.summary[:60] if spec.summary else "replication_plan", | |
| duration_days=max(1, min(2, lab.time_limit_days)), | |
| required_equipment=list(lab.equipment_available[:1]) if lab.equipment_available else [], | |
| required_reagents=list(lab.reagents_in_stock[:1]) if lab.reagents_in_stock else [], | |
| questions=[], | |
| rationale=( | |
| f"Plan addresses: {', '.join(spec.required_elements[:2])}. " | |
| f"Target metric: {spec.target_metric}. " | |
| f"Target value: {spec.target_value}. " | |
| "Stay within budget and schedule." | |
| ), | |
| ) | |
| def _build_accept_action() -> ScientistAction: | |
| return ScientistAction( | |
| action_type="accept", | |
| sample_size=0, | |
| controls=[], | |
| technique="", | |
| duration_days=0, | |
| required_equipment=[], | |
| required_reagents=[], | |
| questions=[], | |
| rationale="", | |
| ) | |
| def run_episode( | |
| seed: int = 0, | |
| scenario: str = DEFAULT_SCENARIO_TEMPLATE, | |
| difficulty: str = DEFAULT_DIFFICULTY, | |
| max_rounds: int | None = None, | |
| ) -> None: | |
| """Run one episode and persist outputs.""" | |
| env = ReplicaLabEnv() | |
| obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty) | |
| episode_id = env.episode_id() | |
| print(f"Episode {episode_id} | seed={seed} scenario={scenario} difficulty={difficulty}") | |
| print(f" Paper: {obs.scientist.paper_title}") | |
| propose_action = _build_propose_action(env, seed, scenario, difficulty) | |
| total_steps = 0 | |
| invalid_count = 0 | |
| # Step 1: propose | |
| result = env.step(propose_action) | |
| total_steps += 1 | |
| if result.info.error: | |
| invalid_count += 1 | |
| print(f" Round 1 propose | reward={result.reward:.4f} done={result.done}") | |
| if not result.done: | |
| # Step 2: accept | |
| result = env.step(_build_accept_action()) | |
| total_steps += 1 | |
| if result.info.error: | |
| invalid_count += 1 | |
| print(f" Round 2 accept | reward={result.reward:.4f} done={result.done}") | |
| state = env.state() | |
| info = result.info | |
| # Build and persist episode log | |
| from replicalab.models import EpisodeLog | |
| episode_log = EpisodeLog( | |
| episode_id=episode_id, | |
| seed=state.seed, | |
| scenario_template=state.scenario_template, | |
| difficulty=state.difficulty, | |
| final_state=state, | |
| transcript=list(state.conversation_history), | |
| reward_breakdown=info.reward_breakdown, | |
| total_reward=state.reward, | |
| rounds_used=state.round_number, | |
| agreement_reached=info.agreement_reached, | |
| judge_notes=info.judge_notes or "", | |
| verdict=info.verdict or "", | |
| top_failure_reasons=list(info.top_failure_reasons), | |
| invalid_action_count=invalid_count, | |
| invalid_action_rate=round(invalid_count / total_steps, 6) if total_steps else 0.0, | |
| ) | |
| replay_path = write_episode_log(episode_log) | |
| csv_path, jsonl_path = log_episode_reward( | |
| episode_id=episode_id, | |
| seed=state.seed, | |
| scenario_template=state.scenario_template, | |
| difficulty=state.difficulty, | |
| total_reward=state.reward, | |
| breakdown=info.reward_breakdown, | |
| rounds_used=state.round_number, | |
| agreement_reached=info.agreement_reached, | |
| verdict=info.verdict or "", | |
| judge_notes=info.judge_notes or "", | |
| ) | |
| print(f"\n Verdict: {info.verdict}") | |
| print(f" Total reward: {state.reward:.4f}") | |
| print(f" Agreement: {info.agreement_reached}") | |
| print(f" Invalid actions: {invalid_count}/{total_steps}") | |
| print(f"\n Replay JSON: {replay_path}") | |
| print(f" Reward CSV: {csv_path}") | |
| print(f" Reward JSONL: {jsonl_path}") | |
| env.close() | |
| def main() -> None: | |
| parser = argparse.ArgumentParser(description="Run a single ReplicaLab episode") | |
| parser.add_argument("--seed", type=int, default=0) | |
| parser.add_argument("--scenario", default=DEFAULT_SCENARIO_TEMPLATE) | |
| parser.add_argument("--difficulty", default=DEFAULT_DIFFICULTY) | |
| parser.add_argument("--max-rounds", type=int, default=None) | |
| args = parser.parse_args() | |
| run_episode( | |
| seed=args.seed, | |
| scenario=args.scenario, | |
| difficulty=args.difficulty, | |
| max_rounds=args.max_rounds, | |
| ) | |
| if __name__ == "__main__": | |
| main() | |