# replicalab/tests/test_training_datasets.py
# Uploaded by maxxie114 -- commit 80d8c84: "Initial HF Spaces deployment"
"""Tests for Scientist and Lab Manager dataset builders."""
from __future__ import annotations
import json
from replicalab.models import LabManagerAction
from replicalab.training.datasets import (
build_lab_manager_sft_examples,
build_scientist_prompt_examples,
)
def test_scientist_prompt_examples_include_frozen_evidence_when_available() -> None:
    """Check evidence attachment and goal-variant coverage of scientist prompts.

    Builds prompt examples for seed 3 across the ``math_reasoning`` and
    ``ml_benchmark`` templates at ``easy`` difficulty and verifies that:

    * exactly six rows are produced;
    * the ``paper_understanding`` row for ``math_reasoning`` carries no
      evidence id, while the one for ``ml_benchmark`` does, and the last
      message of its prompt mentions the frozen evidence pack;
    * the rows cover exactly the three expected goal variants.
    """
    examples = build_scientist_prompt_examples(
        seeds=[3],
        templates=["math_reasoning", "ml_benchmark"],
        difficulties=["easy"],
    )
    assert len(examples) == 6

    def paper_understanding_example(scenario: str):
        # First matching row; raises StopIteration when none exists.
        return next(
            example
            for example in examples
            if example.scenario == scenario
            and example.goal_variant == "paper_understanding"
        )

    math_example = paper_understanding_example("math_reasoning")
    ml_example = paper_understanding_example("ml_benchmark")

    assert math_example.evidence_id is None
    assert ml_example.evidence_id is not None

    final_message = ml_example.prompt[-1]
    assert "Frozen evidence pack" in final_message["content"]

    expected_variants = {
        "paper_understanding",
        "constraint_grounding",
        "negotiation_quality",
    }
    observed_variants = {example.goal_variant for example in examples}
    assert observed_variants == expected_variants
def test_lab_manager_sft_examples_emit_valid_lab_manager_action_json() -> None:
    """Check that Lab Manager SFT rows carry a parseable action target.

    Builds SFT examples for seed 2 on the ``ml_benchmark`` template at
    ``easy`` difficulty and verifies that:

    * at least two rows are produced, including both the ``baseline`` and
      ``constraint_stress`` candidate kinds;
    * the first row's ``target_json`` decodes to a payload accepted by
      ``LabManagerAction.model_validate`` with a truthy explanation;
    * the first row's message list ends with an assistant turn.
    """
    examples = build_lab_manager_sft_examples(
        seeds=[2],
        templates=["ml_benchmark"],
        difficulties=["easy"],
    )
    assert len(examples) >= 2

    required_kinds = {"baseline", "constraint_stress"}
    observed_kinds = {example.candidate_kind for example in examples}
    assert required_kinds <= observed_kinds

    # Only the first row is inspected in detail.
    first_example = examples[0]
    decoded_target = json.loads(first_example.target_json)
    validated_action = LabManagerAction.model_validate(decoded_target)
    assert validated_action.explanation

    closing_message = first_example.messages[-1]
    assert closing_message["role"] == "assistant"