Spaces:

openenv-community
/

replicalab

Running

replicalab/tests/test_training_datasets.py

Initial HF Spaces deployment

80d8c84 3 days ago

1.66 kB

	"""Tests for Scientist and Lab Manager dataset builders."""

	from __future__ import annotations

	import json

	from replicalab.models import LabManagerAction
	from replicalab.training.datasets import (
	build_lab_manager_sft_examples,
	build_scientist_prompt_examples,
	)


	def test_scientist_prompt_examples_include_frozen_evidence_when_available() -> None:
	    """Check evidence attachment and goal-variant coverage of scientist prompts.

	    Builds scientist prompt examples for seed 3, the ``math_reasoning`` and
	    ``ml_benchmark`` templates, and ``easy`` difficulty, then verifies that:

	    * exactly six examples are produced;
	    * the ``paper_understanding`` example for ``math_reasoning`` carries no
	      evidence id, while the one for ``ml_benchmark`` does;
	    * the last prompt message of the ``ml_benchmark`` example mentions the
	      frozen evidence pack;
	    * the goal variants seen across all examples are exactly the three
	      expected ones.
	    """
	    examples = build_scientist_prompt_examples(
	        seeds=[3],
	        templates=["math_reasoning", "ml_benchmark"],
	        difficulties=["easy"],
	    )

	    # NOTE(review): presumably 2 templates x 3 goal variants -- confirm
	    # against the dataset builder.
	    assert len(examples) == 6

	    def first_paper_understanding(scenario: str):
	        # Return the first example for *scenario* whose goal variant is
	        # "paper_understanding"; raises StopIteration if none exists.
	        candidates = (
	            example
	            for example in examples
	            if example.scenario == scenario
	            and example.goal_variant == "paper_understanding"
	        )
	        return next(candidates)

	    math_example = first_paper_understanding("math_reasoning")
	    ml_example = first_paper_understanding("ml_benchmark")

	    assert math_example.evidence_id is None
	    assert ml_example.evidence_id is not None

	    final_message = ml_example.prompt[-1]
	    assert "Frozen evidence pack" in final_message["content"]

	    seen_variants = {example.goal_variant for example in examples}
	    assert seen_variants == {
	        "paper_understanding",
	        "constraint_grounding",
	        "negotiation_quality",
	    }


	def test_lab_manager_sft_examples_emit_valid_lab_manager_action_json() -> None:
	    """Check that Lab Manager SFT examples carry a valid action as target JSON.

	    Builds SFT examples for seed 2, the ``ml_benchmark`` template, and
	    ``easy`` difficulty, then verifies that:

	    * at least two examples are produced;
	    * both the ``baseline`` and ``constraint_stress`` candidate kinds occur;
	    * the first example's ``target_json`` parses as JSON and validates as a
	      ``LabManagerAction`` with a non-empty explanation;
	    * the first example's final message has the ``assistant`` role.
	    """
	    examples = build_lab_manager_sft_examples(
	        seeds=[2],
	        templates=["ml_benchmark"],
	        difficulties=["easy"],
	    )

	    assert len(examples) >= 2

	    seen_kinds = {example.candidate_kind for example in examples}
	    assert seen_kinds.issuperset({"baseline", "constraint_stress"})

	    first_example = examples[0]
	    decoded_target = json.loads(first_example.target_json)
	    action = LabManagerAction.model_validate(decoded_target)

	    assert action.explanation
	    closing_message = first_example.messages[-1]
	    assert closing_message["role"] == "assistant"