| """
|
| Data models for CERNenv: an LHC (Large Hadron Collider) style particle
|
| physics discovery POMDP (Partially Observable Markov Decision Process).
|
|
|
| The agent is a Large Language Model (LLM) acting as a high-energy physicist.
|
| Each step it picks one structured action (configure beams, allocate
|
| luminosity, run a trigger, fit a spectrum, request systematics, submit a
|
| discovery claim, etc.) and receives a noisy detector-style observation.
|
| The latent particle and detector parameters are the hidden ground truth.
|
| """
|
|
|
| from __future__ import annotations
|
|
|
| from enum import Enum
|
| from typing import Any, Dict, List, Optional
|
|
|
| from pydantic import BaseModel, Field
|
|
|
| from openenv.core.env_server.types import Action, Observation
|
|
|
|
|
|
|
|
|
|
|
class ActionType(str, Enum):
    """Discrete pipeline steps the agent can request, keyed by wire string.

    Members are plain strings (``str`` mixin), so they compare equal to
    their values. Declaration order is the order produced when iterating
    the enum, so do not reorder members casually.
    """

    # Data acquisition
    CONFIGURE_BEAM = "configure_beam"
    ALLOCATE_LUMINOSITY = "allocate_luminosity"
    SET_TRIGGER = "set_trigger"
    COLLECT_COLLISIONS = "collect_collisions"

    # Reconstruction and calibration
    CALIBRATE_DETECTOR = "calibrate_detector"
    RECONSTRUCT_TRACKS = "reconstruct_tracks"
    SELECT_CHANNEL = "select_channel"

    # Analysis
    BUILD_INVARIANT_MASS = "build_invariant_mass"
    SUBTRACT_BACKGROUND = "subtract_background"
    FIT_RESONANCE = "fit_resonance"
    SCAN_BUMP = "scan_bump"
    MEASURE_ANGULAR = "measure_angular"
    ESTIMATE_SIGNIFICANCE = "estimate_significance"

    # Review requests
    REQUEST_SYSTEMATICS = "request_systematics"
    REQUEST_THEORY_REVIEW = "request_theory_review"

    # Terminal claim
    SUBMIT_DISCOVERY_CLAIM = "submit_discovery_claim"
|
|
|
|
|
# Immutable groupings of ActionType members by pipeline stage. Together the
# four sets cover every ActionType member exactly once.

# Data-acquisition steps: beam setup through collision collection.
DAQ_ACTIONS = frozenset(
    {
        ActionType.CONFIGURE_BEAM,
        ActionType.ALLOCATE_LUMINOSITY,
        ActionType.SET_TRIGGER,
        ActionType.COLLECT_COLLISIONS,
    }
)

# Detector calibration, object reconstruction and channel selection.
RECO_ACTIONS = frozenset(
    {
        ActionType.CALIBRATE_DETECTOR,
        ActionType.RECONSTRUCT_TRACKS,
        ActionType.SELECT_CHANNEL,
    }
)

# Spectrum building, fitting and significance estimation.
ANALYSIS_ACTIONS = frozenset(
    {
        ActionType.BUILD_INVARIANT_MASS,
        ActionType.SUBTRACT_BACKGROUND,
        ActionType.FIT_RESONANCE,
        ActionType.SCAN_BUMP,
        ActionType.MEASURE_ANGULAR,
        ActionType.ESTIMATE_SIGNIFICANCE,
    }
)

# Review requests plus the final discovery-claim submission.
META_ACTIONS = frozenset(
    {
        ActionType.REQUEST_SYSTEMATICS,
        ActionType.REQUEST_THEORY_REVIEW,
        ActionType.SUBMIT_DISCOVERY_CLAIM,
    }
)
|
|
|
|
|
|
|
|
|
|
|
class DetectorChannel(str, Enum):
    """Final-state decay channel the agent reconstructs in.

    Channels affect signal acceptance and background composition. Picking a
    channel the true particle does not decay into gives a low signal yield
    no matter how much luminosity is collected — this is intentional.
    """

    # Photon and lepton pair final states
    DIPHOTON = "diphoton"
    DILEPTON_EE = "dilepton_ee"
    DILEPTON_MUMU = "dilepton_mumu"

    # Hadronic and multi-lepton final states
    DIJET = "dijet"
    FOUR_LEPTON = "four_lepton"
    BB = "bb"
|
|
|
|
|
class TriggerType(str, Enum):
    """Hardware-level event selection."""

    # Generic transverse-momentum thresholds
    LOW_PT = "low_pt"
    HIGH_PT = "high_pt"

    # Object-specific high-level triggers
    DIPHOTON_HLT = "diphoton_hlt"
    DILEPTON_HLT = "dilepton_hlt"
    JET_HLT = "jet_hlt"
|
|
|
|
|
class BeamEnergy(str, Enum):
    """LHC-style center-of-mass energies (TeV)."""

    # Values are the labels exchanged with the agent, e.g. "13TeV".
    E_7 = "7TeV"
    E_8 = "8TeV"
    E_13 = "13TeV"
    E_14 = "14TeV"
|
|
|
|
|
|
|
|
|
|
|
class ToolCategory(str, Enum):
    """Coarse grouping assigned to each tool specification."""

    DAQ = "daq"
    RECONSTRUCTION = "reconstruction"
    CALIBRATION = "calibration"
    ANALYSIS = "analysis"
    STATISTICS = "statistics"
    SYSTEMATICS = "systematics"
|
|
|
|
|
class ToolSpec(BaseModel):
    """Static description of one named tool."""

    # Identity and grouping.
    name: str
    category: ToolCategory
    description: str = ""

    # Nominal per-use figures; units are those given in the field names.
    typical_runtime_hours: float = 0.5
    typical_cost_musd: float = 0.0
    requires_gpu: bool = False

    # Channel names associated with the tool; defaults to an empty list.
    channels: List[str] = Field(default_factory=list)
|
|
|
|
|
# Registry of every known tool, keyed by the tool's own ``name`` so the key
# and the spec cannot drift apart. Insertion order is the listing order.
TOOL_REGISTRY: Dict[str, ToolSpec] = {
    spec.name: spec
    for spec in (
        # Online trigger systems
        ToolSpec(
            name="ATLAS_HLT",
            category=ToolCategory.DAQ,
            description="ATLAS High-Level Trigger system for online event selection",
            typical_runtime_hours=0.0,
            channels=["diphoton", "dilepton_ee", "dilepton_mumu", "four_lepton", "dijet", "bb"],
        ),
        ToolSpec(
            name="CMS_HLT",
            category=ToolCategory.DAQ,
            description="CMS High-Level Trigger system",
            typical_runtime_hours=0.0,
            channels=["diphoton", "dilepton_ee", "dilepton_mumu", "four_lepton", "dijet", "bb"],
        ),
        # Simulation and reconstruction frameworks
        ToolSpec(
            name="GEANT4",
            category=ToolCategory.RECONSTRUCTION,
            description="Detector simulation toolkit for full event reconstruction",
            typical_runtime_hours=1.0,
            typical_cost_musd=0.05,
            requires_gpu=False,
        ),
        ToolSpec(
            name="Athena",
            category=ToolCategory.RECONSTRUCTION,
            description="ATLAS reconstruction framework",
            typical_runtime_hours=0.8,
        ),
        ToolSpec(
            name="CMSSW",
            category=ToolCategory.RECONSTRUCTION,
            description="CMS reconstruction software",
            typical_runtime_hours=0.8,
        ),
        # Detector calibration
        ToolSpec(
            name="ECAL_calibration",
            category=ToolCategory.CALIBRATION,
            description="Electromagnetic calorimeter energy-scale calibration",
            typical_runtime_hours=0.3,
        ),
        ToolSpec(
            name="Tracker_alignment",
            category=ToolCategory.CALIBRATION,
            description="Inner tracker alignment for momentum precision",
            typical_runtime_hours=0.4,
        ),
        # Fitting and event generation
        ToolSpec(
            name="ROOT_RooFit",
            category=ToolCategory.ANALYSIS,
            description="Maximum-likelihood spectrum fitting toolkit",
            typical_runtime_hours=0.2,
        ),
        ToolSpec(
            name="MadGraph",
            category=ToolCategory.ANALYSIS,
            description="Matrix-element generator for signal+background templates",
            typical_runtime_hours=1.5,
            typical_cost_musd=0.02,
        ),
        ToolSpec(
            name="Pythia8",
            category=ToolCategory.ANALYSIS,
            description="Parton-shower and hadronisation generator",
            typical_runtime_hours=0.5,
        ),
        # Statistical inference
        ToolSpec(
            name="BumpHunter",
            category=ToolCategory.STATISTICS,
            description="Sliding-window local-significance bump-hunting algorithm",
            typical_runtime_hours=0.1,
        ),
        ToolSpec(
            name="CLs_fit",
            category=ToolCategory.STATISTICS,
            description="Modified-frequentist CLs limits and significance",
            typical_runtime_hours=0.1,
        ),
        ToolSpec(
            name="Asimov_significance",
            category=ToolCategory.STATISTICS,
            description="Asymptotic significance from Asimov dataset",
            typical_runtime_hours=0.05,
        ),
        # Systematic-uncertainty studies
        ToolSpec(
            name="JES_systematics",
            category=ToolCategory.SYSTEMATICS,
            description="Jet energy-scale systematic study",
            typical_runtime_hours=0.4,
        ),
        ToolSpec(
            name="Luminosity_calibration",
            category=ToolCategory.SYSTEMATICS,
            description="Van der Meer scan luminosity calibration",
            typical_runtime_hours=0.3,
        ),
    )
}
|
|
|
|
|
|
|
|
|
|
|
class ExperimentAction(Action):
    """One structured experimental step at the LHC."""

    # Required: which pipeline step to run.
    action_type: ActionType = Field(
        ...,
        description=(
            "Discrete LHC pipeline step. The environment enforces physics "
            "prerequisites: you cannot fit a spectrum before collecting data, "
            "or claim a discovery before estimating significance."
        ),
    )

    # Optional tool name, free-form string.
    method: Optional[str] = Field(
        default=None,
        description=(
            "Optional named instrument or framework (e.g. 'ROOT_RooFit', "
            "'BumpHunter', 'Pythia8'). Affects cost, runtime, and tool-fit reward."
        ),
    )

    # Free-form settings; a fresh dict is created for every instance.
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Action-specific settings such as beam energy, integrated luminosity "
            "(fb^-1), trigger selection, decay channel, mass window, fit model."
        ),
    )

    justification: Optional[str] = Field(
        default=None,
        description="Short scientific rationale for picking this step now.",
    )

    # Validated to lie within [0, 1].
    confidence: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Agent confidence in the chosen step.",
    )
|
|
|
|
|
|
|
|
|
|
|
class OutputType(str, Enum):
    """Kinds of artifact recorded on outputs and pipeline step records."""

    # Data acquisition
    BEAM_CONFIG = "beam_config"
    LUMINOSITY_LOG = "luminosity_log"
    TRIGGER_REPORT = "trigger_report"
    COLLISION_BATCH = "collision_batch"

    # Reconstruction and calibration
    CALIBRATION_REPORT = "calibration_report"
    RECONSTRUCTION = "reconstruction"
    CHANNEL_SELECTION = "channel_selection"

    # Analysis
    INVARIANT_MASS_HIST = "invariant_mass_hist"
    BACKGROUND_SUBTRACTION = "background_subtraction"
    FIT_RESULT = "fit_result"
    BUMP_SCAN = "bump_scan"
    ANGULAR_RESULT = "angular_result"
    SIGNIFICANCE = "significance"

    # Reviews and final claim
    SYSTEMATICS_REPORT = "systematics_report"
    THEORY_REVIEW = "theory_review"
    DISCOVERY_CLAIM = "discovery_claim"

    # Failure marker
    FAILURE_REPORT = "failure_report"
|
|
|
|
|
class IntermediateOutput(BaseModel):
    """A single noisy detector or analysis artifact."""

    # What was produced, and at which step.
    output_type: OutputType
    step_index: int
    success: bool = True

    # Both scores are validated to lie within [0, 1].
    quality_score: float = Field(default=1.0, ge=0.0, le=1.0)
    uncertainty: float = Field(default=0.0, ge=0.0, le=1.0)

    # Human-readable summary plus free-form payload.
    summary: str = ""
    data: Dict[str, Any] = Field(default_factory=dict)

    warnings: List[str] = Field(default_factory=list)
    artifacts_available: List[str] = Field(default_factory=list)
|
|
|
|
|
|
|
|
|
|
|
class ResourceUsage(BaseModel):
    """Agent-visible resource counters."""

    # Each used/remaining pair starts at zero used; the remaining defaults
    # equal the default limits declared on TaskSpec (100 / 300 / 365).
    budget_used_musd: float = 0.0
    budget_remaining_musd: float = 100.0

    luminosity_used_fb: float = 0.0
    luminosity_remaining_fb: float = 300.0

    time_used_days: float = 0.0
    time_remaining_days: float = 365.0

    # Compute has a usage counter only; no remaining figure is tracked here.
    compute_hours_used: float = 0.0
|
|
|
|
|
class PipelineStepRecord(BaseModel):
    """History entry pairing one executed action with its outcome and costs."""

    # The action as requested.
    step_index: int
    action_type: ActionType
    method: Optional[str] = None
    parameters: Dict[str, Any] = Field(default_factory=dict)

    # Outcome of the step.
    output_summary: str = ""
    output_type: OutputType
    success: bool = True
    quality_score: float = 1.0  # NOTE: not range-validated here

    # Resources charged for the step.
    cost_musd: float = 0.0
    luminosity_cost_fb: float = 0.0
    time_cost_days: float = 0.0
|
|
|
|
|
class PaperReference(BaseModel):
    """Bibliographic pointer attached to a task; only the title is required."""

    title: str

    # Optional locators; any subset may be supplied.
    citation: Optional[str] = None
    doi: Optional[str] = None
    arxiv_id: Optional[str] = None
    url: Optional[str] = None
|
|
|
|
|
class ExpectedFinding(BaseModel):
    """One finding listed on a task, with a category label and keywords."""

    finding: str
    category: str = "claim"
    keywords: List[str] = Field(default_factory=list)
|
|
|
|
|
class TaskSpec(BaseModel):
    """The physics question the agent is given for this episode."""

    problem_statement: str = "Discover and characterise an unknown resonance."
    target_collider: str = "LHC"

    # Option lists default to every member of the matching enum / registry.
    # Factories are used so each instance gets its own fresh list.
    beam_energy_options: List[str] = Field(
        default_factory=lambda: [energy.value for energy in BeamEnergy],
    )
    available_channels: List[str] = Field(
        default_factory=lambda: [channel.value for channel in DetectorChannel],
    )
    available_triggers: List[str] = Field(
        default_factory=lambda: [trigger.value for trigger in TriggerType],
    )
    available_tools: List[str] = Field(
        default_factory=lambda: list(TOOL_REGISTRY),
    )

    # Two-element [low, high] window in GeV by default.
    mass_search_window_gev: List[float] = Field(default_factory=lambda: [50.0, 1000.0])

    # Episode resource limits.
    budget_limit_musd: float = 100.0
    luminosity_budget_fb: float = 300.0
    time_limit_days: float = 365.0

    # Context and grading material; all empty by default.
    prior_observations: List[str] = Field(default_factory=list)
    success_criteria: List[str] = Field(default_factory=list)
    paper_references: List[PaperReference] = Field(default_factory=list)
    expected_findings: List[ExpectedFinding] = Field(default_factory=list)

    difficulty: str = "medium"
|
|
|
|
|
class DiscoveryClaim(BaseModel):
    """Structured final claim graded against hidden truth."""

    # Free-text statement of the claim.
    claim: str = ""

    # Measured resonance properties; None means "not provided".
    mass_estimate_gev: Optional[float] = None
    mass_uncertainty_gev: Optional[float] = None
    width_estimate_gev: Optional[float] = None
    significance_sigma: Optional[float] = None
    decay_channel: Optional[str] = None
    spin_hypothesis: Optional[int] = None
    parity: Optional[str] = None
    cross_section_fb: Optional[float] = None

    # Validated to lie within [0, 1].
    confidence: float = Field(default=0.5, ge=0.0, le=1.0)

    # Step indices cited as supporting evidence.
    evidence_steps: List[int] = Field(default_factory=list)
|
|
|
|
|
class CollisionObservation(Observation):
    """Full observable state returned to the agent each step.

    Excludes the hidden particle truth and hidden detector systematics.
    """

    # Episode definition and progress.
    task: TaskSpec = Field(default_factory=TaskSpec)
    step_index: int = 0
    pipeline_history: List[PipelineStepRecord] = Field(default_factory=list)

    # Option lists; empty by default.
    available_channels: List[str] = Field(default_factory=list)
    available_triggers: List[str] = Field(default_factory=list)
    available_tools: List[str] = Field(default_factory=list)

    resource_usage: ResourceUsage = Field(default_factory=ResourceUsage)

    # Most recent artifact and the full list of artifacts.
    latest_output: Optional[IntermediateOutput] = None
    all_outputs: List[IntermediateOutput] = Field(default_factory=list)

    # Parallel lists: candidate_significances[i] belongs to
    # candidate_masses_gev[i]. Equal length is not validated here.
    candidate_masses_gev: List[float] = Field(default_factory=list)
    candidate_significances: List[float] = Field(default_factory=list)

    # Current experiment configuration, if chosen.
    selected_channel: Optional[str] = None
    selected_beam_energy: Optional[str] = None

    cumulative_significance: float = 0.0
    uncertainty_summary: Dict[str, float] = Field(default_factory=dict)
    rule_violations: List[str] = Field(default_factory=list)
    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)
|
|
|
|
|
|
|
|
|
|
|
# One guidance string per ActionType member. build_agent_system_prompt looks
# up every member here, so a missing entry would raise KeyError there.
AGENT_ACTION_GUIDANCE: Dict[ActionType, str] = {
    # Data acquisition
    ActionType.CONFIGURE_BEAM: (
        "Pick the LHC center-of-mass energy. Higher energy reaches heavier "
        "resonances but costs more per fb^-1. Required before collecting data."
    ),
    ActionType.ALLOCATE_LUMINOSITY: (
        "Schedule a chunk of integrated luminosity (fb^-1). More luminosity "
        "means more events but uses budget and time. Required before collecting."
    ),
    ActionType.SET_TRIGGER: (
        "Choose a hardware/HLT trigger. Match the trigger to the channel of "
        "interest; mismatched triggers throw away signal."
    ),
    ActionType.COLLECT_COLLISIONS: (
        "Run the experiment. Returns a noisy raw event count plus background "
        "estimate, conditioned on beam, luminosity, trigger, and channel."
    ),
    # Reconstruction and calibration
    ActionType.CALIBRATE_DETECTOR: (
        "Apply ECAL/tracker calibration. Reduces systematic uncertainty; "
        "neglecting it inflates fit uncertainty later."
    ),
    ActionType.RECONSTRUCT_TRACKS: (
        "Reconstruct charged-particle tracks and physics objects. Required "
        "before any analysis-level step."
    ),
    ActionType.SELECT_CHANNEL: (
        "Pick the decay channel to study (γγ, ℓℓ, jj, 4ℓ, bb). Wrong channel "
        "= small signal acceptance regardless of luminosity."
    ),
    # Analysis
    ActionType.BUILD_INVARIANT_MASS: (
        "Construct the invariant-mass histogram in the chosen channel and "
        "mass window."
    ),
    ActionType.SUBTRACT_BACKGROUND: (
        "Fit a smooth background model and subtract it to expose any peak."
    ),
    ActionType.FIT_RESONANCE: (
        "Fit a Breit-Wigner / Crystal Ball line shape. Returns mass, width, "
        "and statistical uncertainty."
    ),
    ActionType.SCAN_BUMP: (
        "Run a sliding-window bump hunt over the mass window. Reports the "
        "most-significant candidate region."
    ),
    ActionType.MEASURE_ANGULAR: (
        "Measure decay angular distribution to constrain spin/parity. "
        "Useful only after a peak is identified."
    ),
    ActionType.ESTIMATE_SIGNIFICANCE: (
        "Compute the statistical significance of a candidate signal in σ. "
        "Required before claiming a discovery."
    ),
    # Review requests
    ActionType.REQUEST_SYSTEMATICS: (
        "Run a systematics study (JES, luminosity, calibration). Improves "
        "uncertainty estimates and reduces overconfidence penalty."
    ),
    ActionType.REQUEST_THEORY_REVIEW: (
        "Ask a theorist sub-agent to review the evidence; small extra signal "
        "but not a substitute for missing data."
    ),
    # Terminal claim
    ActionType.SUBMIT_DISCOVERY_CLAIM: (
        "Submit a structured discovery claim. Graded on mass calibration, "
        "significance, channel, spin hypothesis, and overconfidence."
    ),
}
|
|
|
|
|
# Rules rendered as bullets in the agent system prompt, in this order.
AGENT_ENVIRONMENT_RULES: List[str] = [
    "Each successful action returns summarized evidence; do not repeat steps.",
    # One rule split over two literals; the parentheses make the implicit
    # concatenation explicit so it cannot be mistaken for a missing comma.
    (
        "Hard prerequisites are enforced: data collection requires beam+luminosity+trigger; "
        "analysis requires reconstruction and a chosen channel."
    ),
    "A discovery claim requires a fitted resonance and an estimated significance.",
    "Tools listed in available_tools are pre-filtered for this episode; prefer them.",
    "Submitting an overconfident wrong claim is heavily penalised.",
]
|
|
|
|
|
def build_agent_system_prompt() -> str:
    """Assemble the static system prompt given to the agent.

    The prompt consists of a role preamble, one bullet per entry of
    ``AGENT_ENVIRONMENT_RULES``, one bullet per ``ActionType`` member (in
    declaration order) with its ``AGENT_ACTION_GUIDANCE`` text, and the
    required JSON response format.

    Returns:
        The prompt as a single newline-joined string.

    Raises:
        KeyError: If an ``ActionType`` member has no guidance entry.
    """
    preamble = [
        "You are an expert high-energy physicist running an analysis at the LHC.",
        "",
        "At each turn you observe the experiment state and pick one structured next step",
        "to maximise the probability of correctly characterising a hidden resonance.",
        "",
        "Environment rules:",
    ]
    rule_bullets = [f" - {rule}" for rule in AGENT_ENVIRONMENT_RULES]
    action_bullets = [
        f" - {action.value}: {AGENT_ACTION_GUIDANCE[action]}"
        for action in ActionType
    ]
    response_format = [
        "",
        "Respond with ONLY a single valid JSON object, no extra prose:",
        '{"action_type": "...", "method": null, "parameters": {}, "justification": "...", "confidence": 0.8}',
        "",
        "For submit_discovery_claim, structure parameters['claim'] as:",
        # The three literals below concatenate into one example JSON line.
        '{"mass_estimate_gev": 125.0, "mass_uncertainty_gev": 0.5, "width_estimate_gev": 0.004,'
        ' "significance_sigma": 5.2, "decay_channel": "diphoton", "spin_hypothesis": 0,'
        ' "parity": "+", "cross_section_fb": 50.0, "confidence": 0.9}',
    ]
    sections = [
        *preamble,
        *rule_bullets,
        "",
        "Action guidance:",
        *action_bullets,
        *response_format,
    ]
    return "\n".join(sections)
|
|
|
|
|
def build_agent_observation_context(
    obs: CollisionObservation,
    *,
    max_tools: int = 6,
    max_channels: int = 4,
    max_candidates: int = 3,
) -> str:
    """Render a compact, human-readable summary of an observation.

    Args:
        obs: Observation to summarise. Only attribute access is used.
        max_tools: Maximum number of tool names listed; extra names are
            dropped without any marker.
        max_channels: Maximum number of channel names listed; extra names
            are dropped without any marker.
        max_candidates: Maximum number of candidate peaks listed.

    Returns:
        Newline-joined summary lines. Sections with nothing to show are
        omitted; the mass-window/difficulty line is always present.
    """
    parts: List[str] = []

    # A well-formed window has two entries; anything shorter is reported as
    # "unspecified" instead of raising IndexError while building a prompt.
    window = obs.task.mass_search_window_gev or []
    if len(window) >= 2:
        window_text = f"[{window[0]:.0f}, {window[1]:.0f}] GeV"
    else:
        window_text = "unspecified"
    parts.append(
        f"Mass search window: {window_text}; difficulty={obs.task.difficulty}."
    )

    # Observation-level lists take precedence over the task defaults;
    # dict.fromkeys removes duplicates while keeping first-seen order.
    chans = list(dict.fromkeys(obs.available_channels or obs.task.available_channels))
    if chans:
        parts.append("Available channels: " + ", ".join(chans[:max_channels]))

    tools = list(dict.fromkeys(obs.available_tools or obs.task.available_tools))
    if tools:
        parts.append("Available tools: " + ", ".join(tools[:max_tools]))

    if obs.selected_channel:
        parts.append(f"Selected channel: {obs.selected_channel}")
    if obs.selected_beam_energy:
        parts.append(f"Beam energy: {obs.selected_beam_energy}")

    masses = obs.candidate_masses_gev[:max_candidates]
    if masses:
        sigmas = obs.candidate_significances
        # The two lists are parallel but not length-validated. A mass with no
        # matching significance is shown with "?" rather than being dropped.
        peaks = [
            f"{mass:.1f}/" + (f"{sigmas[i]:.1f}" if i < len(sigmas) else "?")
            for i, mass in enumerate(masses)
        ]
        parts.append("Candidate peaks (GeV / σ): " + ", ".join(peaks))

    return "\n".join(parts)
|
|
|
|
|
# Public API of this module, listed in definition order.
__all__ = [
    # Action vocabulary and stage groupings
    "ActionType",
    "DAQ_ACTIONS",
    "RECO_ACTIONS",
    "ANALYSIS_ACTIONS",
    "META_ACTIONS",
    # Experiment option enums
    "DetectorChannel",
    "TriggerType",
    "BeamEnergy",
    # Tool catalogue
    "ToolCategory",
    "ToolSpec",
    "TOOL_REGISTRY",
    # Action and observation models
    "ExperimentAction",
    "OutputType",
    "IntermediateOutput",
    "ResourceUsage",
    "PipelineStepRecord",
    "PaperReference",
    "ExpectedFinding",
    "TaskSpec",
    "DiscoveryClaim",
    "CollisionObservation",
    # Prompt helpers
    "AGENT_ACTION_GUIDANCE",
    "AGENT_ENVIRONMENT_RULES",
    "build_agent_system_prompt",
    "build_agent_observation_context",
]
|
|
|