meta-rl-dsa-solver / models.py
s-shah4
Add V1 env
4433dc8
from __future__ import annotations
from typing import Any
from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field
class AdaptAction(Action):
    """Action carrying a piece of Python code to execute."""

    # Required field (no default); ``min_length=1`` rejects the empty string.
    code: str = Field(
        ...,
        min_length=1,
        description="Python code to execute.",
    )
class AdaptObservation(Observation):
    """Observation combining a problem description with execution results.

    Every field has a default, so an instance can be built without passing
    any of the fields declared here. Fields of the ``Observation`` base
    class are declared elsewhere and are not described in this docstring.
    """

    # --- Problem description -------------------------------------------
    problem_id: str = Field("", description="Current problem identifier.")
    difficulty: str = Field("", description="Current curriculum difficulty tier.")
    problem: str = Field("", description="Problem statement shown to the agent.")
    input_format: str = Field("", description="Expected stdin format.")
    constraints: str = Field("", description="Problem constraints.")
    # Lists of string-to-string mappings; the key names are not fixed here.
    examples: list[dict[str, str]] = Field(default_factory=list)
    visible_tests: list[dict[str, str]] = Field(default_factory=list)

    # --- Execution outcome ---------------------------------------------
    feedback: str = Field("", description="Human-readable execution feedback.")
    # Each rate below is validated to lie within the closed range [0.0, 1.0].
    pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    visible_pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    hidden_pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    syntax_valid: bool = True
    # Starts as "not_run"; the other possible status strings are set by the
    # producer of this observation and are not enumerated in this model.
    execution_status: str = "not_run"
    # Counters are validated to be non-negative.
    timeout_count: int = Field(0, ge=0)
    runtime_error_count: int = Field(0, ge=0)
    format_compliance: float = Field(0.0, ge=0.0, le=1.0)
    # Named float values; presumably the per-term breakdown of the reward.
    # TODO confirm against the code that populates it.
    reward_components: dict[str, float] = Field(default_factory=dict)
class AdaptState(State):
    """Environment state holding the current problem and latest results.

    Every field has a default, so an instance can be built without passing
    any of the fields declared here.
    """

    problem_id: str = ""
    difficulty: str = ""
    last_reward: float = 0.0
    # Validated to lie within the closed range [0.0, 1.0].
    last_pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    last_feedback: str = ""
    # Free-form mapping; its keys and value types are not constrained here.
    recent_metrics: dict[str, Any] = Field(default_factory=dict)