# Page header residue: atin5551 | Deploy Varaha OpenEnv Docker Space | cb70a7d
"""Pydantic models for the Varaha OpenEnv environment."""
from typing import Any, Dict, List, Optional
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State
class VarahaAction(Action):
    """Drone acceleration command with automatic delivery/recharge."""

    # Per-axis acceleration request; every component defaults to 0.0.
    ax: float = Field(
        default=0.0,
        description="Desired acceleration along x-axis (m/s^2)",
    )
    ay: float = Field(
        default=0.0,
        description="Desired acceleration along y-axis (m/s^2)",
    )
    az: float = Field(
        default=0.0,
        description="Desired acceleration along z-axis (m/s^2)",
    )

    # Both attempt flags default to True, so an action built with no
    # arguments still requests delivery and recharge.
    deliver: bool = Field(
        default=True,
        description="Attempt delivery when near a target",
    )
    recharge: bool = Field(
        default=True,
        description="Attempt recharge when near base station",
    )

    # Tool request string; defaults to the empty string. The accepted
    # spellings are listed in the description below.
    tool_call: str = Field(
        default="",
        description="Optional tool call: request_intel[:target_id] | battery_forecast | mission_report",
    )
class VarahaObservation(Observation):
    """Full observation returned after each step/reset."""

    # --- Drone kinematics -------------------------------------------------
    # Both mappings default to a fresh empty dict per instance.
    drone_position: Dict[str, float] = Field(
        description="Drone {x, y, z} in local metres",
        default_factory=dict,
    )
    drone_velocity: Dict[str, float] = Field(
        description="Drone velocity {x, y, z} in m/s",
        default_factory=dict,
    )

    # --- Drone status -----------------------------------------------------
    battery: float = Field(
        default=0.0,
        description="Remaining battery units",
    )
    carrying_payload: bool = Field(
        default=True,
        description="Whether the drone still carries payload",
    )
    alive: bool = Field(
        default=True,
        description="Whether the drone is still operational",
    )

    # --- World entities ---------------------------------------------------
    # Each entry is a loosely-typed dict; the element schema is not
    # declared in this module.
    targets: List[Dict[str, Any]] = Field(
        description="Per-target relative position, urgency, delivered status",
        default_factory=list,
    )
    hazards: List[Dict[str, Any]] = Field(
        description="Per-hazard relative position, current height, severity",
        default_factory=list,
    )

    # --- Episode progress -------------------------------------------------
    step_num: int = Field(
        default=0,
        description="Current step number in the episode",
    )
    max_steps: int = Field(
        default=2000,
        description="Maximum allowed steps",
    )

    # --- Reward, mission and tool feedback --------------------------------
    reward_breakdown: Dict[str, float] = Field(
        description="Itemised reward components from the last step",
        default_factory=dict,
    )
    mission: Dict[str, Any] = Field(
        description="Instruction-mode progress, next instruction, and violation counters",
        default_factory=dict,
    )
    last_tool_result: Dict[str, Any] = Field(
        description="Result payload from the most recent tool call",
        default_factory=dict,
    )

    # --- Episode outcome --------------------------------------------------
    success: bool = Field(
        default=False,
        description="Whether the mission is successfully completed",
    )
    # Defaults to None (no trace attached).
    trace: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Full episode trace (only populated on the final step)",
    )
class VarahaState(State):
    """Internal environment state exposed via the state property."""

    # Running totals; all numeric fields default to zero.
    cumulative_reward: float = Field(
        default=0.0,
        description="Total accumulated reward",
    )
    deliveries_completed: int = Field(
        default=0,
        description="Number of targets delivered so far",
    )
    total_targets: int = Field(
        default=0,
        description="Total number of targets in the episode",
    )

    # Snapshot values.
    battery: float = Field(
        default=0.0,
        description="Current battery level",
    )
    success: bool = Field(
        default=False,
        description="Whether the mission is complete",
    )