# Uploaded by amaljoe88
# deploy: sync 712e5bc -> HF
# commit cf6c0e0
"""HTTPEnvClient for the VisionCoder OpenEnv environment."""
from __future__ import annotations
import base64
import io
from typing import Optional
import httpx
from PIL import Image
from openenv.models import Action, Observation, RenderRequest, RenderResponse, State
class VisionCoderClient:
    """Synchronous HTTP client for the VisionCoder OpenEnv server.

    Wraps an ``httpx.Client`` bound to ``base_url`` and exposes the server's
    ``/reset``, ``/step``, ``/render``, ``/state`` and ``/close`` endpoints as
    methods. Can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self, base_url: str = "http://localhost:8080", timeout: float = 120.0):
        """Create the underlying HTTP client.

        Args:
            base_url: Root URL of the server; trailing slashes are stripped.
            timeout: Timeout value handed to ``httpx.Client``.
        """
        self._base_url = base_url.rstrip("/")
        self._client = httpx.Client(base_url=self._base_url, timeout=timeout)

    @staticmethod
    def _payload(resp: httpx.Response) -> dict:
        """Raise on a 4xx/5xx response, otherwise return the decoded JSON body."""
        resp.raise_for_status()
        return resp.json()

    # ------------------------------------------------------------------
    # Core OpenEnv interface
    # ------------------------------------------------------------------
    def reset(self, difficulty: str = "mixed") -> Observation:
        """Start a new episode. Returns Observation with session_id and reference screenshot.

        Sends ``POST /reset`` with ``difficulty`` as a query parameter.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        resp = self._client.post("/reset", params={"difficulty": difficulty})
        return Observation(**self._payload(resp))

    def step(self, action: Action) -> Observation:
        """Submit HTML. Returns reward, render_low, render_full, done.

        Sends ``POST /step`` with ``action.model_dump()`` as the JSON body.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        resp = self._client.post("/step", json=action.model_dump())
        return Observation(**self._payload(resp))

    def render(self, html: str) -> RenderResponse:
        """Render HTML to images without scoring (Developer tool call).

        Sends ``POST /render`` with a ``RenderRequest`` built from ``html``.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        body = RenderRequest(html=html).model_dump()
        resp = self._client.post("/render", json=body)
        return RenderResponse(**self._payload(resp))

    def state(self) -> State:
        """Return current episode metadata from the server (``GET /state``).

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        resp = self._client.get("/state")
        return State(**self._payload(resp))

    def close(self) -> None:
        """Signal session end and release HTTP resources.

        Safe to call more than once: once the underlying client is closed,
        further calls return immediately instead of trying to send
        ``DELETE /close`` through a closed client (which would raise).

        The status of the ``DELETE /close`` response is not checked. If
        sending it raises, the exception propagates, but the HTTP client is
        still closed first.
        """
        if self._client.is_closed:
            # Already shut down (e.g. explicit close() inside a `with` block).
            return
        try:
            self._client.delete("/close")
        finally:
            self._client.close()

    # ------------------------------------------------------------------
    # Convenience helpers
    # ------------------------------------------------------------------
    def decode_image(self, b64: Optional[str]) -> Optional[Image.Image]:
        """Decode a base64 PNG string into a PIL Image.

        Returns:
            ``None`` when ``b64`` is ``None``; otherwise the decoded image
            converted to RGB mode.
        """
        if b64 is None:
            return None
        raw = base64.b64decode(b64)
        return Image.open(io.BytesIO(raw)).convert("RGB")

    def decode_screenshot(self, obs: Observation) -> Optional[Image.Image]:
        """Decode the reference screenshot (``obs.screenshot_b64``) from a reset() Observation."""
        return self.decode_image(obs.screenshot_b64)

    # ------------------------------------------------------------------
    # Context manager support
    # ------------------------------------------------------------------
    def __enter__(self) -> VisionCoderClient:
        """Return this client so it can be bound with ``with ... as client``."""
        return self

    def __exit__(self, *args: object) -> None:
        """Close the client on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()