# UI-layout-optimizer / inference.py
# Kolaps27's picture
# chore: final code refactoring for portability and robustness
# 0e5f237
import os
import argparse
from typing import List, Optional
from env import UIEnv, Observation, Action, clamp_score
# Runtime configuration, read once from the process environment at import time.
# Endpoint and model used when an API key is available.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
# HF_TOKEN takes precedence; API_KEY is consulted only when HF_TOKEN is unset or empty.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
# No default: stays None unless LOCAL_IMAGE_NAME is set.
IMAGE_NAME = os.environ.get("LOCAL_IMAGE_NAME")
# Name reported as `env=` in the [START] log line.
BENCHMARK = os.environ.get("BENCHMARK", "ui_layout_optimizer")
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line that opens an episode's log.

    Args:
        task: Task identifier written as ``task=``.
        env: Environment/benchmark name written as ``env=``.
        model: Model name written as ``model=``.
    """
    # flush=True so the line is written immediately rather than buffered.
    print(f"[START] task={task} env={env} model={model}", flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` log line describing a single environment step.

    Args:
        step: Step number written as ``step=``.
        action: Already-formatted action string written as ``action=``.
        reward: Step reward, rendered with two decimal places.
        done: Episode-finished flag, rendered as lowercase ``true``/``false``.
        error: Error text for the step; ``None`` or an empty string is
            rendered as the literal ``null``.
    """
    error_val = error if error else "null"
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )
def log_end(success: bool, steps: int, rewards: List[float]) -> None:
    """Print the ``[END]`` line that closes an episode's log.

    Args:
        success: Outcome flag, rendered as lowercase ``true``/``false``.
        steps: Number of steps written as ``steps=``.
        rewards: Per-step rewards, rendered comma-separated with two decimal
            places each; an empty list yields an empty ``rewards=`` field.
    """
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    success_val = str(success).lower()
    print(f"[END] success={success_val} steps={steps} rewards={rewards_str}", flush=True)
def run_inference(task_id: str = "easy") -> None:
    """
    Standard OpenEnv inference entry point.

    Runs one episode of ``UIEnv`` for ``task_id`` and prints the
    ``[START]`` / ``[STEP]`` / ``[END]`` log lines. Actions come from
    ``baseline.agent_policy`` when ``API_KEY`` is set, otherwise from
    ``baseline.heuristic_policy``.

    The ``[END]`` line is printed even if the episode raises, so the log is
    always terminated; the exception itself still propagates to the caller.

    Args:
        task_id: Task to run; passed to ``UIEnv`` and used as the key into
            ``env.task_dict`` when grading.
    """
    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)

    rewards: List[float] = []
    completed = False
    try:
        # 1. Setup Environment
        env = UIEnv(seed=42, task=task_id)
        obs = env.reset()

        # 2. Setup Client and resolve the policy once, rather than
        #    re-running the import statement on every loop iteration.
        if API_KEY:
            from openai import OpenAI
            from baseline import agent_policy

            client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
        else:
            # Fallback to heuristic if no API key is provided
            from baseline import heuristic_policy

            client = None

        done = False
        while not done:
            # 3. Perform Inference Step
            if client is not None:
                action = agent_policy(client, obs, MODEL_NAME)
            else:
                action = heuristic_policy(obs)

            # Format action for logging: "type" or "type(value)"
            action_str = action.type
            if action.value is not None:
                action_str += f"({action.value})"

            # 4. Step Environment
            obs, reward, done, info = env.step(action)
            rewards.append(reward)
            # len(rewards) is the 1-based index of the step just completed.
            log_step(
                step=len(rewards),
                action=action_str,
                reward=reward,
                done=done,
                error=info.get("error"),
            )

            if info.get("outcome") == "complete" or info.get("completed") is True:
                completed = True

        # 5. Retrieve agent performance score from evaluator.
        # NOTE(review): the clamped score is computed but not reported
        # anywhere; the call is kept in case the grader has side effects.
        # Confirm, then either log the score or remove the call.
        clamp_score(env.task_dict[task_id].grader())
    finally:
        # Only completed steps are counted, so a failure mid-step does not
        # inflate the reported step total.
        log_end(success=completed, steps=len(rewards), rewards=rewards)
if __name__ == "__main__":
    # Command-line entry point: choose the task, then run a single episode.
    parser = argparse.ArgumentParser(description="Run UIEnv Inference")
    # Default precedence: TASK, then MY_ENV_TASK, then "easy". The `or` chain
    # treats a variable that is set but empty as unset, so an empty TASK
    # cannot select an empty task id.
    default_task = os.getenv("TASK") or os.getenv("MY_ENV_TASK") or "easy"
    parser.add_argument("--task", type=str, default=default_task, help="Task difficulty (easy, medium, hard)")
    args = parser.parse_args()
    run_inference(task_id=args.task)