# Author: hiitsesh
# Commit 4e608c3: Initialize GPUClusterEnv boilerplate as per OpenEnv requirements
import subprocess
import sys

from fastapi import FastAPI, HTTPException

from src.env import GPUClusterEnv
from src.models import Action, TaskConfig
from src.tasks import TASKS
# Application object; the route decorators below register their handlers on it.
app = FastAPI(title="GPU Cluster OpenEnv")
# Single module-level environment instance used by the /reset, /step, /state
# and /grader handlers below.
env = GPUClusterEnv()
@app.get("/")
def health_check():
    """Liveness probe: return a fixed payload saying the service is running."""
    payload = {
        "status": "ok",
        "message": "GPUClusterEnv is running",
    }
    return payload
@app.post("/reset")
def reset_env(task_id: str = "easy"):
    """Reset the shared environment to the task registered under ``task_id``.

    Returns the result of ``env.reset`` serialized under the
    ``"observation"`` key.

    Raises:
        HTTPException: 404 ("Task not found") when ``task_id`` is not a key
            of ``TASKS``.
    """
    if task_id in TASKS:
        initial_observation = env.reset(TASKS[task_id])
        return {"observation": initial_observation.dict()}
    raise HTTPException(status_code=404, detail="Task not found")
@app.post("/step")
def step_env(action: Action):
    """Apply ``action`` to the shared environment and return the step result.

    Returns ``env.step(action)`` serialized with ``.dict()``.

    Raises:
        HTTPException: 400 with the exception text as detail when
            ``env.step`` or the serialization of its result raises.
    """
    try:
        result = env.step(action)
        return result.dict()
    except Exception as exc:
        # Chain the original error so server-side tracebacks show it as the
        # explicit cause; the HTTP response itself is unchanged.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
@app.get("/state")
def get_state():
    """Return the environment's current state under the ``"observation"`` key.

    Raises:
        HTTPException: 400 ("Environment not initialized") when
            ``env.state`` is ``None``.
    """
    current_state = env.state
    if current_state is not None:
        return {"observation": current_state.dict()}
    raise HTTPException(status_code=400, detail="Environment not initialized")
@app.get("/tasks")
def list_tasks():
    """List the registered task ids together with the ``Action`` schema."""
    task_ids = list(TASKS.keys())
    action_schema = Action.schema()
    return {"tasks": task_ids, "action_schema": action_schema}
@app.get("/grader")
def grader():
    """Return the total reward normalized to a score in [0.0, 1.0].

    The score is ``env.total_reward`` divided by ``env.config.max_steps * 10``
    and clamped to the unit interval. A score of 0.0 is returned when
    ``env.state`` is ``None``, or when the reward ceiling is not positive
    (for example ``max_steps`` is 0), which would otherwise divide by zero.
    """
    if env.state is None:
        return {"score": 0.0}

    # Arbitrary per-step reward ceiling used as the normalization baseline.
    max_expected_reward = env.config.max_steps * 10
    if max_expected_reward <= 0:
        # No meaningful baseline to normalize against; avoid ZeroDivisionError.
        return {"score": 0.0}

    score = max(0.0, min(1.0, env.total_reward / max_expected_reward))
    return {"score": score}
@app.post("/baseline")
def run_baseline():
    """Run the baseline script in a subprocess and return what it produced.

    Returns a dict with the script's captured stdout under ``"output"``,
    plus ``"returncode"`` and ``"stderr"`` so callers can tell a failed run
    from one that simply printed nothing.
    """
    # Use the interpreter running this server rather than whatever "python"
    # resolves to on PATH (which may be missing or a different environment).
    interpreter = sys.executable or "python"
    # NOTE(review): the script path is relative, so this presumably expects
    # the server to be started from the project root; confirm.
    result = subprocess.run(
        [interpreter, "src/baseline.py"],
        capture_output=True,
        text=True,
    )
    return {
        "output": result.stdout,
        "returncode": result.returncode,
        "stderr": result.stderr,
    }