Spaces:
Sleeping
Sleeping
ChaitanyaRasane committed on
Commit ·
7e8d400
1
Parent(s): 34d620b
feat: OpenAI support and OpenEnv compliance fixes
Browse files
- Dockerfile +4 -4
- backend/main.py +10 -2
- baseline.py +28 -17
- requirements.txt +3 -0
Dockerfile
CHANGED
|
@@ -10,11 +10,11 @@ COPY . /app
|
|
| 10 |
# Install any needed packages specified in requirements.txt
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
EXPOSE
|
| 15 |
|
| 16 |
# Environment variable for the HF token (can be overridden at runtime)
|
| 17 |
ENV HF_TOKEN=""
|
| 18 |
|
| 19 |
-
# Run
|
| 20 |
-
CMD ["
|
|
|
|
| 10 |
# Install any needed packages specified in requirements.txt
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
+
# Expose port (HF Spaces uses 7860)
|
| 14 |
+
EXPOSE 7860
|
| 15 |
|
| 16 |
# Environment variable for the HF token (can be overridden at runtime)
|
| 17 |
ENV HF_TOKEN=""
|
| 18 |
|
| 19 |
+
# Run the FastAPI server
|
| 20 |
+
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
backend/main.py
CHANGED
|
@@ -106,7 +106,13 @@ async def reset_env():
|
|
| 106 |
global current_obs, episode_done
|
| 107 |
current_obs = env.reset()
|
| 108 |
episode_done = False
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
@app.post("/step")
|
|
@@ -126,8 +132,10 @@ async def step_env(req: StepRequest):
|
|
| 126 |
current_obs = obs
|
| 127 |
episode_done = done
|
| 128 |
|
|
|
|
| 129 |
return {
|
| 130 |
-
"observation":
|
|
|
|
| 131 |
"reward": round(reward, 4),
|
| 132 |
"done": done,
|
| 133 |
"info": {
|
|
|
|
| 106 |
global current_obs, episode_done
|
| 107 |
current_obs = env.reset()
|
| 108 |
episode_done = False
|
| 109 |
+
obs_data = obs_to_dict(current_obs)
|
| 110 |
+
return {
|
| 111 |
+
"observation": obs_data,
|
| 112 |
+
"state": obs_data, # OpenEnv compliance
|
| 113 |
+
"done": False,
|
| 114 |
+
"info": {"step_count": 0, "progress": 0.0}
|
| 115 |
+
}
|
| 116 |
|
| 117 |
|
| 118 |
@app.post("/step")
|
|
|
|
| 132 |
current_obs = obs
|
| 133 |
episode_done = done
|
| 134 |
|
| 135 |
+
obs_data = obs_to_dict(obs)
|
| 136 |
return {
|
| 137 |
+
"observation": obs_data,
|
| 138 |
+
"state": obs_data, # OpenEnv compliance
|
| 139 |
"reward": round(reward, 4),
|
| 140 |
"done": done,
|
| 141 |
"info": {
|
baseline.py
CHANGED
|
@@ -49,7 +49,7 @@ def heuristic_policy(obs: Observation) -> Action:
|
|
| 49 |
return Action(type="noop")
|
| 50 |
|
| 51 |
|
| 52 |
-
def llm_policy(client: OpenAI, obs: Observation) -> Action:
|
| 53 |
state_desc = (
|
| 54 |
f"Device: {obs.device}\n"
|
| 55 |
f"Button Size: {obs.layout.button_size:.2f}\n"
|
|
@@ -71,7 +71,7 @@ def llm_policy(client: OpenAI, obs: Observation) -> Action:
|
|
| 71 |
for attempt in range(max_retries + 1):
|
| 72 |
try:
|
| 73 |
response = client.chat.completions.create(
|
| 74 |
-
model=
|
| 75 |
messages=[
|
| 76 |
{"role": "system", "content": "You are a UI optimization agent."},
|
| 77 |
{"role": "user", "content": prompt},
|
|
@@ -112,15 +112,15 @@ def llm_policy(client: OpenAI, obs: Observation) -> Action:
|
|
| 112 |
return Action(type="noop")
|
| 113 |
|
| 114 |
|
| 115 |
-
def agent_policy(client: OpenAI, obs: Observation) -> Action:
|
| 116 |
heuristic_action = heuristic_policy(obs)
|
| 117 |
if heuristic_action.type != "noop":
|
| 118 |
return heuristic_action
|
| 119 |
else:
|
| 120 |
-
return llm_policy(client, obs)
|
| 121 |
|
| 122 |
|
| 123 |
-
def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
|
| 124 |
obs = env.reset()
|
| 125 |
total_reward = 0.0
|
| 126 |
done = False
|
|
@@ -128,7 +128,7 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
|
|
| 128 |
steps = 0
|
| 129 |
|
| 130 |
while not done and steps < MAX_STEPS:
|
| 131 |
-
action = agent_policy(client, obs)
|
| 132 |
obs, reward, done, info = env.step(action)
|
| 133 |
total_reward += reward
|
| 134 |
steps += 1
|
|
@@ -136,7 +136,7 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
|
|
| 136 |
if info.get("outcome") == "complete":
|
| 137 |
completed = True
|
| 138 |
|
| 139 |
-
time.sleep(
|
| 140 |
|
| 141 |
if DEBUG:
|
| 142 |
print(f" step={steps} action={action.type} reward={reward:+.3f} outcome={info.get('outcome')}")
|
|
@@ -144,14 +144,14 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
|
|
| 144 |
return total_reward, completed
|
| 145 |
|
| 146 |
|
| 147 |
-
def evaluate_task(task: str, client: OpenAI, n_episodes: int = 1) -> Tuple[float, float, float]:
|
| 148 |
total_rewards = 0.0
|
| 149 |
completions = 0
|
| 150 |
|
| 151 |
for ep in range(n_episodes):
|
| 152 |
env = load_env(task)
|
| 153 |
|
| 154 |
-
reward, completed = run_episode(env, client)
|
| 155 |
total_rewards += reward
|
| 156 |
if completed:
|
| 157 |
completions += 1
|
|
@@ -167,24 +167,35 @@ def evaluate_task(task: str, client: OpenAI, n_episodes: int = 1) -> Tuple[float
|
|
| 167 |
|
| 168 |
|
| 169 |
def main():
|
|
|
|
| 170 |
hf_token = os.getenv("HF_TOKEN")
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
return
|
| 174 |
|
| 175 |
-
client = OpenAI(
|
| 176 |
-
base_url="https://router.huggingface.co/v1",
|
| 177 |
-
api_key=os.getenv("HF_TOKEN")
|
| 178 |
-
)
|
| 179 |
tasks = ["easy", "medium", "hard"]
|
| 180 |
|
| 181 |
print("=" * 50)
|
| 182 |
-
print(" UIEnv Baseline Evaluation (
|
| 183 |
print("=" * 50)
|
| 184 |
|
| 185 |
for task in tasks:
|
| 186 |
print(f"\n> Evaluating task: {task}...")
|
| 187 |
-
avg_reward, completion_rate, score = evaluate_task(task, client)
|
| 188 |
print(f"\nTask: {task}")
|
| 189 |
print(f" Avg Reward: {avg_reward:.4f}")
|
| 190 |
print(f" Completion Rate: {completion_rate:.4f}")
|
|
|
|
| 49 |
return Action(type="noop")
|
| 50 |
|
| 51 |
|
| 52 |
+
def llm_policy(client: OpenAI, obs: Observation, model_name: str) -> Action:
|
| 53 |
state_desc = (
|
| 54 |
f"Device: {obs.device}\n"
|
| 55 |
f"Button Size: {obs.layout.button_size:.2f}\n"
|
|
|
|
| 71 |
for attempt in range(max_retries + 1):
|
| 72 |
try:
|
| 73 |
response = client.chat.completions.create(
|
| 74 |
+
model=model_name,
|
| 75 |
messages=[
|
| 76 |
{"role": "system", "content": "You are a UI optimization agent."},
|
| 77 |
{"role": "user", "content": prompt},
|
|
|
|
| 112 |
return Action(type="noop")
|
| 113 |
|
| 114 |
|
| 115 |
+
def agent_policy(client: OpenAI, obs: Observation, model_name: str) -> Action:
|
| 116 |
heuristic_action = heuristic_policy(obs)
|
| 117 |
if heuristic_action.type != "noop":
|
| 118 |
return heuristic_action
|
| 119 |
else:
|
| 120 |
+
return llm_policy(client, obs, model_name)
|
| 121 |
|
| 122 |
|
| 123 |
+
def run_episode(env: UIEnv, client: OpenAI, model_name: str) -> Tuple[float, bool]:
|
| 124 |
obs = env.reset()
|
| 125 |
total_reward = 0.0
|
| 126 |
done = False
|
|
|
|
| 128 |
steps = 0
|
| 129 |
|
| 130 |
while not done and steps < MAX_STEPS:
|
| 131 |
+
action = agent_policy(client, obs, model_name)
|
| 132 |
obs, reward, done, info = env.step(action)
|
| 133 |
total_reward += reward
|
| 134 |
steps += 1
|
|
|
|
| 136 |
if info.get("outcome") == "complete":
|
| 137 |
completed = True
|
| 138 |
|
| 139 |
+
time.sleep(1) # Reduced for HF readiness
|
| 140 |
|
| 141 |
if DEBUG:
|
| 142 |
print(f" step={steps} action={action.type} reward={reward:+.3f} outcome={info.get('outcome')}")
|
|
|
|
| 144 |
return total_reward, completed
|
| 145 |
|
| 146 |
|
| 147 |
+
def evaluate_task(task: str, client: OpenAI, model_name: str, n_episodes: int = 1) -> Tuple[float, float, float]:
|
| 148 |
total_rewards = 0.0
|
| 149 |
completions = 0
|
| 150 |
|
| 151 |
for ep in range(n_episodes):
|
| 152 |
env = load_env(task)
|
| 153 |
|
| 154 |
+
reward, completed = run_episode(env, client, model_name)
|
| 155 |
total_rewards += reward
|
| 156 |
if completed:
|
| 157 |
completions += 1
|
|
|
|
| 167 |
|
| 168 |
|
| 169 |
def main():
|
| 170 |
+
openai_key = os.getenv("OPENAI_API_KEY")
|
| 171 |
hf_token = os.getenv("HF_TOKEN")
|
| 172 |
+
|
| 173 |
+
if openai_key:
|
| 174 |
+
print("Using OpenAI API...")
|
| 175 |
+
client = OpenAI(api_key=openai_key)
|
| 176 |
+
model_name = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
|
| 177 |
+
provider_name = f"OpenAI ({model_name})"
|
| 178 |
+
elif hf_token:
|
| 179 |
+
print("Using Hugging Face Router...")
|
| 180 |
+
client = OpenAI(
|
| 181 |
+
base_url="https://router.huggingface.co/v1",
|
| 182 |
+
api_key=hf_token
|
| 183 |
+
)
|
| 184 |
+
model_name = "katanemo/Arch-Router-1.5B"
|
| 185 |
+
provider_name = f"HF Router ({model_name})"
|
| 186 |
+
else:
|
| 187 |
+
print("Error: Neither OPENAI_API_KEY nor HF_TOKEN environment variable set.")
|
| 188 |
return
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
tasks = ["easy", "medium", "hard"]
|
| 191 |
|
| 192 |
print("=" * 50)
|
| 193 |
+
print(f" UIEnv Baseline Evaluation ({provider_name})")
|
| 194 |
print("=" * 50)
|
| 195 |
|
| 196 |
for task in tasks:
|
| 197 |
print(f"\n> Evaluating task: {task}...")
|
| 198 |
+
avg_reward, completion_rate, score = evaluate_task(task, client, model_name)
|
| 199 |
print(f"\nTask: {task}")
|
| 200 |
print(f" Avg Reward: {avg_reward:.4f}")
|
| 201 |
print(f" Completion Rate: {completion_rate:.4f}")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
openai
|
| 2 |
pydantic
|
| 3 |
numpy
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
openai
|
| 2 |
pydantic
|
| 3 |
numpy
|
| 4 |
+
fastapi
|
| 5 |
+
uvicorn
|
| 6 |
+
python-multipart
|