ChaitanyaRasane committed on
Commit
7e8d400
·
1 Parent(s): 34d620b

feat: OpenAI support and OpenEnv compliance fixes

Browse files
Files changed (4) hide show
  1. Dockerfile +4 -4
  2. backend/main.py +10 -2
  3. baseline.py +28 -17
  4. requirements.txt +3 -0
Dockerfile CHANGED
@@ -10,11 +10,11 @@ COPY . /app
10
  # Install any needed packages specified in requirements.txt
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
- # Make port 80 available to the world outside this container
14
- EXPOSE 80
15
 
16
  # Environment variable for the HF token (can be overridden at runtime)
17
  ENV HF_TOKEN=""
18
 
19
- # Run baseline.py when the container launches
20
- CMD ["python", "baseline.py"]
 
10
  # Install any needed packages specified in requirements.txt
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
+ # Expose port (HF Spaces uses 7860)
14
+ EXPOSE 7860
15
 
16
  # Environment variable for the HF token (can be overridden at runtime)
17
  ENV HF_TOKEN=""
18
 
19
+ # Run the FastAPI server
20
+ CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/main.py CHANGED
@@ -106,7 +106,13 @@ async def reset_env():
106
  global current_obs, episode_done
107
  current_obs = env.reset()
108
  episode_done = False
109
- return {"observation": obs_to_dict(current_obs), "done": False}
 
 
 
 
 
 
110
 
111
 
112
  @app.post("/step")
@@ -126,8 +132,10 @@ async def step_env(req: StepRequest):
126
  current_obs = obs
127
  episode_done = done
128
 
 
129
  return {
130
- "observation": obs_to_dict(obs),
 
131
  "reward": round(reward, 4),
132
  "done": done,
133
  "info": {
 
106
  global current_obs, episode_done
107
  current_obs = env.reset()
108
  episode_done = False
109
+ obs_data = obs_to_dict(current_obs)
110
+ return {
111
+ "observation": obs_data,
112
+ "state": obs_data, # OpenEnv compliance
113
+ "done": False,
114
+ "info": {"step_count": 0, "progress": 0.0}
115
+ }
116
 
117
 
118
  @app.post("/step")
 
132
  current_obs = obs
133
  episode_done = done
134
 
135
+ obs_data = obs_to_dict(obs)
136
  return {
137
+ "observation": obs_data,
138
+ "state": obs_data, # OpenEnv compliance
139
  "reward": round(reward, 4),
140
  "done": done,
141
  "info": {
baseline.py CHANGED
@@ -49,7 +49,7 @@ def heuristic_policy(obs: Observation) -> Action:
49
  return Action(type="noop")
50
 
51
 
52
- def llm_policy(client: OpenAI, obs: Observation) -> Action:
53
  state_desc = (
54
  f"Device: {obs.device}\n"
55
  f"Button Size: {obs.layout.button_size:.2f}\n"
@@ -71,7 +71,7 @@ def llm_policy(client: OpenAI, obs: Observation) -> Action:
71
  for attempt in range(max_retries + 1):
72
  try:
73
  response = client.chat.completions.create(
74
- model="katanemo/Arch-Router-1.5B",
75
  messages=[
76
  {"role": "system", "content": "You are a UI optimization agent."},
77
  {"role": "user", "content": prompt},
@@ -112,15 +112,15 @@ def llm_policy(client: OpenAI, obs: Observation) -> Action:
112
  return Action(type="noop")
113
 
114
 
115
- def agent_policy(client: OpenAI, obs: Observation) -> Action:
116
  heuristic_action = heuristic_policy(obs)
117
  if heuristic_action.type != "noop":
118
  return heuristic_action
119
  else:
120
- return llm_policy(client, obs)
121
 
122
 
123
- def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
124
  obs = env.reset()
125
  total_reward = 0.0
126
  done = False
@@ -128,7 +128,7 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
128
  steps = 0
129
 
130
  while not done and steps < MAX_STEPS:
131
- action = agent_policy(client, obs)
132
  obs, reward, done, info = env.step(action)
133
  total_reward += reward
134
  steps += 1
@@ -136,7 +136,7 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
136
  if info.get("outcome") == "complete":
137
  completed = True
138
 
139
- time.sleep(5)
140
 
141
  if DEBUG:
142
  print(f" step={steps} action={action.type} reward={reward:+.3f} outcome={info.get('outcome')}")
@@ -144,14 +144,14 @@ def run_episode(env: UIEnv, client: OpenAI) -> Tuple[float, bool]:
144
  return total_reward, completed
145
 
146
 
147
- def evaluate_task(task: str, client: OpenAI, n_episodes: int = 1) -> Tuple[float, float, float]:
148
  total_rewards = 0.0
149
  completions = 0
150
 
151
  for ep in range(n_episodes):
152
  env = load_env(task)
153
 
154
- reward, completed = run_episode(env, client)
155
  total_rewards += reward
156
  if completed:
157
  completions += 1
@@ -167,24 +167,35 @@ def evaluate_task(task: str, client: OpenAI, n_episodes: int = 1) -> Tuple[float
167
 
168
 
169
  def main():
 
170
  hf_token = os.getenv("HF_TOKEN")
171
- if not hf_token:
172
- print("Error: HF_TOKEN environment variable not set.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  return
174
 
175
- client = OpenAI(
176
- base_url="https://router.huggingface.co/v1",
177
- api_key=os.getenv("HF_TOKEN")
178
- )
179
  tasks = ["easy", "medium", "hard"]
180
 
181
  print("=" * 50)
182
- print(" UIEnv Baseline Evaluation (Hugging Face Router)")
183
  print("=" * 50)
184
 
185
  for task in tasks:
186
  print(f"\n> Evaluating task: {task}...")
187
- avg_reward, completion_rate, score = evaluate_task(task, client)
188
  print(f"\nTask: {task}")
189
  print(f" Avg Reward: {avg_reward:.4f}")
190
  print(f" Completion Rate: {completion_rate:.4f}")
 
49
  return Action(type="noop")
50
 
51
 
52
+ def llm_policy(client: OpenAI, obs: Observation, model_name: str) -> Action:
53
  state_desc = (
54
  f"Device: {obs.device}\n"
55
  f"Button Size: {obs.layout.button_size:.2f}\n"
 
71
  for attempt in range(max_retries + 1):
72
  try:
73
  response = client.chat.completions.create(
74
+ model=model_name,
75
  messages=[
76
  {"role": "system", "content": "You are a UI optimization agent."},
77
  {"role": "user", "content": prompt},
 
112
  return Action(type="noop")
113
 
114
 
115
+ def agent_policy(client: OpenAI, obs: Observation, model_name: str) -> Action:
116
  heuristic_action = heuristic_policy(obs)
117
  if heuristic_action.type != "noop":
118
  return heuristic_action
119
  else:
120
+ return llm_policy(client, obs, model_name)
121
 
122
 
123
+ def run_episode(env: UIEnv, client: OpenAI, model_name: str) -> Tuple[float, bool]:
124
  obs = env.reset()
125
  total_reward = 0.0
126
  done = False
 
128
  steps = 0
129
 
130
  while not done and steps < MAX_STEPS:
131
+ action = agent_policy(client, obs, model_name)
132
  obs, reward, done, info = env.step(action)
133
  total_reward += reward
134
  steps += 1
 
136
  if info.get("outcome") == "complete":
137
  completed = True
138
 
139
+ time.sleep(1) # Reduced for HF readiness
140
 
141
  if DEBUG:
142
  print(f" step={steps} action={action.type} reward={reward:+.3f} outcome={info.get('outcome')}")
 
144
  return total_reward, completed
145
 
146
 
147
+ def evaluate_task(task: str, client: OpenAI, model_name: str, n_episodes: int = 1) -> Tuple[float, float, float]:
148
  total_rewards = 0.0
149
  completions = 0
150
 
151
  for ep in range(n_episodes):
152
  env = load_env(task)
153
 
154
+ reward, completed = run_episode(env, client, model_name)
155
  total_rewards += reward
156
  if completed:
157
  completions += 1
 
167
 
168
 
169
  def main():
170
+ openai_key = os.getenv("OPENAI_API_KEY")
171
  hf_token = os.getenv("HF_TOKEN")
172
+
173
+ if openai_key:
174
+ print("Using OpenAI API...")
175
+ client = OpenAI(api_key=openai_key)
176
+ model_name = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
177
+ provider_name = f"OpenAI ({model_name})"
178
+ elif hf_token:
179
+ print("Using Hugging Face Router...")
180
+ client = OpenAI(
181
+ base_url="https://router.huggingface.co/v1",
182
+ api_key=hf_token
183
+ )
184
+ model_name = "katanemo/Arch-Router-1.5B"
185
+ provider_name = f"HF Router ({model_name})"
186
+ else:
187
+ print("Error: Neither OPENAI_API_KEY nor HF_TOKEN environment variable set.")
188
  return
189
 
 
 
 
 
190
  tasks = ["easy", "medium", "hard"]
191
 
192
  print("=" * 50)
193
+ print(f" UIEnv Baseline Evaluation ({provider_name})")
194
  print("=" * 50)
195
 
196
  for task in tasks:
197
  print(f"\n> Evaluating task: {task}...")
198
+ avg_reward, completion_rate, score = evaluate_task(task, client, model_name)
199
  print(f"\nTask: {task}")
200
  print(f" Avg Reward: {avg_reward:.4f}")
201
  print(f" Completion Rate: {completion_rate:.4f}")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  openai
2
  pydantic
3
  numpy
 
 
 
 
1
  openai
2
  pydantic
3
  numpy
4
+ fastapi
5
+ uvicorn
6
+ python-multipart