NeerajCodz committed on
Commit
8341a89
·
1 Parent(s): 5b2dac6

feat: add OpenEnv-compliant root inference runner

Browse files

- Added root inference.py using OpenAI client with API_BASE_URL and MODEL_NAME defaults
- Enforced HF_TOKEN as mandatory without default
- Implemented strict [START]/[STEP]/[END] stdout formatting and guaranteed [END] emission
- Added runtime adapter auto-detect (OpenEnv SDK first, ScrapeRL episode API fallback)
- Updated .env.example and README with inference configuration and usage

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Files changed (3) hide show
  1. .env.example +4 -0
  2. README.md +26 -0
  3. inference.py +539 -0
.env.example CHANGED
@@ -8,6 +8,10 @@ NVIDIA_API_KEY=
8
  # HuggingFace
9
  HF_TOKEN=
10
 
 
 
 
 
11
  # App Settings
12
  DEBUG=false
13
  LOG_LEVEL=INFO
 
8
  # HuggingFace
9
  HF_TOKEN=
10
 
11
+ # OpenEnv inference.py (required for hackathon submission)
12
+ API_BASE_URL=https://api.openai.com/v1
13
+ MODEL_NAME=gpt-4.1-mini
14
+
15
  # App Settings
16
  DEBUG=false
17
  LOG_LEVEL=INFO
README.md CHANGED
@@ -89,6 +89,32 @@ npm run dev
89
 
90
  Frontend will be at **http://localhost:5173**
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  ## 📡 API Endpoints
93
 
94
  ### Core Endpoints
 
89
 
90
  Frontend will be at **http://localhost:5173**
91
 
92
+ ## 🧪 OpenEnv Hackathon Inference Script
93
+
94
+ This repository now includes a root-level **`inference.py`** for OpenEnv-style evaluation.
95
+
96
+ ### Required environment variables
97
+ - `API_BASE_URL` (defaulted in script)
98
+ - `MODEL_NAME` (defaulted in script)
99
+ - `HF_TOKEN` (**required**, no default)
100
+
101
+ ### Run
102
+ ```bash
103
+ python inference.py --task task_001 --benchmark openenv
104
+ ```
105
+
106
+ ### Output contract
107
+ `inference.py` emits strict structured stdout lines:
108
+ ```text
109
+ [START] task=<task_name> env=<benchmark> model=<model_name>
110
+ [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
111
+ [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
112
+ ```
113
+
114
+ Notes:
115
+ - OpenAI client (`from openai import OpenAI`) is used as the default LLM caller.
116
+ - The script attempts OpenEnv SDK runtime first and falls back to `/api/episode/reset` + `/api/episode/step`.
117
+
118
  ## 📡 API Endpoints
119
 
120
  ### Core Endpoints
inference.py ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import ast
5
+ import json
6
+ import os
7
+ import sys
8
+ from dataclasses import dataclass
9
+ from typing import Any, Protocol
10
+ from urllib import error as url_error
11
+ from urllib import request as url_request
12
+
13
+ from openai import OpenAI
14
+
15
+
16
def _env_number(name, default, caster):
    """Read a numeric environment variable without ever raising.

    Unset or blank values yield *default*. A value that *caster* rejects is
    reported on stderr (stdout is reserved for the structured run log) and
    also yields *default*, so a bad setting cannot crash the module at import
    time before any output line is written.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return caster(raw)
    except ValueError:
        print(f"warning: ignoring invalid {name}={raw!r}; using {default!r}", file=sys.stderr)
        return default


# Required hackathon configuration variables.
# `or` (rather than a getenv default) also covers variables that are set but
# empty, e.g. a bare `API_BASE_URL=` line in a .env file.
API_BASE_URL = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "gpt-4.1-mini"
# Deliberately no default: run_inference() rejects a missing token.
HF_TOKEN = os.getenv("HF_TOKEN")

# Optional runtime variables for local/OpenEnv execution
ENV_API_BASE_URL = os.getenv("ENV_API_BASE_URL") or "http://localhost:8000/api"
TASK_NAME_DEFAULT = os.getenv("TASK_NAME") or "task_001"
BENCHMARK_DEFAULT = os.getenv("BENCHMARK") or "openenv"
MAX_STEPS_DEFAULT = _env_number("MAX_STEPS", 12, int)
EPISODE_SEED_DEFAULT = _env_number("EPISODE_SEED", 42, int)
LLM_TEMPERATURE = _env_number("LLM_TEMPERATURE", 0.0, float)
PROMPT_HTML_LIMIT = _env_number("PROMPT_HTML_LIMIT", 5000, int)
REQUEST_TIMEOUT_SECONDS = _env_number("REQUEST_TIMEOUT_SECONDS", 30.0, float)
USE_OPENENV_SDK = os.getenv("USE_OPENENV_SDK", "true").strip().lower() in {"1", "true", "yes", "on"}
31
+
32
+
33
@dataclass
class StepOutcome:
    """Adapter-independent record of what one environment step returned."""

    # Observation mapping returned by the environment for the new state.
    observation: dict[str, Any]
    # Reward reported for the step.
    reward: float
    # Whether the environment flagged the episode as terminated.
    terminated: bool
    # Whether the environment flagged the episode as truncated.
    truncated: bool
    # Auxiliary info mapping returned next to the observation.
    info: dict[str, Any]

    @property
    def done(self) -> bool:
        """Tell whether the episode is over (terminated or truncated)."""
        if self.terminated:
            return self.terminated
        return self.truncated
44
+
45
+
46
class EpisodeAdapter(Protocol):
    """Structural interface shared by the environment back-ends in this file."""

    def reset(self, task_name: str, seed: int) -> tuple[dict[str, Any], dict[str, Any]]:
        """Start an episode and return the ``(observation, info)`` pair."""
        ...

    def step(self, action: dict[str, Any]) -> StepOutcome:
        """Apply *action* and return the resulting step outcome."""
        ...

    def close(self) -> None:
        """Release whatever the adapter holds for the episode."""
        ...
55
+
56
+
57
def _bool_text(value: bool) -> str:
    """Render *value* as the lowercase literal ``true`` or ``false``."""
    if value:
        return "true"
    return "false"
59
+
60
+
61
def _reward_text(value: float) -> str:
    """Format a reward as a fixed two-decimal string."""
    as_float = float(value)
    return format(as_float, ".2f")
63
+
64
+
65
def _error_text(value: Any) -> str:
    """Turn an error value into a single-line token, using ``null`` for "no error".

    Carriage returns and newlines become spaces so the result cannot split a
    log line; ``None`` and values that end up blank are reported as ``null``.
    """
    if value is None:
        return "null"
    # Map CR (13) and LF (10) to a space in one pass.
    flattened = str(value).translate({13: " ", 10: " "}).strip()
    return flattened or "null"
70
+
71
+
72
def _truncate(value: Any, limit: int = 500) -> str:
    """Return ``str(value)`` shortened to at most *limit* characters.

    Text longer than *limit* is cut and ends with ``...``. When *limit* is
    below three there is no room for the ellipsis, so the text is cut hard;
    a zero or negative *limit* yields an empty string. (Previously such
    limits produced a negative slice bound and an over-long result.)
    """
    text = str(value)
    if len(text) <= limit:
        return text
    if limit < 3:
        return text[: max(limit, 0)]
    return f"{text[: limit - 3]}..."
77
+
78
+
79
def _emit_start(task_name: str, benchmark: str, model_name: str) -> None:
    """Print the ``[START]`` line that opens a run and flush stdout."""
    line = f"[START] task={task_name} env={benchmark} model={model_name}"
    print(line, flush=True)
81
+
82
+
83
def _emit_step(step_number: int, action: str, reward: float, done: bool, error_value: Any) -> None:
    """Print one ``[STEP]`` line describing a completed step and flush stdout."""
    fields = (
        f"step={step_number}",
        f"action={action}",
        f"reward={_reward_text(reward)}",
        f"done={_bool_text(done)}",
        f"error={_error_text(error_value)}",
    )
    print("[STEP] " + " ".join(fields), flush=True)
89
+
90
+
91
def _emit_end(success: bool, steps: int, rewards: list[float]) -> None:
    """Print the closing ``[END]`` line with the comma-joined rewards and flush stdout."""
    formatted = [_reward_text(item) for item in rewards]
    rewards_text = ",".join(formatted)
    print(f"[END] success={_bool_text(success)} steps={steps} rewards={rewards_text}", flush=True)
94
+
95
+
96
def _action_to_log_string(action: dict[str, Any]) -> str:
    """Render an action as ``type({...compact JSON parameters...})`` on one line.

    A missing action type is shown as ``wait``; parameters that are not a
    dict are shown as ``{}``. This function never raises on unusual
    parameter values: actions recovered through the ``ast.literal_eval``
    fallback may hold values JSON cannot encode (for example sets), and a
    logging helper must not abort the episode because of them.
    """
    # Collapse any whitespace (including line breaks) so the action token
    # cannot break the single-line log format.
    action_type = " ".join(str(action.get("action_type", "wait")).split())
    parameters = action.get("parameters")
    if not isinstance(parameters, dict):
        parameters = {}
    try:
        # default=str: stringify values JSON cannot encode instead of raising.
        params_json = json.dumps(parameters, ensure_ascii=False, separators=(",", ":"), default=str)
    except (TypeError, ValueError):
        # Unencodable keys or circular references: log the repr as a JSON string.
        params_json = json.dumps(repr(parameters), ensure_ascii=False)
    return f"{action_type}({params_json})"
103
+
104
+
105
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence from *text*, if there is one.

    Text that does not start with a fence is returned stripped. For a
    multi-line fence the opening line (fence plus optional language tag) is
    dropped, along with a closing fence. A fence written on a single line,
    such as three backticks directly around the payload, keeps its content
    instead of being discarded.
    """
    content = text.strip()
    if not content.startswith("```"):
        return content

    lines = content.splitlines()
    if len(lines) == 1:
        # Everything is on the fence line itself: peel the backticks only.
        return content.strip("`").strip()

    body = "\n".join(lines[1:]).strip()
    if body.endswith("```"):
        # Closing fence, whether on its own line or glued to the last line.
        body = body[:-3].rstrip()
    return body


def _extract_json_object(text: str) -> dict[str, Any] | None:
    """Extract the outermost ``{...}`` span of *text* as a dict.

    The span is parsed as JSON first; if that fails it is parsed as a Python
    literal, which accepts single-quoted keys and ``True``/``False``/``None``.

    Returns:
        The parsed dict, or ``None`` when no brace span exists, parsing
        fails, or the parsed value is not a dict.
    """
    content = _strip_code_fences(text)
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end < start:
        return None
    payload = content[start : end + 1]

    parsed: Any
    try:
        parsed = json.loads(payload)
    except (json.JSONDecodeError, RecursionError):
        try:
            # literal_eval only builds literals, but on hostile input it can
            # also raise TypeError (unhashable dict key), RecursionError or
            # MemoryError; every one of them means "no usable object".
            parsed = ast.literal_eval(payload)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return None

    return parsed if isinstance(parsed, dict) else None
137
+
138
+
139
def _normalize_action(action: dict[str, Any], observation: dict[str, Any]) -> dict[str, Any]:
    """Coerce a model-proposed action into one the environment can accept.

    The action type is stripped and lower-cased, defaulting to ``wait`` when
    missing, empty or ``None``. Parameters that are not a dict become ``{}``.
    If the observation advertises ``available_actions`` and the chosen type
    is not among them, the action becomes a failed ``done`` when ``done`` is
    allowed, otherwise the alphabetically first allowed type with empty
    parameters.

    Returns:
        A dict with exactly the keys ``action_type``, ``parameters`` and
        ``reasoning``.
    """
    # `or ""` so an explicit None does not become the literal string "none".
    action_type = str(action.get("action_type") or "").strip().lower()
    parameters = action.get("parameters")
    if not isinstance(parameters, dict):
        parameters = {}

    available_actions = observation.get("available_actions")
    if not isinstance(available_actions, (list, tuple)):
        # Missing, None or otherwise malformed: treat as "no restriction"
        # instead of failing while iterating it.
        available_actions = []
    allowed_action_types = {
        str(item["action_type"]).lower()
        for item in available_actions
        if isinstance(item, dict) and item.get("action_type")
    }

    if not action_type:
        action_type = "wait"
    if allowed_action_types and action_type not in allowed_action_types:
        if "done" in allowed_action_types:
            action_type = "done"
            parameters = {"success": False, "message": "Selected unsupported action type"}
        else:
            action_type = min(allowed_action_types)
            parameters = {}

    return {
        "action_type": action_type,
        "parameters": parameters,
        "reasoning": str(action.get("reasoning") or ""),
    }
167
+
168
+
169
def _fallback_action(observation: dict[str, Any], step_number: int, max_steps: int) -> dict[str, Any]:
    """Choose an action without the model, for when its reply cannot be parsed.

    While the observation lists remaining fields, extract the first one.
    Otherwise finish the episode: as a failure if the step limit has been
    reached, as a success if not.
    """
    pending = observation.get("fields_remaining")
    if isinstance(pending, list) and pending:
        next_field = str(pending[0])
        return {
            "action_type": "extract_field",
            "parameters": {"field_name": next_field},
            "reasoning": "Fallback extraction for next required field.",
        }

    if step_number >= max_steps:
        succeeded = False
        message = "Max steps reached"
        reasoning = "Forced completion at step limit."
    else:
        succeeded = True
        message = "No fields remaining"
        reasoning = "Fallback completion."

    return {
        "action_type": "done",
        "parameters": {"success": succeeded, "message": message},
        "reasoning": reasoning,
    }
190
+
191
+
192
def _build_llm_prompt(
    task_name: str,
    benchmark: str,
    observation: dict[str, Any],
    info: dict[str, Any],
    step_number: int,
    max_steps: int,
) -> str:
    """Compose the user prompt asking the model for the next action.

    The prompt carries the task identity, step counter, selected observation
    fields (URL, title, progress, remaining fields, available action names,
    task context), the info mapping, and the page content shortened to
    ``PROMPT_HTML_LIMIT`` characters. Observation fields of an unexpected
    type are replaced by empty defaults instead of failing.
    """

    def _dump(value: Any) -> str:
        # default=str: an in-process environment may hand back values JSON
        # cannot encode; stringify them instead of aborting the episode.
        return json.dumps(value, ensure_ascii=False, default=str)

    task_context = observation.get("task_context", {})
    if not isinstance(task_context, dict):
        task_context = {}

    current_url = observation.get("current_url") or ""
    page_title = observation.get("page_title") or ""
    try:
        extraction_progress = float(observation.get("extraction_progress", 0.0) or 0.0)
    except (TypeError, ValueError):
        # A malformed progress value only affects the prompt text.
        extraction_progress = 0.0
    fields_remaining = observation.get("fields_remaining", [])
    if not isinstance(fields_remaining, list):
        fields_remaining = []

    available_actions = observation.get("available_actions", [])
    action_names: list[str] = []
    if isinstance(available_actions, list):
        action_names = [
            str(item["action_type"])
            for item in available_actions
            if isinstance(item, dict) and item.get("action_type")
        ]

    # Prefer the HTML; fall back to plain text when HTML is absent or empty.
    page_html = observation.get("page_html")
    if not isinstance(page_html, str) or not page_html:
        page_html = observation.get("page_text", "")
    if not isinstance(page_html, str):
        page_html = ""
    page_html = _truncate(page_html, PROMPT_HTML_LIMIT)

    return (
        "You are controlling a web-scraping RL agent.\n"
        "Return ONLY a single JSON object with keys: action_type, parameters, reasoning.\n"
        "Do not include markdown.\n\n"
        f"Benchmark: {benchmark}\n"
        f"Task: {task_name}\n"
        f"Step: {step_number}/{max_steps}\n"
        f"Current URL: {current_url}\n"
        f"Page Title: {page_title}\n"
        f"Extraction Progress: {extraction_progress:.2f}\n"
        f"Fields Remaining: {_dump(fields_remaining)}\n"
        f"Available Actions: {_dump(action_names)}\n"
        f"Task Context: {_dump(task_context)}\n"
        f"Info: {_dump(info)}\n\n"
        "Page Content (truncated):\n"
        f"{page_html}\n\n"
        "If extraction is complete, return action_type=\"done\" with completion parameters."
    )
243
+
244
+
245
def _llm_next_action(
    client: OpenAI,
    task_name: str,
    benchmark: str,
    observation: dict[str, Any],
    info: dict[str, Any],
    step_number: int,
    max_steps: int,
) -> dict[str, Any]:
    """Ask the chat model for the next action and return it normalized.

    When the reply has no choices, no text content, or no parseable JSON
    object, the rule-based fallback action is returned instead. Errors
    raised by the API call itself are not caught here and propagate to the
    caller.
    """
    prompt = _build_llm_prompt(task_name, benchmark, observation, info, step_number, max_steps)

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": "You are a precise action-planning assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=LLM_TEMPERATURE,
    )

    # Guard against a reply with an empty `choices` list or with missing or
    # non-text content, so those cases use the fallback instead of raising.
    choices = getattr(response, "choices", None) or []
    content: Any = ""
    if choices:
        message = getattr(choices[0], "message", None)
        content = getattr(message, "content", None) or ""
    if not isinstance(content, str):
        content = ""

    parsed = _extract_json_object(content)
    if parsed is None:
        return _fallback_action(observation, step_number, max_steps)
    return _normalize_action(parsed, observation)
269
+
270
+
271
def _http_json(method: str, url: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send an HTTP request and return the JSON object in the response body.

    Args:
        method: HTTP verb passed to ``urllib.request.Request``.
        url: Target URL.
        payload: Optional mapping sent as a UTF-8 JSON body.

    Returns:
        The decoded JSON object, or ``{}`` when the body is empty.

    Raises:
        RuntimeError: on an HTTP error status, any network or timeout
            failure, a body that is not valid JSON, or JSON whose top level
            is not an object. Callers only need to handle this one type.
    """
    data = None
    headers = {"Accept": "application/json"}
    if payload is not None:
        data = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"

    req = url_request.Request(url=url, data=data, headers=headers, method=method)

    try:
        with url_request.urlopen(req, timeout=REQUEST_TIMEOUT_SECONDS) as response:
            body = response.read().decode("utf-8", errors="replace")
    except url_error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {exc.code} {url}: {detail}") from exc
    except OSError as exc:
        # URLError is an OSError subclass; catching the base class also
        # covers timeouts and resets raised while reading the body.
        raise RuntimeError(f"Network error calling {url}: {exc}") from exc

    if not body.strip():
        return {}
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"Invalid JSON from {url}: {exc}") from exc
    if isinstance(parsed, dict):
        return parsed
    raise RuntimeError(f"Expected JSON object from {url}")
295
+
296
+
297
class ScrapeRLEpisodeAdapter:
    """Episode adapter that talks to the HTTP ``/episode/*`` endpoints.

    ``reset`` posts to ``<base_url>/episode/reset``, ``step`` posts to
    ``<base_url>/episode/step`` and ``close`` issues a DELETE for the
    episode. The episode id returned by ``reset`` is kept on the instance.
    """

    def __init__(self, base_url: str) -> None:
        """Remember *base_url* without trailing slashes; no request is made."""
        self.base_url = base_url.rstrip("/")
        self.episode_id: str | None = None

    def reset(self, task_name: str, seed: int) -> tuple[dict[str, Any], dict[str, Any]]:
        """Start a new episode and return ``(observation, info)``.

        Non-dict observation or info values in the response are replaced
        by empty dicts.
        """
        payload = {"task_id": task_name, "seed": seed}
        response = _http_json("POST", f"{self.base_url}/episode/reset", payload)

        raw_episode_id = response.get("episode_id")
        # A missing, null or empty id must leave the adapter "not reset";
        # str(None) would otherwise give the truthy id "None".
        if raw_episode_id is None or raw_episode_id == "":
            self.episode_id = None
        else:
            self.episode_id = str(raw_episode_id)

        observation = response.get("observation", {})
        info = response.get("info", {})
        if not isinstance(observation, dict):
            observation = {}
        if not isinstance(info, dict):
            info = {}
        return observation, info

    def step(self, action: dict[str, Any]) -> StepOutcome:
        """Send *action* for the current episode and wrap the response.

        Raises:
            RuntimeError: if no episode id is stored, i.e. ``reset`` was not
                called or returned no id.
        """
        if not self.episode_id:
            raise RuntimeError("Episode has not been reset")

        payload = {
            "episode_id": self.episode_id,
            "action": action,
        }
        response = _http_json("POST", f"{self.base_url}/episode/step", payload)
        observation = response.get("observation", {})
        if not isinstance(observation, dict):
            observation = {}
        info = response.get("info", {})
        if not isinstance(info, dict):
            info = {}

        return StepOutcome(
            observation=observation,
            # `or 0.0` maps a null reward to zero before the float conversion.
            reward=float(response.get("reward", 0.0) or 0.0),
            terminated=bool(response.get("terminated", False)),
            truncated=bool(response.get("truncated", False)),
            info=info,
        )

    def close(self) -> None:
        """Delete the server-side episode, best effort, and forget its id."""
        if not self.episode_id:
            return
        try:
            _http_json("DELETE", f"{self.base_url}/episode/{self.episode_id}")
        except (RuntimeError, ValueError):
            # Cleanup is best effort: an unreachable server or a non-JSON
            # reply to the DELETE must not mask the result of the run.
            pass
        finally:
            self.episode_id = None
346
+
347
+
348
class OpenEnvSDKAdapter:
    """Episode adapter backed by an in-process ``openenv`` environment.

    The environment comes from ``openenv.make(benchmark)``. The adapter
    accepts several ``reset``/``step`` calling conventions and return shapes
    and converts them to the ``(observation, info)`` / ``StepOutcome`` forms
    used by the rest of the file.
    """

    def __init__(self, benchmark: str) -> None:
        """Import ``openenv`` lazily and build the environment.

        Raises:
            RuntimeError: if the imported module has no ``make`` attribute.
        """
        import openenv  # type: ignore

        if not hasattr(openenv, "make"):
            raise RuntimeError("openenv.make is not available")
        self.env = openenv.make(benchmark)

    def reset(self, task_name: str, seed: int) -> tuple[dict[str, Any], dict[str, Any]]:
        """Reset the environment, trying known keyword spellings in turn.

        Each attempt that raises ``TypeError`` moves on to the next
        spelling, ending with a bare ``reset()``; the last ``TypeError`` is
        re-raised when every attempt fails.
        """
        keyword_variants: list[dict[str, Any]] = [
            {task_key: task_name, "seed": seed} for task_key in ("task_name", "task", "task_id")
        ]
        keyword_variants.append({})

        failure: Exception | None = None
        for kwargs in keyword_variants:
            try:
                return self._parse_reset(self.env.reset(**kwargs))
            except TypeError as exc:
                failure = exc
        if failure:
            raise failure
        raise RuntimeError("Unable to reset OpenEnv environment")

    def step(self, action: dict[str, Any]) -> StepOutcome:
        """Step with the full action dict, retrying with only its type on ``TypeError``."""
        try:
            raw = self.env.step(action)
        except TypeError:
            raw = self.env.step(action.get("action_type", "wait"))
        return self._parse_step(raw)

    def close(self) -> None:
        """Call the environment's ``close`` when it has one."""
        if not hasattr(self.env, "close"):
            return
        self.env.close()

    @staticmethod
    def _parse_reset(result: Any) -> tuple[dict[str, Any], dict[str, Any]]:
        """Convert a ``reset`` return value to ``(observation, info)`` dicts.

        Accepts a tuple with at least two items or a dict; a dict without an
        ``observation`` key is treated as the observation itself. Any other
        shape, and any non-dict component, yields an empty dict.
        """
        if isinstance(result, tuple) and len(result) >= 2:
            first, second = result[0], result[1]
            return (
                first if isinstance(first, dict) else {},
                second if isinstance(second, dict) else {},
            )
        if not isinstance(result, dict):
            return {}, {}

        observation = result.get("observation", result)
        info = result.get("info", {})
        return (
            observation if isinstance(observation, dict) else {},
            info if isinstance(info, dict) else {},
        )

    @staticmethod
    def _parse_step(result: Any) -> StepOutcome:
        """Convert a ``step`` return value to a ``StepOutcome``.

        Supported shapes: a dict, or a tuple of length 6
        ``(obs, reward, <ignored>, terminated, truncated, info)``, 5
        ``(obs, reward, terminated, truncated, info)`` or 4
        ``(obs, reward, done, info)``.

        Raises:
            RuntimeError: for any other shape.
        """

        def _dict_or_empty(candidate: Any) -> dict[str, Any]:
            return candidate if isinstance(candidate, dict) else {}

        if isinstance(result, dict):
            observation = _dict_or_empty(result.get("observation", {}))
            info = _dict_or_empty(result.get("info", {}))
            # A legacy single `done` flag counts as termination.
            terminated = bool(result.get("terminated", result.get("done", False)))
            truncated = bool(result.get("truncated", False))
            reward = float(result.get("reward", 0.0) or 0.0)
            return StepOutcome(observation=observation, reward=reward, terminated=terminated, truncated=truncated, info=info)

        if isinstance(result, tuple):
            size = len(result)
            if size == 6:
                observation, reward, _breakdown, terminated, truncated, info = result
            elif size == 5:
                observation, reward, terminated, truncated, info = result
            elif size == 4:
                observation, reward, terminated, info = result
                truncated = False
            else:
                raise RuntimeError("Unsupported step() return format from OpenEnv SDK")
            return StepOutcome(
                observation=_dict_or_empty(observation),
                reward=float(reward or 0.0),
                terminated=bool(terminated),
                truncated=bool(truncated),
                info=_dict_or_empty(info),
            )

        raise RuntimeError("Unsupported step() return format from OpenEnv SDK")
446
+
447
+
448
def _build_adapter(benchmark: str, env_api_base_url: str) -> EpisodeAdapter:
    """Pick the environment back-end: OpenEnv SDK first, HTTP episode API otherwise.

    The SDK adapter is attempted only when ``USE_OPENENV_SDK`` is enabled.
    Any failure to construct it (missing package, missing ``make``, unknown
    benchmark, ...) selects the HTTP adapter; the reason is written to
    stderr so the fallback is visible without touching the structured
    stdout log.
    """
    if USE_OPENENV_SDK:
        try:
            return OpenEnvSDKAdapter(benchmark)
        except Exception as exc:  # noqa: BLE001 - any SDK failure must select the fallback
            print(
                f"OpenEnv SDK adapter unavailable ({type(exc).__name__}: {exc}); "
                "falling back to HTTP episode API",
                file=sys.stderr,
                flush=True,
            )
    return ScrapeRLEpisodeAdapter(env_api_base_url)
455
+
456
+
457
def run_inference(task_name: str, benchmark: str, max_steps: int, seed: int, env_api_base_url: str) -> int:
    """Run one episode and print the ``[START]`` / ``[STEP]`` / ``[END]`` log.

    ``[START]`` is printed first and ``[END]`` is printed from a ``finally``
    block, so both appear even when setup or a step fails. Failures are
    reported on stderr; stdout carries only the structured lines.

    Args:
        task_name: Task identifier passed to the adapter's ``reset``.
        benchmark: Benchmark name, used for the SDK adapter and the log.
        max_steps: Upper bound on the number of steps taken.
        seed: Seed passed to the adapter's ``reset``.
        env_api_base_url: Base URL for the HTTP fallback adapter.

    Returns:
        ``0`` when the episode ended terminated and not truncated, else ``1``.
    """
    rewards: list[float] = []
    steps = 0
    success = False

    _emit_start(task_name=task_name, benchmark=benchmark, model_name=MODEL_NAME)

    adapter: EpisodeAdapter | None = None
    try:
        # `not HF_TOKEN` also rejects an empty value such as a bare
        # `HF_TOKEN=` line, which would otherwise only fail later as an
        # authentication error.
        if not HF_TOKEN:
            raise ValueError("HF_TOKEN environment variable is required")

        client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
        adapter = _build_adapter(benchmark=benchmark, env_api_base_url=env_api_base_url)
        observation, info = adapter.reset(task_name=task_name, seed=seed)

        for step_number in range(1, max_steps + 1):
            action = _llm_next_action(
                client=client,
                task_name=task_name,
                benchmark=benchmark,
                observation=observation,
                info=info,
                step_number=step_number,
                max_steps=max_steps,
            )
            action_for_log = _action_to_log_string(action)
            outcome = adapter.step(action)

            steps = step_number
            rewards.append(outcome.reward)
            last_error = outcome.observation.get("last_action_error")
            _emit_step(
                step_number=step_number,
                action=action_for_log,
                reward=outcome.reward,
                done=outcome.done,
                error_value=last_error,
            )

            observation = outcome.observation
            info = outcome.info

            if outcome.done:
                # Truncation (e.g. a step limit) does not count as success.
                success = bool(outcome.terminated and not outcome.truncated)
                break
    except Exception as exc:  # noqa: BLE001 - top-level boundary; [END] must still be emitted
        success = False
        print(f"inference failed: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)
    finally:
        if adapter is not None:
            try:
                adapter.close()
            except Exception as exc:  # noqa: BLE001 - cleanup is best effort
                print(f"adapter close failed: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)
        _emit_end(success=success, steps=steps, rewards=rewards)

    return 0 if success else 1
514
+
515
+
516
def parse_args() -> argparse.Namespace:
    """Parse the command line; each option defaults to its module-level setting."""
    option_specs = (
        ("--task", {"default": TASK_NAME_DEFAULT, "help": "Task name/id"}),
        ("--benchmark", {"default": BENCHMARK_DEFAULT, "help": "Benchmark/environment name"}),
        ("--max-steps", {"type": int, "default": MAX_STEPS_DEFAULT, "help": "Maximum step count"}),
        ("--seed", {"type": int, "default": EPISODE_SEED_DEFAULT, "help": "Episode reset seed"}),
        (
            "--env-api-base-url",
            {
                "default": ENV_API_BASE_URL,
                "help": "Fallback environment API base URL (used when OpenEnv SDK is unavailable)",
            },
        ),
    )

    parser = argparse.ArgumentParser(description="OpenEnv-compliant inference runner.")
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
528
+
529
+
530
if __name__ == "__main__":
    # Script entry point: the process exit status is run_inference()'s return value.
    cli = parse_args()
    sys.exit(
        run_inference(
            task_name=cli.task,
            benchmark=cli.benchmark,
            max_steps=cli.max_steps,
            seed=cli.seed,
            env_api_base_url=cli.env_api_base_url,
        )
    )