immortalindeed committed
Commit f96532b · 1 Parent: 699f953

Fix syntax of the [END] STDOUT line to exactly match the Hackathon's mandatory format with the score= parameter

Files changed (1):
  1. inference.py +5 -5
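
For context, each task brackets its run with exactly two STDOUT lines, and after this commit the [END] line carries a score= field. The values below are illustrative placeholders in the spec-comment style, not output captured from the commit:

[START] task=<task_id> env=<benchmark> model=<model_name>
[END] success=<true|false> steps=<n> score=<s> rewards=<r1,r2,...,rn>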
inference.py CHANGED
@@ -266,12 +266,12 @@ def run_task(client: OpenAI, task_id: str) -> tuple:
     except Exception as e:
         # Env unreachable — must still emit [START] and [END]
         print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)
-        print(f"[END] success=false steps=0 rewards=0.01", flush=True)
+        print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
         return 0.01, False
 
     if "error" in data and not data.get("episode_id"):
         print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)
-        print(f"[END] success=false steps=0 rewards=0.01", flush=True)
+        print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
         return 0.01, False
 
     episode_id = data.get("episode_id", "unknown")
@@ -378,7 +378,7 @@ def run_task(client: OpenAI, task_id: str) -> tuple:
     # spec: success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
     # NO score= field — not in the official spec
     print(
-        f"[END] success={str(success).lower()} steps={step_num} rewards={rewards_str}",
+        f"[END] success={str(success).lower()} steps={step_num} score={score:.4f} rewards={rewards_str}",
         flush=True
     )
 
@@ -420,13 +420,13 @@ def main() -> None:
             if remaining not in scores:
                 scores[remaining] = 0.01
                 print(f"[START] task={remaining} env={BENCHMARK} model={MODEL_NAME}", flush=True)
-                print(f"[END] success=false steps=0 rewards=0.01", flush=True)
+                print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
                 break
 
         except Exception as e:
             scores[task_id] = 0.01
             print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)
-            print(f"[END] success=false steps=0 rewards=0.01", flush=True)
+            print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
 
     avg = round(sum(scores.values()) / max(len(scores), 1), 4)
     print(f"\n✅ All tasks complete! Average: {avg:.4f}", flush=True)