samrat-rm committed on
Commit
d933934
·
1 Parent(s): a583a04

fix: score condition

Browse files
Files changed (1) hide show
  1. inference.py +4 -4
inference.py CHANGED
@@ -210,7 +210,7 @@ async def run_episode(
210
  print(f"[STEP] step={step} action={action.action_type} reward=0.01 done=true error={e}", flush=True)
211
  break
212
  obs = result.observation
213
- reward = round(min(0.99, result.reward or 0.01), 2)
214
  done = result.done
215
  if action.action_type in ("inspect_logs", "inspect_config", "inspect_gradients"):
216
  source = action.action_type.replace("inspect_", "")
@@ -229,7 +229,7 @@ async def run_episode(
229
  break
230
 
231
  # WebSocket is closed — safe to call the judge now
232
- keyword_score = rewards[-1] if rewards else 0.01
233
  judge_score: float | None = None
234
  if submit_action is not None:
235
  judge_score = llm_judge(
@@ -242,10 +242,10 @@ async def run_episode(
242
  inspection_order=inspection_order,
243
  )
244
  if judge_score is None:
245
- score = round(keyword_score, 2)
246
  # print(f" [JUDGE] scenario={scenario_key} keyword={keyword_score:.2f} reasoning=n/a total={score:.2f}", file=sys.stderr, flush=True)
247
  else:
248
- score = round(0.85 * keyword_score + 0.15 * judge_score, 2)
249
  # print(f" [JUDGE] scenario={scenario_key} keyword={keyword_score:.3f} reasoning={judge_score:.3f} total={score:.3f}", file=sys.stderr, flush=True)
250
 
251
  success = score >= SUCCESS_THRESHOLD
 
210
  print(f"[STEP] step={step} action={action.action_type} reward=0.01 done=true error={e}", flush=True)
211
  break
212
  obs = result.observation
213
+ reward = round(max(0.01, min(0.99, result.reward or 0.01)), 2)
214
  done = result.done
215
  if action.action_type in ("inspect_logs", "inspect_config", "inspect_gradients"):
216
  source = action.action_type.replace("inspect_", "")
 
229
  break
230
 
231
  # WebSocket is closed — safe to call the judge now
232
+ keyword_score = max(0.01, min(0.99, rewards[-1])) if rewards else 0.01
233
  judge_score: float | None = None
234
  if submit_action is not None:
235
  judge_score = llm_judge(
 
242
  inspection_order=inspection_order,
243
  )
244
  if judge_score is None:
245
+ score = round(max(0.01, min(0.99, keyword_score)), 2)
246
  # print(f" [JUDGE] scenario={scenario_key} keyword={keyword_score:.2f} reasoning=n/a total={score:.2f}", file=sys.stderr, flush=True)
247
  else:
248
+ score = round(max(0.01, min(0.99, 0.85 * keyword_score + 0.15 * judge_score)), 2)
249
  # print(f" [JUDGE] scenario={scenario_key} keyword={keyword_score:.3f} reasoning={judge_score:.3f} total={score:.3f}", file=sys.stderr, flush=True)
250
 
251
  success = score >= SUCCESS_THRESHOLD