Priyansh Saxena committed on
Commit
fa638a8
·
1 Parent(s): b0fdd8b

fix: include score in inference END log

Browse files
Files changed (2) hide show
  1. inference.py +7 -4
  2. tests/test_inference_logging.py +2 -2
inference.py CHANGED
@@ -17,6 +17,8 @@ SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7")
17
  MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
  SEED = os.environ.get("SEED")
19
  MIN_LOG_REWARD = 0.01
 
 
20
 
21
 
22
  def _parse_seed(value: str | None) -> int | None:
@@ -48,11 +50,11 @@ def log_step(step, action, reward, done, error):
48
  )
49
 
50
 
51
- def log_end(success, steps, rewards):
52
  success_str = "true" if success else "false"
53
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
54
  print(
55
- f"[END] success={success_str} steps={steps} rewards={rewards_str}",
56
  flush=True,
57
  )
58
 
@@ -151,9 +153,10 @@ async def _run_task(task: str, client: OpenAI) -> None:
151
  except Exception:
152
  pass
153
 
154
- score = min(max(rewards[-1] if rewards else 0.0, 0.0), 1.0)
 
155
  success = score >= SUCCESS_SCORE_THRESHOLD
156
- log_end(success=success, steps=steps_taken, rewards=rewards)
157
 
158
 
159
  async def main():
 
17
  MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
  SEED = os.environ.get("SEED")
19
  MIN_LOG_REWARD = 0.01
20
+ MIN_SCORE = 0.01
21
+ MAX_SCORE = 0.99
22
 
23
 
24
  def _parse_seed(value: str | None) -> int | None:
 
50
  )
51
 
52
 
53
+ def log_end(success, steps, score, rewards):
54
  success_str = "true" if success else "false"
55
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
56
  print(
57
+ f"[END] success={success_str} steps={steps} score={score:.2f} rewards={rewards_str}",
58
  flush=True,
59
  )
60
 
 
153
  except Exception:
154
  pass
155
 
156
+ score = rewards[-1] if rewards else MIN_SCORE
157
+ score = min(max(score, MIN_SCORE), MAX_SCORE)
158
  success = score >= SUCCESS_SCORE_THRESHOLD
159
+ log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
160
 
161
 
162
  async def main():
tests/test_inference_logging.py CHANGED
@@ -23,6 +23,6 @@ def test_log_step_sanitizes_fields(capsys):
23
 
24
 
25
  def test_log_end_format(capsys):
26
- log_end(success=True, steps=3, rewards=[0.0, 0.1, 1.0])
27
  out = capsys.readouterr().out.strip()
28
- assert out == "[END] success=true steps=3 rewards=0.00,0.10,1.00"
 
23
 
24
 
25
  def test_log_end_format(capsys):
26
+ log_end(success=True, steps=3, score=0.99, rewards=[0.0, 0.1, 1.0])
27
  out = capsys.readouterr().out.strip()
28
+ assert out == "[END] success=true steps=3 score=0.99 rewards=0.00,0.10,1.00"