Priyansh Saxena committed on
Commit ·
fa638a8
1
Parent(s): b0fdd8b
fix: include score in inference END log
Browse files
- inference.py +7 -4
- tests/test_inference_logging.py +2 -2
inference.py
CHANGED
|
@@ -17,6 +17,8 @@ SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7")
|
|
| 17 |
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
SEED = os.environ.get("SEED")
|
| 19 |
MIN_LOG_REWARD = 0.01
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def _parse_seed(value: str | None) -> int | None:
|
|
@@ -48,11 +50,11 @@ def log_step(step, action, reward, done, error):
|
|
| 48 |
)
|
| 49 |
|
| 50 |
|
| 51 |
-
def log_end(success, steps, rewards):
|
| 52 |
success_str = "true" if success else "false"
|
| 53 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 54 |
print(
|
| 55 |
-
f"[END] success={success_str} steps={steps} rewards={rewards_str}",
|
| 56 |
flush=True,
|
| 57 |
)
|
| 58 |
|
|
@@ -151,9 +153,10 @@ async def _run_task(task: str, client: OpenAI) -> None:
|
|
| 151 |
except Exception:
|
| 152 |
pass
|
| 153 |
|
| 154 |
-
score =
|
|
|
|
| 155 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 156 |
-
log_end(success=success, steps=steps_taken, rewards=rewards)
|
| 157 |
|
| 158 |
|
| 159 |
async def main():
|
|
|
|
| 17 |
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
SEED = os.environ.get("SEED")
|
| 19 |
MIN_LOG_REWARD = 0.01
|
| 20 |
+
MIN_SCORE = 0.01
|
| 21 |
+
MAX_SCORE = 0.99
|
| 22 |
|
| 23 |
|
| 24 |
def _parse_seed(value: str | None) -> int | None:
|
|
|
|
| 50 |
)
|
| 51 |
|
| 52 |
|
| 53 |
+
def log_end(success, steps, score, rewards):
|
| 54 |
success_str = "true" if success else "false"
|
| 55 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 56 |
print(
|
| 57 |
+
f"[END] success={success_str} steps={steps} score={score:.2f} rewards={rewards_str}",
|
| 58 |
flush=True,
|
| 59 |
)
|
| 60 |
|
|
|
|
| 153 |
except Exception:
|
| 154 |
pass
|
| 155 |
|
| 156 |
+
score = rewards[-1] if rewards else MIN_SCORE
|
| 157 |
+
score = min(max(score, MIN_SCORE), MAX_SCORE)
|
| 158 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 159 |
+
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 160 |
|
| 161 |
|
| 162 |
async def main():
|
tests/test_inference_logging.py
CHANGED
|
@@ -23,6 +23,6 @@ def test_log_step_sanitizes_fields(capsys):
|
|
| 23 |
|
| 24 |
|
| 25 |
def test_log_end_format(capsys):
|
| 26 |
-
log_end(success=True, steps=3, rewards=[0.0, 0.1, 1.0])
|
| 27 |
out = capsys.readouterr().out.strip()
|
| 28 |
-
assert out == "[END] success=true steps=3 rewards=0.00,0.10,1.00"
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def test_log_end_format(capsys):
|
| 26 |
+
log_end(success=True, steps=3, score=0.99, rewards=[0.0, 0.1, 1.0])
|
| 27 |
out = capsys.readouterr().out.strip()
|
| 28 |
+
assert out == "[END] success=true steps=3 score=0.99 rewards=0.00,0.10,1.00"
|