Spaces:
Sleeping
Sleeping
Widen score epsilon to 0.01 so :.3f formatting stays in (0, 1)
The validator parses score=X.XXX from the [END] line of inference.py
stdout. With eps=1e-9 and :.3f formatting, scores could round to "0.000"
or "1.000", which the validator reads as exactly 0.0 or 1.0.
Using eps=0.01 guarantees the formatted score is always strictly
between 0.010 and 0.990.
- backend/env/tasks.py +1 -1
- inference.py +6 -5
backend/env/tasks.py  (changed, +1 -1)

@@ -330,7 +330,7 @@ def get_all_tasks() -> list[Task]:
     return list(TASKS.values())


-_EPS = 1e-9
+_EPS = 0.01  # wide enough that f"{x:.3f}" never rounds to 0.000 or 1.000


 def grade_response(
inference.py  (changed, +6 -5)

@@ -200,11 +200,12 @@ async def run_episode(
         if done:
             break

-        # Score: average of per-step rewards
-        #
-        [three further removed lines are illegible in this extraction]
+        # Score: average of per-step rewards. Clamp strictly inside (0, 1)
+        # with margin >= 0.005 so f"{score:.3f}" never formats to "0.000" or "1.000".
+        _EPS = 0.01
+        denom = max(len(rewards), 1)
+        avg = sum(rewards) / denom if rewards else _EPS
+        score = max(_EPS, min(1.0 - _EPS, avg))

     finally:
         log_end(