Spaces:
Sleeping
Sleeping
Commit ·
b3485c6
1
Parent(s): eb60bd2
fix: clamp scores to the closed range [0.001, 0.999] so they stay strictly between 0 and 1
Browse files
- inference.py +1 -1
- server/env.py +1 -1
- server/grader.py +1 -1
inference.py
CHANGED
|
@@ -97,7 +97,7 @@ def run_task(task_id: str):
|
|
| 97 |
error_msg = str(e).replace("\n", " ").replace("\r", "")
|
| 98 |
|
| 99 |
# 3e. Clamp it
|
| 100 |
-
reward = max(0.
|
| 101 |
rewards.append(reward)
|
| 102 |
|
| 103 |
# 3f. Print [STEP] line immediately
|
|
|
|
| 97 |
error_msg = str(e).replace("\n", " ").replace("\r", "")
|
| 98 |
|
| 99 |
# 3e. Clamp it
|
| 100 |
+
reward = max(0.001, min(0.999, float(raw_reward)))
|
| 101 |
rewards.append(reward)
|
| 102 |
|
| 103 |
# 3f. Print [STEP] line immediately
|
server/env.py
CHANGED
|
@@ -43,7 +43,7 @@ class CodeArenaEnv:
|
|
| 43 |
|
| 44 |
# Calculate Reward
|
| 45 |
reward = safe_reward(calculate_reward(exec_result, self.current_task))
|
| 46 |
-
reward = max(0.001, min(0.999, reward))
|
| 47 |
|
| 48 |
# Update State
|
| 49 |
self.previous_attempts.append(action.proposed_fix)
|
|
|
|
| 43 |
|
| 44 |
# Calculate Reward
|
| 45 |
reward = safe_reward(calculate_reward(exec_result, self.current_task))
|
| 46 |
+
reward = max(0.001, min(0.999, float(reward)))
|
| 47 |
|
| 48 |
# Update State
|
| 49 |
self.previous_attempts.append(action.proposed_fix)
|
server/grader.py
CHANGED
|
@@ -10,7 +10,7 @@ def safe_reward(reward) -> float:
|
|
| 10 |
r = float(reward)
|
| 11 |
except Exception:
|
| 12 |
return 0.5
|
| 13 |
-
return max(0.001, min(0.999,
|
| 14 |
|
| 15 |
|
| 16 |
def normalize_reward(passed: int, total: int) -> float:
|
|
|
|
| 10 |
r = float(reward)
|
| 11 |
except Exception:
|
| 12 |
return 0.5
|
| 13 |
+
return max(0.001, min(0.999, float(reward)))
|
| 14 |
|
| 15 |
|
| 16 |
def normalize_reward(passed: int, total: int) -> float:
|