Spaces:

ceoavinash
/

codearena-rl

Sleeping

adityanaikhpt committed 28 days ago

Commit

b3485c6

1 Parent(s): eb60bd2

fix: clamp scores to strict (0.001, 0.999) range

Files changed (3) hide show

inference.py CHANGED Viewed

@@ -97,7 +97,7 @@ def run_task(task_id: str):
                 error_msg = str(e).replace("\n", " ").replace("\r", "")
         # 3e. Clamp it
-        reward = max(0.01, min(0.99, float(raw_reward)))
         rewards.append(reward)
         # 3f. Print [STEP] line immediately

                 error_msg = str(e).replace("\n", " ").replace("\r", "")
         # 3e. Clamp it
+        reward = max(0.001, min(0.999, float(raw_reward)))
         rewards.append(reward)
         # 3f. Print [STEP] line immediately

server/env.py CHANGED Viewed

@@ -43,7 +43,7 @@ class CodeArenaEnv:
         # Calculate Reward
         reward = safe_reward(calculate_reward(exec_result, self.current_task))
-        reward = max(0.001, min(0.999, reward))
         # Update State
         self.previous_attempts.append(action.proposed_fix)

         # Calculate Reward
         reward = safe_reward(calculate_reward(exec_result, self.current_task))
+        reward = max(0.001, min(0.999, float(reward)))
         # Update State
         self.previous_attempts.append(action.proposed_fix)

server/grader.py CHANGED Viewed

@@ -10,7 +10,7 @@ def safe_reward(reward) -> float:
         r = float(reward)
     except Exception:
         return 0.5
-    return max(0.001, min(0.999, r))
 def normalize_reward(passed: int, total: int) -> float:

         r = float(reward)
     except Exception:
         return 0.5
+    return max(0.001, min(0.999, float(reward)))
 def normalize_reward(passed: int, total: int) -> float: