adityanaikhpt committed on
Commit
b3485c6
·
1 Parent(s): eb60bd2

fix: clamp scores to the closed range [0.001, 0.999] so they stay strictly inside (0, 1)

Browse files
Files changed (3) hide show
  1. inference.py +1 -1
  2. server/env.py +1 -1
  3. server/grader.py +1 -1
inference.py CHANGED
@@ -97,7 +97,7 @@ def run_task(task_id: str):
97
  error_msg = str(e).replace("\n", " ").replace("\r", "")
98
 
99
  # 3e. Clamp it
100
- reward = max(0.01, min(0.99, float(raw_reward)))
101
  rewards.append(reward)
102
 
103
  # 3f. Print [STEP] line immediately
 
97
  error_msg = str(e).replace("\n", " ").replace("\r", "")
98
 
99
  # 3e. Clamp it
100
+ reward = max(0.001, min(0.999, float(raw_reward)))
101
  rewards.append(reward)
102
 
103
  # 3f. Print [STEP] line immediately
server/env.py CHANGED
@@ -43,7 +43,7 @@ class CodeArenaEnv:
43
 
44
  # Calculate Reward
45
  reward = safe_reward(calculate_reward(exec_result, self.current_task))
46
- reward = max(0.001, min(0.999, reward))
47
 
48
  # Update State
49
  self.previous_attempts.append(action.proposed_fix)
 
43
 
44
  # Calculate Reward
45
  reward = safe_reward(calculate_reward(exec_result, self.current_task))
46
+ reward = max(0.001, min(0.999, float(reward)))
47
 
48
  # Update State
49
  self.previous_attempts.append(action.proposed_fix)
server/grader.py CHANGED
@@ -10,7 +10,7 @@ def safe_reward(reward) -> float:
10
  r = float(reward)
11
  except Exception:
12
  return 0.5
13
- return max(0.001, min(0.999, r))
14
 
15
 
16
  def normalize_reward(passed: int, total: int) -> float:
 
10
  r = float(reward)
11
  except Exception:
12
  return 0.5
13
+ return max(0.001, min(0.999, float(reward)))
14
 
15
 
16
  def normalize_reward(passed: int, total: int) -> float: