Priyansh Saxena committed on
Commit
1700927
·
1 Parent(s): be50021

test: enforce strict score bounds

Browse files
Files changed (2) hide show
  1. tests/test_graders.py +33 -0
  2. tests/test_reward.py +18 -0
tests/test_graders.py CHANGED
@@ -83,3 +83,36 @@ def test_grade_hard_penalizes_red_herring():
83
  penalized = grade_hard(action, gt)
84
  assert penalized <= 0.9
85
  assert 0.0 < penalized < 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  penalized = grade_hard(action, gt)
84
  assert penalized <= 0.9
85
  assert 0.0 < penalized < 1.0
86
+
87
+
88
+ def test_grade_easy_perfect_is_not_one():
89
+ gt = {
90
+ "bug_type": "missing_zero_grad",
91
+ "primary_bug_file": "train.py",
92
+ "related_files": [],
93
+ "line_range": [10, 12],
94
+ "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
95
+ }
96
+ action = {
97
+ "bug_type": "missing_zero_grad",
98
+ "affected_file": "train.py",
99
+ "line_range": [10, 12],
100
+ "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
101
+ "confidence": 1.0,
102
+ }
103
+ score = grade_easy(action, gt)
104
+ assert 0.0 < score < 1.0
105
+
106
+
107
+ def test_grader_empty_action_clamped():
108
+ gt = {
109
+ "bug_type": "data_leakage",
110
+ "primary_bug_file": "data/dataset.py",
111
+ "related_files": [],
112
+ "line_range": [4, 6],
113
+ "fix_strategy": "Ensure validation split is strictly separate from training",
114
+ }
115
+ action = {}
116
+ assert 0.0 < grade_easy(action, gt) < 1.0
117
+ assert 0.0 < grade_medium(action, gt) < 1.0
118
+ assert 0.0 < grade_hard(action, gt) < 1.0
tests/test_reward.py CHANGED
@@ -44,6 +44,24 @@ def test_final_diagnosis_score_bounds():
44
  assert 0.0 < score < 1.0
45
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def test_compute_step_reward_clamps_non_negative():
48
  gt = {
49
  "bug_type": "missing_zero_grad",
 
44
  assert 0.0 < score < 1.0
45
 
46
 
47
+ def test_final_diagnosis_score_perfect_clamped():
48
+ gt = {
49
+ "bug_type": "missing_zero_grad",
50
+ "primary_bug_file": "train.py",
51
+ "related_files": [],
52
+ "line_range": [10, 12],
53
+ "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
54
+ }
55
+ action = {
56
+ "bug_type": "missing_zero_grad",
57
+ "affected_file": "train.py",
58
+ "line_range": [10, 12],
59
+ "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
60
+ }
61
+ score = final_diagnosis_score(action, gt)
62
+ assert 0.0 < score < 1.0
63
+
64
+
65
  def test_compute_step_reward_clamps_non_negative():
66
  gt = {
67
  "bug_type": "missing_zero_grad",