| |
| from src.pytorch_debug_env.graders import grade_easy, grade_hard, grade_medium |
|
|
def test_grade_easy():
    """Check grade_easy on an action that matches the ground truth exactly.

    The action repeats the ground-truth bug type, file, line range and fix
    strategy with a confidence of 0.8; the resulting score must fall
    strictly between 0.8 and 1.0.
    """
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 15],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    submission = {
        "bug_type": "missing_zero_grad",
        "affected_file": "train.py",
        "line_range": [10, 15],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
        "confidence": 0.8,
    }

    result = grade_easy(submission, ground_truth)

    # High score expected, but never a full 1.0.
    assert 0.8 < result < 1.0
|
|
|
|
def test_grade_medium_related_file_bonus():
    """Check grade_medium against grade_easy when a related file is named.

    The action points at "data/preprocessing.py", which the ground truth
    lists under related_files rather than as the primary bug file. The
    medium score must be at least the easy score for the same inputs and
    must lie strictly inside (0, 1).
    """
    ground_truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["data/preprocessing.py"],
        "line_range": [4, 6],
        "fix_strategy": "Ensure validation split is strictly separate from training",
    }
    submission = {
        "bug_type": "data_leakage",
        "affected_file": "data/preprocessing.py",
        "line_range": [1, 2],
        "fix_strategy": "Ensure validation split is strictly separate from training",
        "confidence": 0.6,
    }

    medium_score = grade_medium(submission, ground_truth)
    easy_baseline = grade_easy(submission, ground_truth)

    assert medium_score >= easy_baseline
    assert 0.0 < medium_score < 1.0
|
|
|
|
def test_grade_hard_category_partial_credit():
    """Check that grade_hard keeps a floor score for a mismatched action.

    The action differs from the ground truth in bug type, file, line range
    and fix strategy, yet the score must be at least 0.18 and below 1.0.
    NOTE(review): presumably the floor comes from "wrong_loss_function"
    sharing the "optimization" category with the true bug -- confirm
    against the grader implementation.
    """
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "category": "optimization",
        "primary_bug_file": "train.py",
        "related_files": [],
        "red_herring_file": "model/attention.py",
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    submission = {
        "bug_type": "wrong_loss_function",
        "affected_file": "data/dataset.py",
        "line_range": [1, 2],
        "fix_strategy": "Use CrossEntropyLoss instead of MSE",
        "confidence": 0.5,
    }

    result = grade_hard(submission, ground_truth)

    # Lower bound 0.18 subsumes the "> 0.0" check of the open interval.
    assert 0.18 <= result < 1.0
|
|
|
|
def test_grade_hard_penalizes_red_herring():
    """Check grade_hard when the action blames the red-herring file.

    Bug type, line range and fix strategy match the ground truth, but
    affected_file is the ground truth's red_herring_file. The score must
    be positive and must not exceed 0.9.
    """
    ground_truth = {
        "bug_type": "memory_leak",
        "category": "resource",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["train.py"],
        "red_herring_file": "model/attention.py",
        "line_range": [5, 9],
        "fix_strategy": "Avoid holding reference to tensors in class cache",
    }
    submission = {
        "bug_type": "memory_leak",
        "affected_file": "model/attention.py",
        "line_range": [5, 9],
        "fix_strategy": "Avoid holding reference to tensors in class cache",
        "confidence": 0.7,
    }

    red_herring_score = grade_hard(submission, ground_truth)

    # Upper bound 0.9 subsumes the "< 1.0" check of the open interval.
    assert 0.0 < red_herring_score <= 0.9
|
|
|
|
def test_grade_easy_perfect_is_not_one():
    """Check that a fully matching, fully confident action scores below 1.

    Every field of the action equals the ground truth and confidence is
    1.0; grade_easy must still return a value strictly inside (0, 1).
    """
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    submission = {
        "bug_type": "missing_zero_grad",
        "affected_file": "train.py",
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
        "confidence": 1.0,
    }

    result = grade_easy(submission, ground_truth)

    assert 0.0 < result < 1.0
|
|
|
|
def test_grader_empty_action_clamped():
    """Check that every grader handles an empty action dict.

    Each of grade_easy, grade_medium and grade_hard is called with the
    same empty action and must return a score strictly inside (0, 1).
    """
    ground_truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": [],
        "line_range": [4, 6],
        "fix_strategy": "Ensure validation split is strictly separate from training",
    }
    empty_submission = {}

    # Same dict object is passed to each grader, in easy/medium/hard order.
    for grader in (grade_easy, grade_medium, grade_hard):
        assert 0.0 < grader(empty_submission, ground_truth) < 1.0
|
|