code-review / snippets.py
Avnishjain's picture
Upload 26 files
ec566e9 verified
"""
Code corpus: Python snippets with embedded ground-truth issues.
Each entry has:
- snippet : CodeSnippet to show the agent
- issues : list of ground-truth ReviewComment objects the grader checks against
- task_id : which task this belongs to
"""
from __future__ import annotations
from env.models import CodeSnippet, ReviewCategory, ReviewComment, Severity
# ---------------------------------------------------------------------------
# TASK 1 – Easy (Bug detection + Code style)
# ---------------------------------------------------------------------------
TASK1_SNIPPET = CodeSnippet(
file_name="calculator.py",
source='''\
def divide(a, b):
return a / b # line 2
def average(numbers):
total = 0
for n in numbers:
total = total + n
return total / len(numbers) # line 8
def celsius_to_fahrenheit(c):
return c * 9/5 + 31 # line 11 (bug: should be +32)
def is_palindrome(s):
return s == s[::-1] # line 14
def find_max(lst):
max_val = lst[0] # line 17
for i in range(len(lst)):
if lst[i] > max_val:
max_val = lst[i]
return max_val # line 21
def count_words(text):
words = text.split(" ")
wordcount = {}
for w in words:
if w in wordcount:
wordcount[w] = wordcount[w]+1
else:
wordcount[w] = 1
return wordcount # line 30
''',
)
TASK1_ISSUES: list[ReviewComment] = [
# ---- Bugs ----
ReviewComment(
line=2,
category=ReviewCategory.BUG,
severity=Severity.HIGH,
message="divide() has no guard against division by zero; will raise ZeroDivisionError when b=0.",
suggestion="Add `if b == 0: raise ValueError('b must not be zero')` before returning.",
),
ReviewComment(
line=8,
category=ReviewCategory.BUG,
severity=Severity.HIGH,
message="average() crashes with ZeroDivisionError on an empty list.",
suggestion="Guard with `if not numbers: return 0.0` or raise ValueError.",
),
ReviewComment(
line=11,
category=ReviewCategory.BUG,
severity=Severity.MEDIUM,
message="celsius_to_fahrenheit uses +31 instead of +32, giving wrong results.",
suggestion="Change `+ 31` to `+ 32`.",
),
ReviewComment(
line=17,
category=ReviewCategory.BUG,
severity=Severity.MEDIUM,
message="find_max() crashes with IndexError on an empty list.",
suggestion="Add `if not lst: raise ValueError('list is empty')` at the top.",
),
# ---- Style ----
ReviewComment(
line=18,
category=ReviewCategory.STYLE,
severity=Severity.LOW,
message="Iterating with `for i in range(len(lst))` is unpythonic; prefer `for val in lst`.",
suggestion="Replace loop body with `for val in lst: if val > max_val: max_val = val`.",
),
ReviewComment(
line=25,
category=ReviewCategory.STYLE,
severity=Severity.LOW,
message="count_words manually reimplements collections.Counter; use the stdlib instead.",
suggestion="Replace with `from collections import Counter; return Counter(text.split())`.",
),
]
# ---------------------------------------------------------------------------
# TASK 2 – Medium (Security + Performance)
# ---------------------------------------------------------------------------
TASK2_SNIPPET = CodeSnippet(
file_name="user_service.py",
source='''\
import sqlite3
import hashlib
import os
DB_PATH = "users.db"
def get_user(username):
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
query = f"SELECT * FROM users WHERE username = \'{ username }\'" # line 10
cursor.execute(query)
result = cursor.fetchone()
conn.close()
return result
def create_user(username, password):
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
pw_hash = hashlib.md5(password.encode()).hexdigest() # line 19
cursor.execute(
"INSERT INTO users (username, password) VALUES (?, ?)",
(username, pw_hash),
)
conn.commit()
conn.close()
def load_all_users():
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM users")
rows = cursor.fetchall() # line 31
conn.close()
users = []
for row in rows:
users.append({
"id": row[0],
"username": row[1],
"password": row[2],
})
return users
def authenticate(username, password):
user = get_user(username)
if user is None:
return False
pw_hash = hashlib.md5(password.encode()).hexdigest() # line 46
return user[2] == pw_hash
def delete_user(username):
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
query = f"DELETE FROM users WHERE username = \'{ username }\'" # line 52
cursor.execute(query)
conn.commit()
conn.close()
''',
)
TASK2_ISSUES: list[ReviewComment] = [
# ---- Security ----
ReviewComment(
line=10,
category=ReviewCategory.SECURITY,
severity=Severity.CRITICAL,
message="SQL injection vulnerability: username is interpolated directly into the query string.",
suggestion="Use parameterised queries: `cursor.execute('SELECT * FROM users WHERE username=?', (username,))`",
),
ReviewComment(
line=19,
category=ReviewCategory.SECURITY,
severity=Severity.CRITICAL,
message="MD5 is cryptographically broken and must not be used for password hashing.",
suggestion="Replace with `bcrypt.hashpw(password.encode(), bcrypt.gensalt())` or `hashlib.scrypt`.",
),
ReviewComment(
line=52,
category=ReviewCategory.SECURITY,
severity=Severity.CRITICAL,
message="delete_user() is also vulnerable to SQL injection via string interpolation.",
suggestion="Use parameterised queries: `cursor.execute('DELETE FROM users WHERE username=?', (username,))`",
),
ReviewComment(
line=46,
category=ReviewCategory.SECURITY,
severity=Severity.HIGH,
message="authenticate() re-hashes with MD5 for comparison; same broken-hash issue as create_user.",
suggestion="Adopt bcrypt.checkpw() or equivalent constant-time comparison.",
),
# ---- Performance ----
ReviewComment(
line=31,
category=ReviewCategory.PERFORMANCE,
severity=Severity.HIGH,
message="fetchall() loads the entire users table into memory; will OOM on large tables.",
suggestion="Use `cursor.fetchmany(size=1000)` in a loop or add a LIMIT clause.",
),
ReviewComment(
line=8,
category=ReviewCategory.PERFORMANCE,
severity=Severity.MEDIUM,
message="A new DB connection is opened and closed for every single query; connection pooling should be used.",
suggestion="Use a module-level connection or a context-manager pool (e.g. `sqlite3.connect` as a shared resource).",
),
]
# ---------------------------------------------------------------------------
# TASK 3 – Hard (All categories: Bug + Security + Performance + Style + Docs)
# ---------------------------------------------------------------------------
TASK3_SNIPPET = CodeSnippet(
file_name="data_pipeline.py",
source='''\
"""Data pipeline for processing CSV exports from the analytics platform."""
import csv
import os
import pickle
import subprocess
import time
CACHE = {}
def load_data(filepath):
with open(filepath) as f: # line 12
reader = csv.DictReader(f)
data = []
for row in reader:
data.append(row)
return data
def process_row(row, transform_script):
result = subprocess.run(transform_script, shell=True, input=str(row)) # line 20
return result.stdout
def cache_result(key, value):
CACHE[key] = pickle.dumps(value) # line 24
def get_cached(key):
if key in CACHE:
return pickle.loads(CACHE[key]) # line 28
def compute_stats(data):
n = len(data) # line 31
total = sum(float(row["value"]) for row in data)
mean = total / n
variance = sum((float(row["value"]) - mean) ** 2 for row in data) / n
return {"mean": mean, "variance": variance, "count": n}
def run_pipeline(filepath, transform_script=None):
data = load_data(filepath)
if transform_script:
processed = []
for row in data:
processed.append(process_row(row, transform_script))
data = processed
stats = compute_stats(data)
cache_result(filepath, stats)
return stats
def export_results(stats, output_path):
with open(output_path, "w") as f: # line 47
f.write(str(stats))
''',
)
TASK3_ISSUES: list[ReviewComment] = [
# ---- Security ----
ReviewComment(
line=20,
category=ReviewCategory.SECURITY,
severity=Severity.CRITICAL,
message="subprocess.run with shell=True and user-supplied transform_script enables arbitrary OS command injection.",
suggestion="Avoid shell=True; pass args as a list or whitelist allowed scripts.",
),
ReviewComment(
line=28,
category=ReviewCategory.SECURITY,
severity=Severity.CRITICAL,
message="pickle.loads() on untrusted/arbitrary cache data allows arbitrary code execution.",
suggestion="Replace pickle with json.dumps/loads for serialisable data, or sign+verify the payload.",
),
ReviewComment(
line=24,
category=ReviewCategory.SECURITY,
severity=Severity.HIGH,
message="Storing pickled data in a module-level dict means deserialization risk persists across calls.",
suggestion="Use JSON for the cache and validate schemas on retrieval.",
),
# ---- Bugs ----
ReviewComment(
line=31,
category=ReviewCategory.BUG,
severity=Severity.HIGH,
message="compute_stats() raises ZeroDivisionError when data is empty (n=0).",
suggestion="Guard with `if not data: return {'mean': 0, 'variance': 0, 'count': 0}`.",
),
ReviewComment(
line=32,
category=ReviewCategory.BUG,
severity=Severity.MEDIUM,
message="If any row is missing the 'value' key, a KeyError will silently abort the pipeline.",
suggestion="Use `row.get('value', 0)` or validate schema at load time.",
),
ReviewComment(
line=12,
category=ReviewCategory.BUG,
severity=Severity.MEDIUM,
message="open(filepath) without encoding='utf-8' will use the system locale; may fail on non-ASCII data.",
suggestion="Use `open(filepath, encoding='utf-8')`.",
),
# ---- Performance ----
ReviewComment(
line=31,
category=ReviewCategory.PERFORMANCE,
severity=Severity.MEDIUM,
message="compute_stats() iterates over data twice (once for sum, once for variance); single-pass Welford's algorithm is more efficient.",
suggestion="Use Welford's online algorithm or numpy for large datasets.",
),
ReviewComment(
line=38,
category=ReviewCategory.PERFORMANCE,
severity=Severity.MEDIUM,
message="process_row() spawns a new subprocess for every row; should batch or vectorise the transformation.",
suggestion="Pass all rows to a single subprocess call or use a Python-native transform function.",
),
# ---- Style ----
ReviewComment(
line=47,
category=ReviewCategory.STYLE,
severity=Severity.LOW,
message="export_results writes str(stats) (a Python dict repr) rather than valid JSON or CSV.",
suggestion="Use `import json; f.write(json.dumps(stats, indent=2))`.",
),
ReviewComment(
line=9,
category=ReviewCategory.STYLE,
severity=Severity.LOW,
message="Module-level mutable CACHE dict is a global side-effect; makes the pipeline hard to test and thread-unsafe.",
suggestion="Encapsulate state inside a Pipeline class or pass cache explicitly.",
),
# ---- Documentation ----
ReviewComment(
line=12,
category=ReviewCategory.DOCUMENTATION,
severity=Severity.LOW,
message="load_data() has no docstring; expected CSV schema (required columns, types) is undocumented.",
suggestion="Add a docstring describing filepath, expected columns, and return type.",
),
ReviewComment(
line=19,
category=ReviewCategory.DOCUMENTATION,
severity=Severity.LOW,
message="process_row() does not document what transform_script should be, its expected format, or return value.",
suggestion="Add docstring: args, expected script interface, return type, and example.",
),
ReviewComment(
line=None,
category=ReviewCategory.DOCUMENTATION,
severity=Severity.LOW,
message="Module-level docstring is too vague; doesn't mention side-effects, required CSV schema, or dependencies.",
suggestion="Expand the module docstring with usage example, required columns, and external dependencies.",
),
]
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
CORPUS: dict[str, dict] = {
"task_1_easy": {
"snippet": TASK1_SNIPPET,
"issues": TASK1_ISSUES,
},
"task_2_medium": {
"snippet": TASK2_SNIPPET,
"issues": TASK2_ISSUES,
},
"task_3_hard": {
"snippet": TASK3_SNIPPET,
"issues": TASK3_ISSUES,
},
}