shank committed on
Commit
6318243
·
1 Parent(s): 30f698e

Added testing files

Browse files
env/tasks/task_easy.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task Easy — Binary Search Off-By-One Bug
3
+ ==========================================
4
+ Single function, one clear bug. The termination condition uses `<` instead of `<=`,
5
+ causing the function to miss the target whenever the search window narrows to a single element (e.g. the last element, or a one-element list).
6
+
7
+ Expected: 6 pass, 2 fail (test_finds_last_element, test_single_element_found)
8
+ """
9
+
10
+ TASK_DESCRIPTION = """A utility module for a data processing pipeline contains a binary search function.
11
+ The function searches for a target value in a sorted list and returns its index, or -1 if not found.
12
+ Two of the tests are failing — the function is not returning the correct result in all cases.
13
+ Your job is to identify the bug, form a hypothesis about the root cause, and fix it."""
14
+
15
+ BUGGY_CODE = '''def binary_search(arr: list, target: int) -> int:
16
+ """Return the index of target in sorted arr, or -1 if not found."""
17
+ left, right = 0, len(arr) - 1
18
+ while left < right: # BUG: should be left <= right
19
+ mid = (left + right) // 2
20
+ if arr[mid] == target:
21
+ return mid
22
+ elif arr[mid] < target:
23
+ left = mid + 1
24
+ else:
25
+ right = mid - 1
26
+ return -1
27
+ '''
28
+
29
+ TEST_SUITE = '''def test_finds_first_element():
30
+ assert binary_search([1, 3, 5, 7, 9], 1) == 0
31
+
32
+ def test_finds_middle_element():
33
+ assert binary_search([1, 3, 5, 7, 9], 5) == 2
34
+
35
+ def test_finds_last_element():
36
+ assert binary_search([1, 3, 5, 7, 9], 9) == 4
37
+
38
+ def test_returns_minus_one_for_missing():
39
+ assert binary_search([1, 3, 5, 7, 9], 4) == -1
40
+
41
+ def test_single_element_found():
42
+ assert binary_search([42], 42) == 0
43
+
44
+ def test_single_element_not_found():
45
+ assert binary_search([42], 7) == -1
46
+
47
+ def test_empty_list():
48
+ assert binary_search([], 5) == -1
49
+
50
+ def test_finds_second_to_last():
51
+ assert binary_search([2, 4, 6, 8, 10], 8) == 3
52
+ '''
53
+
54
+ # The test suite formatted for sandbox execution (no pytest, direct assertions)
55
+ TEST_SUITE_EXECUTABLE = '''
56
+ _tests_passed = 0
57
+ _tests_total = 8
58
+ _failures = []
59
+
60
+ def _run_test(name, fn):
61
+ global _tests_passed
62
+ try:
63
+ fn()
64
+ _tests_passed += 1
65
+ except AssertionError as e:
66
+ _failures.append(f"FAILED {name}: {e}")
67
+ except Exception as e:
68
+ _failures.append(f"ERROR {name}: {e}")
69
+
70
+ _run_test("test_finds_first_element", lambda: test_finds_first_element())
71
+ _run_test("test_finds_middle_element", lambda: test_finds_middle_element())
72
+ _run_test("test_finds_last_element", lambda: test_finds_last_element())
73
+ _run_test("test_returns_minus_one_for_missing", lambda: test_returns_minus_one_for_missing())
74
+ _run_test("test_single_element_found", lambda: test_single_element_found())
75
+ _run_test("test_single_element_not_found", lambda: test_single_element_not_found())
76
+ _run_test("test_empty_list", lambda: test_empty_list())
77
+ _run_test("test_finds_second_to_last", lambda: test_finds_second_to_last())
78
+
79
+ for f in _failures:
80
+ print(f)
81
+ print(f"{_tests_passed} passed, {_tests_total - _tests_passed} failed")
82
+ '''
83
+
84
+ GROUND_TRUTH = {
85
+ "bug_location": "binary_search",
86
+ "bug_type": "off_by_one",
87
+ "hypothesis_keywords": ["left <= right", "termination", "last element", "off by one", "<="],
88
+ "keyword_match_mode": "any", # match if ANY keyword appears
89
+ "fixed_code": '''def binary_search(arr: list, target: int) -> int:
90
+ """Return the index of target in sorted arr, or -1 if not found."""
91
+ left, right = 0, len(arr) - 1
92
+ while left <= right:
93
+ mid = (left + right) // 2
94
+ if arr[mid] == target:
95
+ return mid
96
+ elif arr[mid] < target:
97
+ left = mid + 1
98
+ else:
99
+ right = mid - 1
100
+ return -1
101
+ ''',
102
+ }
103
+
104
+ TASK_CONFIG = {
105
+ "task_id": "easy",
106
+ "task_description": TASK_DESCRIPTION,
107
+ "buggy_code": BUGGY_CODE,
108
+ "test_suite": TEST_SUITE,
109
+ "test_suite_executable": TEST_SUITE_EXECUTABLE,
110
+ "ground_truth": GROUND_TRUTH,
111
+ "max_attempts": 5,
112
+ "max_steps": 8,
113
+ "tests_total": 8,
114
+ "allow_threading": False,
115
+ }
env/tasks/task_hard.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task Hard — Concurrency Race Condition
3
+ ========================================
4
+ Thread-safe counter with a classic race condition: the read-modify-write cycle
5
+ is split across two separate lock acquisitions instead of being atomic.
6
+
7
+ All 8 sequential tests pass. The bug only manifests under concurrent access.
8
+ The agent must design a concurrent test to surface the race condition.
9
+
10
+ allow_threading=True for this task.
11
+ """
12
+
13
+ TASK_DESCRIPTION = """A thread-safe connection counter used in a web server to track active connections.
14
+ The ConnectionCounter class uses threading locks for thread safety, but some users report that under
15
+ heavy concurrent load, the counter occasionally shows incorrect values. All existing unit tests pass.
16
+ Your job is to identify the concurrency bug, design a test that surfaces it, and fix the implementation.
17
+
18
+ IMPORTANT: All 8 existing tests pass. The bug only manifests under concurrent access with multiple threads.
19
+ You need to think about what could go wrong when multiple threads call increment() simultaneously."""
20
+
21
+ BUGGY_CODE = '''import threading
22
+
23
+ class ConnectionCounter:
24
+ """Thread-safe connection counter for a web server."""
25
+
26
+ def __init__(self):
27
+ self.count = 0
28
+ self._lock = threading.Lock()
29
+
30
+ def increment(self):
31
+ with self._lock:
32
+ current = self.count # read
33
+ # LOCK RELEASED HERE — race window
34
+ new_val = current + 1 # modify
35
+ with self._lock:
36
+ self.count = new_val # write
37
+
38
+ def decrement(self):
39
+ with self._lock:
40
+ current = self.count
41
+ # LOCK RELEASED HERE — race window
42
+ new_val = current - 1
43
+ with self._lock:
44
+ self.count = new_val
45
+
46
+ def get_count(self) -> int:
47
+ with self._lock:
48
+ return self.count
49
+
50
+ def reset(self):
51
+ with self._lock:
52
+ self.count = 0
53
+ '''
54
+
55
+ TEST_SUITE = '''import threading
56
+
57
+ def test_initial_count_zero():
58
+ counter = ConnectionCounter()
59
+ assert counter.get_count() == 0
60
+
61
+ def test_single_increment():
62
+ counter = ConnectionCounter()
63
+ counter.increment()
64
+ assert counter.get_count() == 1
65
+
66
+ def test_single_decrement():
67
+ counter = ConnectionCounter()
68
+ counter.increment()
69
+ counter.decrement()
70
+ assert counter.get_count() == 0
71
+
72
+ def test_multiple_increments():
73
+ counter = ConnectionCounter()
74
+ for _ in range(10):
75
+ counter.increment()
76
+ assert counter.get_count() == 10
77
+
78
+ def test_multiple_decrements():
79
+ counter = ConnectionCounter()
80
+ for _ in range(10):
81
+ counter.increment()
82
+ for _ in range(5):
83
+ counter.decrement()
84
+ assert counter.get_count() == 5
85
+
86
+ def test_increment_then_decrement():
87
+ counter = ConnectionCounter()
88
+ counter.increment()
89
+ counter.increment()
90
+ counter.increment()
91
+ counter.decrement()
92
+ assert counter.get_count() == 2
93
+
94
+ def test_get_count_returns_int():
95
+ counter = ConnectionCounter()
96
+ counter.increment()
97
+ result = counter.get_count()
98
+ assert isinstance(result, int), f"get_count should return int, got {type(result)}"
99
+
100
+ def test_reset_works():
101
+ counter = ConnectionCounter()
102
+ for _ in range(5):
103
+ counter.increment()
104
+ counter.reset()
105
+ assert counter.get_count() == 0
106
+ '''
107
+
108
+ TEST_SUITE_EXECUTABLE = '''
109
+ import threading
110
+
111
+ _tests_passed = 0
112
+ _tests_total = 8
113
+ _failures = []
114
+
115
+ def _run_test(name, fn):
116
+ global _tests_passed
117
+ try:
118
+ fn()
119
+ _tests_passed += 1
120
+ except AssertionError as e:
121
+ _failures.append(f"FAILED {name}: {e}")
122
+ except Exception as e:
123
+ _failures.append(f"ERROR {name}: {type(e).__name__}: {e}")
124
+
125
+ _run_test("test_initial_count_zero", lambda: test_initial_count_zero())
126
+ _run_test("test_single_increment", lambda: test_single_increment())
127
+ _run_test("test_single_decrement", lambda: test_single_decrement())
128
+ _run_test("test_multiple_increments", lambda: test_multiple_increments())
129
+ _run_test("test_multiple_decrements", lambda: test_multiple_decrements())
130
+ _run_test("test_increment_then_decrement", lambda: test_increment_then_decrement())
131
+ _run_test("test_get_count_returns_int", lambda: test_get_count_returns_int())
132
+ _run_test("test_reset_works", lambda: test_reset_works())
133
+
134
+ for f in _failures:
135
+ print(f)
136
+ print(f"{_tests_passed} passed, {_tests_total - _tests_passed} failed")
137
+ '''
138
+
139
+ GROUND_TRUTH = {
140
+ "bug_location": "increment AND decrement",
141
+ "bug_type": "race_condition",
142
+ "hypothesis_keywords": [
143
+ "race condition", "atomic", "lock", "read-modify-write",
144
+ "interleaving", "not thread-safe", "release the lock"
145
+ ],
146
+ "keyword_match_mode": "any",
147
+ "fixed_code": '''import threading
148
+
149
+ class ConnectionCounter:
150
+ """Thread-safe connection counter for a web server."""
151
+
152
+ def __init__(self):
153
+ self.count = 0
154
+ self._lock = threading.Lock()
155
+
156
+ def increment(self):
157
+ with self._lock:
158
+ self.count += 1
159
+
160
+ def decrement(self):
161
+ with self._lock:
162
+ self.count -= 1
163
+
164
+ def get_count(self) -> int:
165
+ with self._lock:
166
+ return self.count
167
+
168
+ def reset(self):
169
+ with self._lock:
170
+ self.count = 0
171
+ ''',
172
+ }
173
+
174
+ TASK_CONFIG = {
175
+ "task_id": "hard",
176
+ "task_description": TASK_DESCRIPTION,
177
+ "buggy_code": BUGGY_CODE,
178
+ "test_suite": TEST_SUITE,
179
+ "test_suite_executable": TEST_SUITE_EXECUTABLE,
180
+ "ground_truth": GROUND_TRUTH,
181
+ "max_attempts": 10,
182
+ "max_steps": 25,
183
+ "tests_total": 8,
184
+ "allow_threading": True,
185
+ }
env/tasks/task_medium.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task Medium — Red Herring Authentication Bug
3
+ ==============================================
4
+ Three interdependent functions: hash_password, validate_password, authenticate_user.
5
+ The error points to authenticate_user but the actual bug is in hash_password.
6
+
7
+ Bug: hash_password wraps hexdigest() result in str(bytes()), adding b'' prefix.
8
+ When passwords are stored via a "direct insert" path that doesn't use hash_password,
9
+ the comparison fails because the stored hash is clean but the computed hash has b'' prefix.
10
+
11
+ Expected: 6 pass, 4 fail
12
+ """
13
+
14
+ TASK_DESCRIPTION = """A user authentication module with three functions: hash_password, validate_password,
15
+ and authenticate_user. Some tests are failing with errors pointing to authenticate_user returning False
16
+ when it should return True. The module handles password hashing with MD5, password validation by comparing
17
+ hashes, and user authentication against a user database. Debug the module to make all tests pass."""
18
+
# Buggy implementation handed to the agent for the "medium" task.
#
# The planted defect lives in hash_password: it returns
# str(bytes(hex_digest, 'ascii')), i.e. the text "b'<hex>'" rather than
# "<hex>". A hash stored via hashlib's hexdigest() directly therefore never
# compares equal in validate_password, which makes authenticate_user return
# False for correct credentials (the red herring).
BUGGY_CODE = '''import hashlib

def hash_password(password: str) -> str:
    """Hash a password using MD5 and return the hex digest string."""
    password_bytes = password.encode('utf-8')
    hash_obj = hashlib.md5(password_bytes)
    hex_digest = hash_obj.hexdigest()
    # BUG: unnecessary bytes conversion corrupts the hash string
    # str(bytes(...)) produces "b'...'" instead of just "..."
    return str(bytes(hex_digest, 'ascii'))

def validate_password(password: str, stored_hash: str) -> bool:
    """Check if password matches the stored hash."""
    computed_hash = hash_password(password)
    return computed_hash == stored_hash

def authenticate_user(username: str, password: str, user_db: dict) -> bool:
    """Authenticate a user against the database.

    Args:
        username: The username to authenticate
        password: The password to validate
        user_db: Dict mapping usernames to {'password_hash': str, 'active': bool}

    Returns:
        True if user exists, is active, and password matches
    """
    if username not in user_db:
        return False
    user = user_db[username]
    if not user.get('active', False):
        return False
    return validate_password(password, user['password_hash'])
'''
97
+
98
+ TEST_SUITE = '''import hashlib
99
+
100
+ # ── Helper: create user_db entries the "correct" way (as a real app would) ──
101
+ def _make_hash(password):
102
+ """This is how the registration system stores passwords — using hexdigest directly."""
103
+ return hashlib.md5(password.encode('utf-8')).hexdigest()
104
+
105
+ def _build_user_db():
106
+ """Build a test user database with properly hashed passwords."""
107
+ return {
108
+ 'alice': {'password_hash': _make_hash('password123'), 'active': True},
109
+ 'bob': {'password_hash': _make_hash('securepass'), 'active': True},
110
+ 'charlie': {'password_hash': _make_hash('charlie_pw'), 'active': False},
111
+ 'diana': {'password_hash': _make_hash('d1@n@_pass'), 'active': True},
112
+ }
113
+
114
+ # ── Tests that PASS (6) β€” these don't hit the hash mismatch ──────────────────
115
+
116
+ def test_hash_returns_string():
117
+ result = hash_password("test")
118
+ assert isinstance(result, str), f"hash_password should return str, got {type(result)}"
119
+
120
+ def test_hash_deterministic():
121
+ h1 = hash_password("same_input")
122
+ h2 = hash_password("same_input")
123
+ assert h1 == h2, "Same input must produce same hash"
124
+
125
+ def test_hash_different_inputs():
126
+ h1 = hash_password("password1")
127
+ h2 = hash_password("password2")
128
+ assert h1 != h2, "Different inputs should produce different hashes"
129
+
130
+ def test_unknown_user_rejected():
131
+ db = _build_user_db()
132
+ assert authenticate_user('unknown', 'password123', db) == False
133
+
134
+ def test_inactive_user_rejected():
135
+ db = _build_user_db()
136
+ assert authenticate_user('charlie', 'charlie_pw', db) == False
137
+
138
+ def test_wrong_password_rejected():
139
+ db = _build_user_db()
140
+ assert authenticate_user('alice', 'wrong_password', db) == False
141
+
142
+ # ── Tests that FAIL (4) β€” these expose the hash mismatch ─────────────────────
143
+
144
+ def test_alice_correct_password():
145
+ db = _build_user_db()
146
+ result = authenticate_user('alice', 'password123', db)
147
+ assert result == True, f"authenticate_user('alice', 'password123') returned {result}, expected True"
148
+
149
+ def test_bob_correct_password():
150
+ db = _build_user_db()
151
+ result = authenticate_user('bob', 'securepass', db)
152
+ assert result == True, f"authenticate_user('bob', 'securepass') returned {result}, expected True"
153
+
154
+ def test_diana_correct_password():
155
+ db = _build_user_db()
156
+ result = authenticate_user('diana', 'd1@n@_pass', db)
157
+ assert result == True, f"authenticate_user('diana', 'd1@n@_pass') returned {result}, expected True"
158
+
159
+ def test_validate_password_direct():
160
+ stored = _make_hash('mypassword')
161
+ result = validate_password('mypassword', stored)
162
+ assert result == True, f"validate_password with correct password returned {result}, expected True"
163
+ '''
164
+
165
+ TEST_SUITE_EXECUTABLE = '''
166
+ import hashlib
167
+
168
+ # ── Helper ──
169
+ def _make_hash(password):
170
+ return hashlib.md5(password.encode('utf-8')).hexdigest()
171
+
172
+ def _build_user_db():
173
+ return {
174
+ 'alice': {'password_hash': _make_hash('password123'), 'active': True},
175
+ 'bob': {'password_hash': _make_hash('securepass'), 'active': True},
176
+ 'charlie': {'password_hash': _make_hash('charlie_pw'), 'active': False},
177
+ 'diana': {'password_hash': _make_hash('d1@n@_pass'), 'active': True},
178
+ }
179
+
180
+ _tests_passed = 0
181
+ _tests_total = 10
182
+ _failures = []
183
+
184
+ def _run_test(name, fn):
185
+ global _tests_passed
186
+ try:
187
+ fn()
188
+ _tests_passed += 1
189
+ except AssertionError as e:
190
+ _failures.append(f"FAILED {name}: {e}")
191
+ except Exception as e:
192
+ _failures.append(f"ERROR {name}: {type(e).__name__}: {e}")
193
+
194
+ # 6 passing tests
195
+ _run_test("test_hash_returns_string", lambda: test_hash_returns_string())
196
+ _run_test("test_hash_deterministic", lambda: test_hash_deterministic())
197
+ _run_test("test_hash_different_inputs", lambda: test_hash_different_inputs())
198
+ _run_test("test_unknown_user_rejected", lambda: test_unknown_user_rejected())
199
+ _run_test("test_inactive_user_rejected", lambda: test_inactive_user_rejected())
200
+ _run_test("test_wrong_password_rejected", lambda: test_wrong_password_rejected())
201
+
202
+ # 4 failing tests
203
+ _run_test("test_alice_correct_password", lambda: test_alice_correct_password())
204
+ _run_test("test_bob_correct_password", lambda: test_bob_correct_password())
205
+ _run_test("test_diana_correct_password", lambda: test_diana_correct_password())
206
+ _run_test("test_validate_password_direct", lambda: test_validate_password_direct())
207
+
208
+ for f in _failures:
209
+ print(f)
210
+ print(f"{_tests_passed} passed, {_tests_total - _tests_passed} failed")
211
+ '''
212
+
213
+ GROUND_TRUTH = {
214
+ "bug_location": "hash_password",
215
+ "bug_type": "bytes_str_conversion",
216
+ "hypothesis_keywords": ["hash_password", "bytes", "str(", "hexdigest", "encoding", "b'"],
217
+ "keyword_match_mode": "hash_password_plus_one", # must mention "hash_password" AND at least 1 other
218
+ "red_herring_keyword": "authenticate_user", # hypothesis mentioning ONLY this scores 0.0
219
+ "fixed_code": '''import hashlib
220
+
221
+ def hash_password(password: str) -> str:
222
+ """Hash a password using MD5 and return the hex digest string."""
223
+ password_bytes = password.encode('utf-8')
224
+ hash_obj = hashlib.md5(password_bytes)
225
+ return hash_obj.hexdigest()
226
+
227
+ def validate_password(password: str, stored_hash: str) -> bool:
228
+ """Check if password matches the stored hash."""
229
+ computed_hash = hash_password(password)
230
+ return computed_hash == stored_hash
231
+
232
+ def authenticate_user(username: str, password: str, user_db: dict) -> bool:
233
+ """Authenticate a user against the database."""
234
+ if username not in user_db:
235
+ return False
236
+ user = user_db[username]
237
+ if not user.get('active', False):
238
+ return False
239
+ return validate_password(password, user['password_hash'])
240
+ ''',
241
+ }
242
+
243
+ TASK_CONFIG = {
244
+ "task_id": "medium",
245
+ "task_description": TASK_DESCRIPTION,
246
+ "buggy_code": BUGGY_CODE,
247
+ "test_suite": TEST_SUITE,
248
+ "test_suite_executable": TEST_SUITE_EXECUTABLE,
249
+ "ground_truth": GROUND_TRUTH,
250
+ "max_attempts": 7,
251
+ "max_steps": 15,
252
+ "tests_total": 10,
253
+ "allow_threading": False,
254
+ }