tracefix_rl / tasks /tasks.py
databoysu
reorganization done
8f66ad1
"""
tasks.py β€” Static Task Registry
================================
This is a "dumb" registry. Tasks are hardcoded dicts representing
curated buggy programs generated offline via MutationEngine.
Exported symbols:
TASKS_BY_DIFFICULTY Dict[str, List[Dict]] β€” tasks grouped by tier
ALL_TASKS List[Dict] β€” flat list for random sampling
Run mutation_engine.py + dataset_generator.py locally (offline) to
generate new candidates, curate the best ones, and add them here.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, List
# ---------------------------------------------------------------------------
# Test helpers (module-level; accept namespace dict, raise AssertionError)
# ---------------------------------------------------------------------------
# ── sum_even_numbers ────────────────────────────────────────────────────────
def _tse_1(ns):
res = ns["sum_even_numbers"]([1, 2, 3, 4])
assert res == 6, f"Test failed: input=[1, 2, 3, 4], expected=6, got={res}"
def _tse_2(ns):
res = ns["sum_even_numbers"]([])
assert res == 0, f"Test failed: input=[], expected=0, got={res}"
def _tse_3(ns):
res = ns["sum_even_numbers"]([1, 3, 5])
assert res == 0, f"Test failed: input=[1, 3, 5], expected=0, got={res}"
def _tse_4(ns):
res = ns["sum_even_numbers"]([2, 2, 2])
assert res == 6, f"Test failed: input=[2, 2, 2], expected=6, got={res}"
# ── reverse_string ──────────────────────────────────────────────────────────
def _trs_1(ns):
res = ns["reverse_string"]("abc")
assert res == "cba", f"Test failed: input='abc', expected='cba', got={res!r}"
def _trs_2(ns):
res = ns["reverse_string"]("")
assert res == "", f"Test failed: input='', expected='', got={res!r}"
def _trs_3(ns):
res = ns["reverse_string"]("a")
assert res == "a", f"Test failed: input='a', expected='a', got={res!r}"
def _trs_4(ns):
res = ns["reverse_string"]("abcd")
assert res == "dcba", f"Test failed: input='abcd', expected='dcba', got={res!r}"
# ── binary_search ───────────────────────────────────────────────────────────
def _tbs_1(ns):
res = ns["binary_search"]([1, 2, 3, 4, 5], 3)
assert res == 2, f"Test failed: input=([1, 2, 3, 4, 5], 3), expected=2, got={res}"
def _tbs_2(ns):
res = ns["binary_search"]([1, 2, 3, 4, 5], 6)
assert res == -1, f"Test failed: input=([1, 2, 3, 4, 5], 6), expected=-1, got={res}"
def _tbs_3(ns):
res = ns["binary_search"]([], 1)
assert res == -1, f"Test failed: input=([], 1), expected=-1, got={res}"
def _tbs_4(ns):
res = ns["binary_search"]([7], 7)
assert res == 0, f"Test failed: input=([7], 7), expected=0, got={res}"
# ── flatten ─────────────────────────────────────────────────────────────────
def _tfl_1(ns):
res = ns["flatten"]([1, [2, 3]])
assert res == [1, 2, 3], f"Test failed: input=[1, [2, 3]], expected=[1, 2, 3], got={res}"
def _tfl_2(ns):
res = ns["flatten"]([])
assert res == [], f"Test failed: input=[], expected=[], got={res}"
def _tfl_3(ns):
res = ns["flatten"]([1, [2, [3]]])
assert res == [1, 2, 3], f"Test failed: input=[1, [2, [3]]], expected=[1, 2, 3], got={res}"
def _tfl_4(ns):
res = ns["flatten"]([[1], [2, 3], [4]])
assert res == [1, 2, 3, 4], f"Test failed: input=[[1], [2, 3], [4]], expected=[1, 2, 3, 4], got={res}"
# ── word_count ──────────────────────────────────────────────────────────────
def _twc_1(ns):
res = ns["word_count"]("hello world hello")
assert res == {"hello": 2, "world": 1}, f"Test failed: input='hello world hello', expected={{'hello': 2, 'world': 1}}, got={res}"
def _twc_2(ns):
res = ns["word_count"]("Hi, hi!")
assert res == {"hi": 2}, f"Test failed: input='Hi, hi!', expected={{'hi': 2}}, got={res}"
def _twc_3(ns):
res = ns["word_count"]("")
assert res == {}, f"Test failed: input='', expected={{}}, got={res}"
def _twc_4(ns):
res = ns["word_count"]("Hello HELLO hello")
assert res == {"hello": 3}, f"Test failed: input='Hello HELLO hello', expected={{'hello': 3}}, got={res}"
# ── lru_cache ───────────────────────────────────────────────────────────────
def _tlru_1(ns):
C = ns["LRUCache"]
c = C(2); c.put(1, 1); c.put(2, 2)
res = c.get(1)
assert res == 1, f"Test failed: Capacity 2. Added (1,1), then (2,2). Expected get(1) to be 1, got {res}"
def _tlru_2(ns):
C = ns["LRUCache"]
c = C(1); c.put(1, 1); c.put(2, 2)
res = c.get(1)
assert res == -1, f"Test failed: Capacity 1. Added (1,1), then (2,2). Expected key 1 to be evicted (return -1), got {res}"
def _tlru_3(ns):
C = ns["LRUCache"]
c = C(2); c.put(1, 1); c.put(2, 2); c.get(1); c.put(3, 3)
res = c.get(2)
assert res == -1, f"Test failed: Capacity 2. Added (1,1), then (2,2), got(1), added (3,3). Expected key 2 to be evicted (return -1) since 1 was promoted during get(1), got {res}. Did you promote key 1 during get()?"
# ── valid_parentheses ────────────────────────────────────────────────────────
def _tvp_1(ns):
res = ns["is_valid"]("()")
assert res == True, f"Test failed: input='()', expected=True, got={res}"
def _tvp_2(ns):
res = ns["is_valid"]("(]")
assert res == False, f"Test failed: input='(]', expected=False, got={res}"
def _tvp_3(ns):
res = ns["is_valid"]("([{}])")
assert res == True, f"Test failed: input='([{{}}])', expected=True, got={res}"
def _tvp_4(ns):
res = ns["is_valid"]("")
assert res == True, f"Test failed: input='', expected=True, got={res}"
# ── merge_intervals ──────────────────────────────────────────────────────────
def _tmi_1(ns):
res = ns["merge_intervals"]([[1, 3], [2, 6]])
assert res == [[1, 6]], f"Test failed: input=[[1, 3], [2, 6]], expected=[[1, 6]], got={res}"
def _tmi_2(ns):
res = ns["merge_intervals"]([[1, 4], [4, 5]])
assert res == [[1, 5]], f"Test failed: input=[[1, 4], [4, 5]], expected=[[1, 5]], got={res}"
def _tmi_3(ns):
res = ns["merge_intervals"]([[1, 2], [3, 4]])
assert res == [[1, 2], [3, 4]], f"Test failed: input=[[1, 2], [3, 4]], expected=[[1, 2], [3, 4]], got={res}"
# ---------------------------------------------------------------------------
# Static task registry
# ---------------------------------------------------------------------------
def _t(name, description, code, solution, tests, difficulty, bug_type):
return dict(
name=name, description=description,
code=code, solution=solution,
tests=tests, difficulty=difficulty, bug_type=bug_type,
)
# ── EASY ──────────────────────────────────────────────────────────────────
TASK_SUM_EVEN_WRONG_OP = _t(
name="sum_even_wrong_condition",
description="Debug the sum_even_numbers function so it passes all tests.",
difficulty="easy",
bug_type="wrong_operator",
code=[
"def sum_even_numbers(nums):",
" total = 0",
" for n in nums:",
" if n % 2 != 0:",
" total += n",
" return total",
],
solution=[
"def sum_even_numbers(nums):",
" total = 0",
" for n in nums:",
" if n % 2 == 0:",
" total += n",
" return total",
],
tests=[_tse_1, _tse_2, _tse_3, _tse_4],
)
TASK_SUM_EVEN_MISSING_INIT = _t(
name="sum_even_missing_accumulator",
description="Debug the sum_even_numbers function so it passes all tests.",
difficulty="easy",
bug_type="wrong_operator",
code=[
"def sum_even_numbers(nums):",
" total = 0",
" for n in nums:",
" if n % 2 == 0:",
" total -= n",
" return total",
],
solution=[
"def sum_even_numbers(nums):",
" total = 0",
" for n in nums:",
" if n % 2 == 0:",
" total += n",
" return total",
],
tests=[_tse_1, _tse_2, _tse_3, _tse_4],
)
TASK_REVERSE_WRONG_STEP = _t(
name="reverse_string_wrong_step",
description="Debug the reverse_string function so it passes all tests.",
difficulty="easy",
bug_type="off_by_one",
code=[
"def reverse_string(s):",
" return s[::-2]",
],
solution=[
"def reverse_string(s):",
" return s[::-1]",
],
tests=[_trs_1, _trs_2, _trs_3, _trs_4],
)
TASK_REVERSE_NO_REVERSE = _t(
name="reverse_string_returns_original",
description="Debug the reverse_string function so it passes all tests.",
difficulty="easy",
bug_type="wrong_operator",
code=[
"def reverse_string(s):",
" return s[::1]",
],
solution=[
"def reverse_string(s):",
" return s[::-1]",
],
tests=[_trs_1, _trs_2, _trs_3, _trs_4],
)
# ── MEDIUM ────────────────────────────────────────────────────────────────
TASK_BS_OFF_BY_ONE = _t(
name="binary_search_off_by_one",
description="Debug the binary_search function so it passes all tests.",
difficulty="medium",
bug_type="off_by_one",
code=[
"def binary_search(arr, target):",
" left, right = 0, len(arr)",
" while left <= right:",
" mid = (left + right) // 2",
" if arr[mid] == target:",
" return mid",
" elif arr[mid] < target:",
" left = mid + 1",
" else:",
" right = mid - 1",
" return -1",
],
solution=[
"def binary_search(arr, target):",
" left, right = 0, len(arr) - 1",
" while left <= right:",
" mid = (left + right) // 2",
" if arr[mid] == target:",
" return mid",
" elif arr[mid] < target:",
" left = mid + 1",
" else:",
" right = mid - 1",
" return -1",
],
tests=[_tbs_1, _tbs_2, _tbs_3, _tbs_4],
)
TASK_BS_WRONG_MID = _t(
name="binary_search_wrong_mid",
description="Debug the binary_search function so it passes all tests.",
difficulty="medium",
bug_type="wrong_operator",
code=[
"def binary_search(arr, target):",
" left, right = 0, len(arr) - 1",
" while left <= right:",
" mid = left + right",
" if mid >= len(arr):",
" return -1",
" if arr[mid] == target:",
" return mid",
" elif arr[mid] < target:",
" left = mid + 1",
" else:",
" right = mid - 1",
" return -1",
],
solution=[
"def binary_search(arr, target):",
" left, right = 0, len(arr) - 1",
" while left <= right:",
" mid = (left + right) // 2",
" if arr[mid] == target:",
" return mid",
" elif arr[mid] < target:",
" left = mid + 1",
" else:",
" right = mid - 1",
" return -1",
],
tests=[_tbs_1, _tbs_2, _tbs_3, _tbs_4],
)
TASK_FLATTEN_APPEND = _t(
name="flatten_missing_recursion",
description="Debug the flatten function so it passes all tests.",
difficulty="medium",
bug_type="wrong_function_call",
code=[
"def flatten(lst):",
" result = []",
" for item in lst:",
" if isinstance(item, list):",
" result.append(item)",
" else:",
" result.append(item)",
" return result",
],
solution=[
"def flatten(lst):",
" result = []",
" for item in lst:",
" if isinstance(item, list):",
" result.extend(flatten(item))",
" else:",
" result.append(item)",
" return result",
],
tests=[_tfl_1, _tfl_2, _tfl_3, _tfl_4],
)
TASK_FLATTEN_LOGIC_INVERT = _t(
name="flatten_inverted_branch",
description="Debug the flatten function so it passes all tests.",
difficulty="medium",
bug_type="logic_inversion",
code=[
"def flatten(lst):",
" result = []",
" for item in lst:",
" if not isinstance(item, list):",
" result.extend(flatten(item))",
" else:",
" result.append(item)",
" return result",
],
solution=[
"def flatten(lst):",
" result = []",
" for item in lst:",
" if isinstance(item, list):",
" result.extend(flatten(item))",
" else:",
" result.append(item)",
" return result",
],
tests=[_tfl_1, _tfl_2, _tfl_3, _tfl_4],
)
TASK_WC_NO_LOWER = _t(
name="word_count_no_lower",
description="Debug the word_count function so it passes all tests.",
difficulty="medium",
bug_type="missing_return",
code=[
"import string",
"def word_count(text):",
" for p in string.punctuation:",
" text = text.replace(p, '')",
" words = text.split()",
" counts = {}",
" for w in words:",
" counts[w] = counts.get(w, 0) + 1",
" return counts",
],
solution=[
"import string",
"def word_count(text):",
" text = text.lower()",
" for p in string.punctuation:",
" text = text.replace(p, '')",
" words = text.split()",
" counts = {}",
" for w in words:",
" counts[w] = counts.get(w, 0) + 1",
" return counts",
],
tests=[_twc_1, _twc_2, _twc_3, _twc_4],
)
TASK_WC_NO_PUNCT = _t(
name="word_count_no_punct_strip",
description="Debug the word_count function so it passes all tests.",
difficulty="medium",
bug_type="missing_return",
code=[
"def word_count(text):",
" text = text.lower()",
" words = text.split()",
" counts = {}",
" for w in words:",
" counts[w] = counts.get(w, 0) + 1",
" return counts",
],
solution=[
"import string",
"def word_count(text):",
" text = text.lower()",
" for p in string.punctuation:",
" text = text.replace(p, '')",
" words = text.split()",
" counts = {}",
" for w in words:",
" counts[w] = counts.get(w, 0) + 1",
" return counts",
],
tests=[_twc_1, _twc_2, _twc_3, _twc_4],
)
# ── HARD ──────────────────────────────────────────────────────────────────
TASK_LRU_WRONG_EVICT = _t(
name="lru_cache_wrong_eviction",
description="Debug the LRUCache function so it passes all tests.",
difficulty="hard",
bug_type="off_by_one",
code=[
"class LRUCache:",
" def __init__(self, capacity):",
" self.capacity = capacity",
" self.cache = []",
" def get(self, key):",
" for i, (k, v) in enumerate(self.cache):",
" if k == key:",
" self.cache.append(self.cache.pop(i))",
" return v",
" return -1",
" def put(self, key, value):",
" for i, (k, _) in enumerate(self.cache):",
" if k == key:",
" self.cache.pop(i)",
" break",
" if len(self.cache) >= self.capacity:",
" self.cache.pop(0)",
" self.cache.append((key, value))",
],
solution=[
"class LRUCache:",
" def __init__(self, capacity):",
" self.capacity = capacity",
" self.cache = []",
" def get(self, key):",
" for i, (k, v) in enumerate(self.cache):",
" if k == key:",
" self.cache.append(self.cache.pop(i))",
" return v",
" return -1",
" def put(self, key, value):",
" for i, (k, _) in enumerate(self.cache):",
" if k == key:",
" self.cache.pop(i)",
" break",
" if len(self.cache) >= self.capacity:",
" self.cache.pop(0)",
" self.cache.append((key, value))",
],
tests=[_tlru_1, _tlru_2, _tlru_3],
)
TASK_LRU_NO_PROMOTE = _t(
name="lru_cache_no_promotion",
description="Debug the LRUCache function so it passes all tests.",
difficulty="hard",
bug_type="missing_return",
code=[
"class LRUCache:",
" def __init__(self, capacity):",
" self.capacity = capacity",
" self.cache = []",
" def get(self, key):",
" for i, (k, v) in enumerate(self.cache):",
" if k == key:",
" return v",
" return -1",
" def put(self, key, value):",
" for i, (k, _) in enumerate(self.cache):",
" if k == key:",
" self.cache.pop(i)",
" break",
" if len(self.cache) >= self.capacity:",
" self.cache.pop(0)",
" self.cache.append((key, value))",
],
solution=[
"class LRUCache:",
" def __init__(self, capacity):",
" self.capacity = capacity",
" self.cache = []",
" def get(self, key):",
" for i, (k, v) in enumerate(self.cache):",
" if k == key:",
" self.cache.append(self.cache.pop(i))",
" return v",
" return -1",
" def put(self, key, value):",
" for i, (k, _) in enumerate(self.cache):",
" if k == key:",
" self.cache.pop(i)",
" break",
" if len(self.cache) >= self.capacity:",
" self.cache.pop(0)",
" self.cache.append((key, value))",
],
tests=[_tlru_1, _tlru_2, _tlru_3],
)
TASK_VP_WRONG_MAPPING = _t(
name="valid_parentheses_wrong_mapping",
description="Debug the is_valid function so it passes all tests.",
difficulty="hard",
bug_type="wrong_operator",
code=[
"def is_valid(s):",
" stack = []",
" mapping = {')': '[', ']': '{', '}': '('}",
" for c in s:",
" if c in mapping.values():",
" stack.append(c)",
" elif c in mapping:",
" if not stack or stack.pop() != mapping[c]:",
" return False",
" return len(stack) == 0",
],
solution=[
"def is_valid(s):",
" stack = []",
" mapping = {')': '(', ']': '[', '}': '{'}",
" for c in s:",
" if c in mapping.values():",
" stack.append(c)",
" elif c in mapping:",
" if not stack or stack.pop() != mapping[c]:",
" return False",
" return len(stack) == 0",
],
tests=[_tvp_1, _tvp_2, _tvp_3, _tvp_4],
)
TASK_VP_MISSING_EMPTY_CHECK = _t(
name="valid_parentheses_no_empty_check",
description="Debug the is_valid function so it passes all tests.",
difficulty="hard",
bug_type="logic_inversion",
code=[
"def is_valid(s):",
" stack = []",
" mapping = {')': '(', ']': '[', '}': '{'}",
" for c in s:",
" if c in mapping.values():",
" stack.append(c)",
" elif c in mapping:",
" if stack.pop() != mapping[c]:",
" return False",
" return len(stack) == 0",
],
solution=[
"def is_valid(s):",
" stack = []",
" mapping = {')': '(', ']': '[', '}': '{'}",
" for c in s:",
" if c in mapping.values():",
" stack.append(c)",
" elif c in mapping:",
" if not stack or stack.pop() != mapping[c]:",
" return False",
" return len(stack) == 0",
],
tests=[_tvp_1, _tvp_2, _tvp_3, _tvp_4],
)
TASK_MI_STRICT_OVERLAP = _t(
name="merge_intervals_strict_overlap",
description="Debug the merge_intervals function so it passes all tests.",
difficulty="hard",
bug_type="wrong_operator",
code=[
"def merge_intervals(intervals):",
" intervals.sort()",
" merged = []",
" for interval in intervals:",
" if not merged or merged[-1][1] < interval[0]:",
" merged.append(list(interval))",
" else:",
" merged[-1][1] = max(merged[-1][1], interval[1])",
" return merged",
],
solution=[
"def merge_intervals(intervals):",
" intervals.sort()",
" merged = []",
" for interval in intervals:",
" if not merged or merged[-1][1] < interval[0]:",
" merged.append(list(interval))",
" else:",
" merged[-1][1] = max(merged[-1][1], interval[1])",
" return merged",
],
tests=[_tmi_1, _tmi_2, _tmi_3],
)
TASK_MI_NO_SORT = _t(
name="merge_intervals_missing_sort",
description="Debug the merge_intervals function so it passes all tests.",
difficulty="hard",
bug_type="missing_return",
code=[
"def merge_intervals(intervals):",
" merged = []",
" for interval in intervals:",
" if not merged or merged[-1][1] < interval[0]:",
" merged.append(list(interval))",
" else:",
" merged[-1][1] = max(merged[-1][1], interval[1])",
" return merged",
],
solution=[
"def merge_intervals(intervals):",
" intervals.sort()",
" merged = []",
" for interval in intervals:",
" if not merged or merged[-1][1] < interval[0]:",
" merged.append(list(interval))",
" else:",
" merged[-1][1] = max(merged[-1][1], interval[1])",
" return merged",
],
tests=[_tmi_1, _tmi_2, _tmi_3],
)
# ---------------------------------------------------------------------------
# Registries
# ---------------------------------------------------------------------------
TASKS_BY_DIFFICULTY: Dict[str, List[Dict]] = {
"easy": [
TASK_SUM_EVEN_WRONG_OP,
TASK_SUM_EVEN_MISSING_INIT,
TASK_REVERSE_WRONG_STEP,
TASK_REVERSE_NO_REVERSE,
],
"medium": [
TASK_BS_OFF_BY_ONE,
TASK_BS_WRONG_MID,
TASK_FLATTEN_APPEND,
TASK_FLATTEN_LOGIC_INVERT,
TASK_WC_NO_LOWER,
TASK_WC_NO_PUNCT,
],
"hard": [
TASK_LRU_WRONG_EVICT,
TASK_LRU_NO_PROMOTE,
TASK_VP_WRONG_MAPPING,
TASK_VP_MISSING_EMPTY_CHECK,
TASK_MI_STRICT_OVERLAP,
TASK_MI_NO_SORT,
],
}
# Flat list β€” used for random sampling when training_step is not set
ALL_TASKS: List[Dict] = [
t for bucket in TASKS_BY_DIFFICULTY.values() for t in bucket
]