Spaces:

SolusOps
/

tracefix_rl

Sleeping

databoysu

reorganization done

8f66ad1 27 days ago

24.9 kB

	"""
	tasks.py — Static Task Registry
	================================
	This is a "dumb" registry. Tasks are hardcoded dicts representing
	curated buggy programs generated offline via MutationEngine.

	Exported symbols:
	TASKS_BY_DIFFICULTY Dict[str, List[Dict]] — tasks grouped by tier
	ALL_TASKS List[Dict] — flat list for random sampling

	Run mutation_engine.py + dataset_generator.py locally (offline) to
	generate new candidates, curate the best ones, and add them here.
	"""

	from __future__ import annotations
	from typing import Any, Callable, Dict, List


	# ---------------------------------------------------------------------------
	# Test helpers (module-level; accept namespace dict, raise AssertionError)
	# ---------------------------------------------------------------------------

	# ── sum_even_numbers ────────────────────────────────────────────────────────

	def _tse_1(ns):
	res = ns["sum_even_numbers"]([1, 2, 3, 4])
	assert res == 6, f"Test failed: input=[1, 2, 3, 4], expected=6, got={res}"
	def _tse_2(ns):
	res = ns["sum_even_numbers"]([])
	assert res == 0, f"Test failed: input=[], expected=0, got={res}"
	def _tse_3(ns):
	res = ns["sum_even_numbers"]([1, 3, 5])
	assert res == 0, f"Test failed: input=[1, 3, 5], expected=0, got={res}"
	def _tse_4(ns):
	res = ns["sum_even_numbers"]([2, 2, 2])
	assert res == 6, f"Test failed: input=[2, 2, 2], expected=6, got={res}"

	# ── reverse_string ──────────────────────────────────────────────────────────

	def _trs_1(ns):
	res = ns["reverse_string"]("abc")
	assert res == "cba", f"Test failed: input='abc', expected='cba', got={res!r}"
	def _trs_2(ns):
	res = ns["reverse_string"]("")
	assert res == "", f"Test failed: input='', expected='', got={res!r}"
	def _trs_3(ns):
	res = ns["reverse_string"]("a")
	assert res == "a", f"Test failed: input='a', expected='a', got={res!r}"
	def _trs_4(ns):
	res = ns["reverse_string"]("abcd")
	assert res == "dcba", f"Test failed: input='abcd', expected='dcba', got={res!r}"

	# ── binary_search ───────────────────────────────────────────────────────────

	def _tbs_1(ns):
	res = ns["binary_search"]([1, 2, 3, 4, 5], 3)
	assert res == 2, f"Test failed: input=([1, 2, 3, 4, 5], 3), expected=2, got={res}"
	def _tbs_2(ns):
	res = ns["binary_search"]([1, 2, 3, 4, 5], 6)
	assert res == -1, f"Test failed: input=([1, 2, 3, 4, 5], 6), expected=-1, got={res}"
	def _tbs_3(ns):
	res = ns["binary_search"]([], 1)
	assert res == -1, f"Test failed: input=([], 1), expected=-1, got={res}"
	def _tbs_4(ns):
	res = ns["binary_search"]([7], 7)
	assert res == 0, f"Test failed: input=([7], 7), expected=0, got={res}"

	# ── flatten ─────────────────────────────────────────────────────────────────

	def _tfl_1(ns):
	res = ns["flatten"]([1, [2, 3]])
	assert res == [1, 2, 3], f"Test failed: input=[1, [2, 3]], expected=[1, 2, 3], got={res}"
	def _tfl_2(ns):
	res = ns["flatten"]([])
	assert res == [], f"Test failed: input=[], expected=[], got={res}"
	def _tfl_3(ns):
	res = ns["flatten"]([1, [2, [3]]])
	assert res == [1, 2, 3], f"Test failed: input=[1, [2, [3]]], expected=[1, 2, 3], got={res}"
	def _tfl_4(ns):
	res = ns["flatten"]([[1], [2, 3], [4]])
	assert res == [1, 2, 3, 4], f"Test failed: input=[[1], [2, 3], [4]], expected=[1, 2, 3, 4], got={res}"

	# ── word_count ──────────────────────────────────────────────────────────────

	def _twc_1(ns):
	res = ns["word_count"]("hello world hello")
	assert res == {"hello": 2, "world": 1}, f"Test failed: input='hello world hello', expected={{'hello': 2, 'world': 1}}, got={res}"
	def _twc_2(ns):
	res = ns["word_count"]("Hi, hi!")
	assert res == {"hi": 2}, f"Test failed: input='Hi, hi!', expected={{'hi': 2}}, got={res}"
	def _twc_3(ns):
	res = ns["word_count"]("")
	assert res == {}, f"Test failed: input='', expected={{}}, got={res}"
	def _twc_4(ns):
	res = ns["word_count"]("Hello HELLO hello")
	assert res == {"hello": 3}, f"Test failed: input='Hello HELLO hello', expected={{'hello': 3}}, got={res}"

	# ── lru_cache ───────────────────────────────────────────────────────────────

	def _tlru_1(ns):
	C = ns["LRUCache"]
	c = C(2); c.put(1, 1); c.put(2, 2)
	res = c.get(1)
	assert res == 1, f"Test failed: Capacity 2. Added (1,1), then (2,2). Expected get(1) to be 1, got {res}"

	def _tlru_2(ns):
	C = ns["LRUCache"]
	c = C(1); c.put(1, 1); c.put(2, 2)
	res = c.get(1)
	assert res == -1, f"Test failed: Capacity 1. Added (1,1), then (2,2). Expected key 1 to be evicted (return -1), got {res}"

	def _tlru_3(ns):
	C = ns["LRUCache"]
	c = C(2); c.put(1, 1); c.put(2, 2); c.get(1); c.put(3, 3)
	res = c.get(2)
	assert res == -1, f"Test failed: Capacity 2. Added (1,1), then (2,2), got(1), added (3,3). Expected key 2 to be evicted (return -1) since 1 was promoted during get(1), got {res}. Did you promote key 1 during get()?"

	# ── valid_parentheses ────────────────────────────────────────────────────────

	def _tvp_1(ns):
	res = ns["is_valid"]("()")
	assert res == True, f"Test failed: input='()', expected=True, got={res}"
	def _tvp_2(ns):
	res = ns["is_valid"]("(]")
	assert res == False, f"Test failed: input='(]', expected=False, got={res}"
	def _tvp_3(ns):
	res = ns["is_valid"]("([{}])")
	assert res == True, f"Test failed: input='([{{}}])', expected=True, got={res}"
	def _tvp_4(ns):
	res = ns["is_valid"]("")
	assert res == True, f"Test failed: input='', expected=True, got={res}"

	# ── merge_intervals ──────────────────────────────────────────────────────────

	def _tmi_1(ns):
	res = ns["merge_intervals"]([[1, 3], [2, 6]])
	assert res == [[1, 6]], f"Test failed: input=[[1, 3], [2, 6]], expected=[[1, 6]], got={res}"
	def _tmi_2(ns):
	res = ns["merge_intervals"]([[1, 4], [4, 5]])
	assert res == [[1, 5]], f"Test failed: input=[[1, 4], [4, 5]], expected=[[1, 5]], got={res}"
	def _tmi_3(ns):
	res = ns["merge_intervals"]([[1, 2], [3, 4]])
	assert res == [[1, 2], [3, 4]], f"Test failed: input=[[1, 2], [3, 4]], expected=[[1, 2], [3, 4]], got={res}"


	# ---------------------------------------------------------------------------
	# Static task registry
	# ---------------------------------------------------------------------------

	def _t(name, description, code, solution, tests, difficulty, bug_type):
	return dict(
	name=name, description=description,
	code=code, solution=solution,
	tests=tests, difficulty=difficulty, bug_type=bug_type,
	)


	# ── EASY ──────────────────────────────────────────────────────────────────

	TASK_SUM_EVEN_WRONG_OP = _t(
	name="sum_even_wrong_condition",
	description="Debug the sum_even_numbers function so it passes all tests.",
	difficulty="easy",
	bug_type="wrong_operator",
	code=[
	"def sum_even_numbers(nums):",
	" total = 0",
	" for n in nums:",
	" if n % 2 != 0:",
	" total += n",
	" return total",
	],
	solution=[
	"def sum_even_numbers(nums):",
	" total = 0",
	" for n in nums:",
	" if n % 2 == 0:",
	" total += n",
	" return total",
	],
	tests=[_tse_1, _tse_2, _tse_3, _tse_4],
	)

	TASK_SUM_EVEN_MISSING_INIT = _t(
	name="sum_even_missing_accumulator",
	description="Debug the sum_even_numbers function so it passes all tests.",
	difficulty="easy",
	bug_type="wrong_operator",
	code=[
	"def sum_even_numbers(nums):",
	" total = 0",
	" for n in nums:",
	" if n % 2 == 0:",
	" total -= n",
	" return total",
	],
	solution=[
	"def sum_even_numbers(nums):",
	" total = 0",
	" for n in nums:",
	" if n % 2 == 0:",
	" total += n",
	" return total",
	],
	tests=[_tse_1, _tse_2, _tse_3, _tse_4],
	)

	TASK_REVERSE_WRONG_STEP = _t(
	name="reverse_string_wrong_step",
	description="Debug the reverse_string function so it passes all tests.",
	difficulty="easy",
	bug_type="off_by_one",
	code=[
	"def reverse_string(s):",
	" return s[::-2]",
	],
	solution=[
	"def reverse_string(s):",
	" return s[::-1]",
	],
	tests=[_trs_1, _trs_2, _trs_3, _trs_4],
	)

	TASK_REVERSE_NO_REVERSE = _t(
	name="reverse_string_returns_original",
	description="Debug the reverse_string function so it passes all tests.",
	difficulty="easy",
	bug_type="wrong_operator",
	code=[
	"def reverse_string(s):",
	" return s[::1]",
	],
	solution=[
	"def reverse_string(s):",
	" return s[::-1]",
	],
	tests=[_trs_1, _trs_2, _trs_3, _trs_4],
	)


	# ── MEDIUM ────────────────────────────────────────────────────────────────

	TASK_BS_OFF_BY_ONE = _t(
	name="binary_search_off_by_one",
	description="Debug the binary_search function so it passes all tests.",
	difficulty="medium",
	bug_type="off_by_one",
	code=[
	"def binary_search(arr, target):",
	" left, right = 0, len(arr)",
	" while left <= right:",
	" mid = (left + right) // 2",
	" if arr[mid] == target:",
	" return mid",
	" elif arr[mid] < target:",
	" left = mid + 1",
	" else:",
	" right = mid - 1",
	" return -1",
	],
	solution=[
	"def binary_search(arr, target):",
	" left, right = 0, len(arr) - 1",
	" while left <= right:",
	" mid = (left + right) // 2",
	" if arr[mid] == target:",
	" return mid",
	" elif arr[mid] < target:",
	" left = mid + 1",
	" else:",
	" right = mid - 1",
	" return -1",
	],
	tests=[_tbs_1, _tbs_2, _tbs_3, _tbs_4],
	)

	TASK_BS_WRONG_MID = _t(
	name="binary_search_wrong_mid",
	description="Debug the binary_search function so it passes all tests.",
	difficulty="medium",
	bug_type="wrong_operator",
	code=[
	"def binary_search(arr, target):",
	" left, right = 0, len(arr) - 1",
	" while left <= right:",
	" mid = left + right",
	" if mid >= len(arr):",
	" return -1",
	" if arr[mid] == target:",
	" return mid",
	" elif arr[mid] < target:",
	" left = mid + 1",
	" else:",
	" right = mid - 1",
	" return -1",
	],
	solution=[
	"def binary_search(arr, target):",
	" left, right = 0, len(arr) - 1",
	" while left <= right:",
	" mid = (left + right) // 2",
	" if arr[mid] == target:",
	" return mid",
	" elif arr[mid] < target:",
	" left = mid + 1",
	" else:",
	" right = mid - 1",
	" return -1",
	],
	tests=[_tbs_1, _tbs_2, _tbs_3, _tbs_4],
	)

	TASK_FLATTEN_APPEND = _t(
	name="flatten_missing_recursion",
	description="Debug the flatten function so it passes all tests.",
	difficulty="medium",
	bug_type="wrong_function_call",
	code=[
	"def flatten(lst):",
	" result = []",
	" for item in lst:",
	" if isinstance(item, list):",
	" result.append(item)",
	" else:",
	" result.append(item)",
	" return result",
	],
	solution=[
	"def flatten(lst):",
	" result = []",
	" for item in lst:",
	" if isinstance(item, list):",
	" result.extend(flatten(item))",
	" else:",
	" result.append(item)",
	" return result",
	],
	tests=[_tfl_1, _tfl_2, _tfl_3, _tfl_4],
	)

	TASK_FLATTEN_LOGIC_INVERT = _t(
	name="flatten_inverted_branch",
	description="Debug the flatten function so it passes all tests.",
	difficulty="medium",
	bug_type="logic_inversion",
	code=[
	"def flatten(lst):",
	" result = []",
	" for item in lst:",
	" if not isinstance(item, list):",
	" result.extend(flatten(item))",
	" else:",
	" result.append(item)",
	" return result",
	],
	solution=[
	"def flatten(lst):",
	" result = []",
	" for item in lst:",
	" if isinstance(item, list):",
	" result.extend(flatten(item))",
	" else:",
	" result.append(item)",
	" return result",
	],
	tests=[_tfl_1, _tfl_2, _tfl_3, _tfl_4],
	)

	TASK_WC_NO_LOWER = _t(
	name="word_count_no_lower",
	description="Debug the word_count function so it passes all tests.",
	difficulty="medium",
	bug_type="missing_return",
	code=[
	"import string",
	"def word_count(text):",
	" for p in string.punctuation:",
	" text = text.replace(p, '')",
	" words = text.split()",
	" counts = {}",
	" for w in words:",
	" counts[w] = counts.get(w, 0) + 1",
	" return counts",
	],
	solution=[
	"import string",
	"def word_count(text):",
	" text = text.lower()",
	" for p in string.punctuation:",
	" text = text.replace(p, '')",
	" words = text.split()",
	" counts = {}",
	" for w in words:",
	" counts[w] = counts.get(w, 0) + 1",
	" return counts",
	],
	tests=[_twc_1, _twc_2, _twc_3, _twc_4],
	)

	TASK_WC_NO_PUNCT = _t(
	name="word_count_no_punct_strip",
	description="Debug the word_count function so it passes all tests.",
	difficulty="medium",
	bug_type="missing_return",
	code=[
	"def word_count(text):",
	" text = text.lower()",
	" words = text.split()",
	" counts = {}",
	" for w in words:",
	" counts[w] = counts.get(w, 0) + 1",
	" return counts",
	],
	solution=[
	"import string",
	"def word_count(text):",
	" text = text.lower()",
	" for p in string.punctuation:",
	" text = text.replace(p, '')",
	" words = text.split()",
	" counts = {}",
	" for w in words:",
	" counts[w] = counts.get(w, 0) + 1",
	" return counts",
	],
	tests=[_twc_1, _twc_2, _twc_3, _twc_4],
	)


	# ── HARD ──────────────────────────────────────────────────────────────────

	TASK_LRU_WRONG_EVICT = _t(
	name="lru_cache_wrong_eviction",
	description="Debug the LRUCache function so it passes all tests.",
	difficulty="hard",
	bug_type="off_by_one",
	code=[
	"class LRUCache:",
	" def __init__(self, capacity):",
	" self.capacity = capacity",
	" self.cache = []",
	" def get(self, key):",
	" for i, (k, v) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.append(self.cache.pop(i))",
	" return v",
	" return -1",
	" def put(self, key, value):",
	" for i, (k, _) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.pop(i)",
	" break",
	" if len(self.cache) >= self.capacity:",
	" self.cache.pop(0)",
	" self.cache.append((key, value))",
	],
	solution=[
	"class LRUCache:",
	" def __init__(self, capacity):",
	" self.capacity = capacity",
	" self.cache = []",
	" def get(self, key):",
	" for i, (k, v) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.append(self.cache.pop(i))",
	" return v",
	" return -1",
	" def put(self, key, value):",
	" for i, (k, _) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.pop(i)",
	" break",
	" if len(self.cache) >= self.capacity:",
	" self.cache.pop(0)",
	" self.cache.append((key, value))",
	],
	tests=[_tlru_1, _tlru_2, _tlru_3],
	)

	TASK_LRU_NO_PROMOTE = _t(
	name="lru_cache_no_promotion",
	description="Debug the LRUCache function so it passes all tests.",
	difficulty="hard",
	bug_type="missing_return",
	code=[
	"class LRUCache:",
	" def __init__(self, capacity):",
	" self.capacity = capacity",
	" self.cache = []",
	" def get(self, key):",
	" for i, (k, v) in enumerate(self.cache):",
	" if k == key:",
	" return v",
	" return -1",
	" def put(self, key, value):",
	" for i, (k, _) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.pop(i)",
	" break",
	" if len(self.cache) >= self.capacity:",
	" self.cache.pop(0)",
	" self.cache.append((key, value))",
	],
	solution=[
	"class LRUCache:",
	" def __init__(self, capacity):",
	" self.capacity = capacity",
	" self.cache = []",
	" def get(self, key):",
	" for i, (k, v) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.append(self.cache.pop(i))",
	" return v",
	" return -1",
	" def put(self, key, value):",
	" for i, (k, _) in enumerate(self.cache):",
	" if k == key:",
	" self.cache.pop(i)",
	" break",
	" if len(self.cache) >= self.capacity:",
	" self.cache.pop(0)",
	" self.cache.append((key, value))",
	],
	tests=[_tlru_1, _tlru_2, _tlru_3],
	)

	TASK_VP_WRONG_MAPPING = _t(
	name="valid_parentheses_wrong_mapping",
	description="Debug the is_valid function so it passes all tests.",
	difficulty="hard",
	bug_type="wrong_operator",
	code=[
	"def is_valid(s):",
	" stack = []",
	" mapping = {')': '[', ']': '{', '}': '('}",
	" for c in s:",
	" if c in mapping.values():",
	" stack.append(c)",
	" elif c in mapping:",
	" if not stack or stack.pop() != mapping[c]:",
	" return False",
	" return len(stack) == 0",
	],
	solution=[
	"def is_valid(s):",
	" stack = []",
	" mapping = {')': '(', ']': '[', '}': '{'}",
	" for c in s:",
	" if c in mapping.values():",
	" stack.append(c)",
	" elif c in mapping:",
	" if not stack or stack.pop() != mapping[c]:",
	" return False",
	" return len(stack) == 0",
	],
	tests=[_tvp_1, _tvp_2, _tvp_3, _tvp_4],
	)

	TASK_VP_MISSING_EMPTY_CHECK = _t(
	name="valid_parentheses_no_empty_check",
	description="Debug the is_valid function so it passes all tests.",
	difficulty="hard",
	bug_type="logic_inversion",
	code=[
	"def is_valid(s):",
	" stack = []",
	" mapping = {')': '(', ']': '[', '}': '{'}",
	" for c in s:",
	" if c in mapping.values():",
	" stack.append(c)",
	" elif c in mapping:",
	" if stack.pop() != mapping[c]:",
	" return False",
	" return len(stack) == 0",
	],
	solution=[
	"def is_valid(s):",
	" stack = []",
	" mapping = {')': '(', ']': '[', '}': '{'}",
	" for c in s:",
	" if c in mapping.values():",
	" stack.append(c)",
	" elif c in mapping:",
	" if not stack or stack.pop() != mapping[c]:",
	" return False",
	" return len(stack) == 0",
	],
	tests=[_tvp_1, _tvp_2, _tvp_3, _tvp_4],
	)

	TASK_MI_STRICT_OVERLAP = _t(
	name="merge_intervals_strict_overlap",
	description="Debug the merge_intervals function so it passes all tests.",
	difficulty="hard",
	bug_type="wrong_operator",
	code=[
	"def merge_intervals(intervals):",
	" intervals.sort()",
	" merged = []",
	" for interval in intervals:",
	" if not merged or merged[-1][1] < interval[0]:",
	" merged.append(list(interval))",
	" else:",
	" merged[-1][1] = max(merged[-1][1], interval[1])",
	" return merged",
	],
	solution=[
	"def merge_intervals(intervals):",
	" intervals.sort()",
	" merged = []",
	" for interval in intervals:",
	" if not merged or merged[-1][1] < interval[0]:",
	" merged.append(list(interval))",
	" else:",
	" merged[-1][1] = max(merged[-1][1], interval[1])",
	" return merged",
	],
	tests=[_tmi_1, _tmi_2, _tmi_3],
	)

	TASK_MI_NO_SORT = _t(
	name="merge_intervals_missing_sort",
	description="Debug the merge_intervals function so it passes all tests.",
	difficulty="hard",
	bug_type="missing_return",
	code=[
	"def merge_intervals(intervals):",
	" merged = []",
	" for interval in intervals:",
	" if not merged or merged[-1][1] < interval[0]:",
	" merged.append(list(interval))",
	" else:",
	" merged[-1][1] = max(merged[-1][1], interval[1])",
	" return merged",
	],
	solution=[
	"def merge_intervals(intervals):",
	" intervals.sort()",
	" merged = []",
	" for interval in intervals:",
	" if not merged or merged[-1][1] < interval[0]:",
	" merged.append(list(interval))",
	" else:",
	" merged[-1][1] = max(merged[-1][1], interval[1])",
	" return merged",
	],
	tests=[_tmi_1, _tmi_2, _tmi_3],
	)


	# ---------------------------------------------------------------------------
	# Registries
	# ---------------------------------------------------------------------------

	TASKS_BY_DIFFICULTY: Dict[str, List[Dict]] = {
	"easy": [
	TASK_SUM_EVEN_WRONG_OP,
	TASK_SUM_EVEN_MISSING_INIT,
	TASK_REVERSE_WRONG_STEP,
	TASK_REVERSE_NO_REVERSE,
	],
	"medium": [
	TASK_BS_OFF_BY_ONE,
	TASK_BS_WRONG_MID,
	TASK_FLATTEN_APPEND,
	TASK_FLATTEN_LOGIC_INVERT,
	TASK_WC_NO_LOWER,
	TASK_WC_NO_PUNCT,
	],
	"hard": [
	TASK_LRU_WRONG_EVICT,
	TASK_LRU_NO_PROMOTE,
	TASK_VP_WRONG_MAPPING,
	TASK_VP_MISSING_EMPTY_CHECK,
	TASK_MI_STRICT_OVERLAP,
	TASK_MI_NO_SORT,
	],
	}

	# Flat list — used for random sampling when training_step is not set
	ALL_TASKS: List[Dict] = [
	t for bucket in TASKS_BY_DIFFICULTY.values() for t in bucket
	]