moksh24's picture
Restructure to compliant multi-mode layout with server/ and uv.lock
e13f5d6
Raw
History Blame Contribute Delete
4.79 kB
from dataclasses import dataclass
@dataclass
class TaskDef:
id: str
name: str
difficulty: str
max_steps: int
initial_code: str
test_code_template: str
TASKS = {}
EASY_INIT = '''def calculate_total(prices, tax_rate):
total = 0
for p in prices:
total += p
return total * (1 + tax_rate)
'''
# The test script uses a custom runner to output JSON results
# so we can parse exactly how many passed vs failed.
EASY_TEST = '''
import json
import traceback
def run_tests():
{code}
tests = [
lambda: calculate_total([100, 200], 0.1) == 330.0,
lambda: calculate_total([], 0.2) == 0.0,
lambda: calculate_total([50], 0) == 50.0,
lambda: calculate_total([10, 20, 30], 0.05) == 63.0
]
passed = 0
errors = []
for i, t in enumerate(tests):
try:
if t():
passed += 1
else:
errors.append(f"Test {i+1} failed: returned False")
except Exception as e:
errors.append(f"Test {i+1} raised {type(e).__name__}: {str(e)}")
print(json.dumps({"passed": passed, "total": len(tests), "errors": errors}))
if __name__ == "__main__":
run_tests()
'''
TASKS['task_easy'] = TaskDef(
id='task_easy',
name='Python Syntax Fix',
difficulty='easy',
max_steps=10,
initial_code=EASY_INIT,
test_code_template=EASY_TEST
)
MEDIUM_INIT = '''def merge_intervals(intervals):
if not intervals:
return []
intervals.sort(key=lambda x: x[0])
merged = [intervals[0]]
for i in range(1, len(intervals)):
if intervals[i][0] < merged[-1][1]:
merged[-1][1] = intervals[i][1]
else:
merged.append(intervals[i])
return merged
'''
MEDIUM_TEST = '''
import json
import traceback
def run_tests():
{code}
tests = [
lambda: merge_intervals([[1,3],[2,6],[8,10],[15,18]]) == [[1,6],[8,10],[15,18]],
lambda: merge_intervals([[1,4],[4,5]]) == [[1,5]],
lambda: merge_intervals([[1,4],[2,3]]) == [[1,4]],
lambda: merge_intervals([[1,4],[0,4]]) == [[0,4]],
lambda: merge_intervals([[1,4],[2,5],[7,9],[14,15]]) == [[1,5],[7,9],[14,15]]
]
passed = 0
errors = []
for i, t in enumerate(tests):
try:
if t():
passed += 1
else:
errors.append(f"Test {i+1} failed")
except Exception as e:
errors.append(f"Test {i+1} failed: {type(e).__name__}")
print(json.dumps({"passed": passed, "total": len(tests), "errors": errors}))
if __name__ == "__main__":
run_tests()
'''
TASKS['task_medium'] = TaskDef(
id='task_medium',
name='Python Logic Repair',
difficulty='medium',
max_steps=15,
initial_code=MEDIUM_INIT,
test_code_template=MEDIUM_TEST
)
HARD_INIT = '''def find_longest_sequence(nums):
max_len = 0
for num in nums:
current_num = num
current_len = 1
while current_num + 1 in nums:
current_num += 1
current_len += 1
max_len = max(max_len, current_len)
return max_len
'''
HARD_TEST = '''
import json
import time
def run_tests():
{code}
passed = 0
errors = []
# Correctness tests
def t1(): return find_longest_sequence([100, 4, 200, 1, 3, 2]) == 4
def t2(): return find_longest_sequence([0,3,7,2,5,8,4,6,0,1]) == 9
def t3(): return find_longest_sequence([]) == 0
def t4(): return find_longest_sequence([1,2,0,1]) == 3
tests = [t1, t2, t3, t4]
for i, t in enumerate(tests):
try:
if t(): passed += 1
else: errors.append(f"Basic test {i+1} failed")
except Exception as e:
errors.append(f"Basic test {i+1} error: {e}")
# Performance test
try:
start = time.time()
large_input = list(range(10000)) + [1000000]
res = find_longest_sequence(large_input)
if res != 10000:
errors.append(f"Performance test logic failed: got {res}")
else:
if time.time() - start > 0.5:
# Timed out but answer was correct -- give partial progress
errors.append("Algorithm is too slow (O(N^2) or O(N^3)). Need O(N) using sets.")
else:
passed += 1
except Exception as e:
errors.append(f"Performance test err: {e}")
print(json.dumps({"passed": passed, "total": 5, "errors": errors}))
if __name__ == "__main__":
run_tests()
'''
TASKS['task_hard'] = TaskDef(
id='task_hard',
name='Algorithm Optimization & Correctness',
difficulty='hard',
max_steps=20,
initial_code=HARD_INIT,
test_code_template=HARD_TEST
)