shank committed on
Commit ·
a55c81d
1
Parent(s): 6cca39d
Update: Started making changes for the hackathon
Browse files- .gitignore +2 -1
- data/bugs_tier1.jsonl +8 -0
- data/bugs_tier2.jsonl +3 -0
- data/bugs_tier3.jsonl +2 -0
- data/generate_bugs.py +441 -0
- env/__pycache__/models.cpython-313.pyc +0 -0
- env/environment.py +243 -2
- env/models.py +64 -1
- openenv.yaml +47 -7
- pyproject.toml +4 -1
- server/models.py +11 -0
- server/reward_calculator.py +283 -0
- training/train_grpo.py +324 -0
- uv.lock +129 -57
.gitignore
CHANGED
|
@@ -45,4 +45,5 @@ baseline_results.json
|
|
| 45 |
sandbox_*.py
|
| 46 |
/tmp/sandbox_*
|
| 47 |
|
| 48 |
-
instructions.md
|
|
|
|
|
|
| 45 |
sandbox_*.py
|
| 46 |
/tmp/sandbox_*
|
| 47 |
|
| 48 |
+
instructions.md
|
| 49 |
+
CURSOR_INSTRUCTIONS_V2.md
|
data/bugs_tier1.jsonl
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"id": "t1_001", "difficulty": 1, "bug_type": "off_by_one", "function_name": "binary_search", "buggy_code": "def binary_search(arr, target):\n    left, right = 0, len(arr)\n    while left <= right:\n        mid = (left + right) // 2\n        if arr[mid] == target:\n            return mid\n        elif arr[mid] < target:\n            left = mid + 1\n        else:\n            right = mid - 1\n    return -1", "original_code": "def binary_search(arr, target):\n    left, right = 0, len(arr) - 1\n    while left <= right:\n        mid = (left + right) // 2\n        if arr[mid] == target:\n            return mid\n        elif arr[mid] < target:\n            left = mid + 1\n        else:\n            right = mid - 1\n    return -1", "initial_error": "IndexError: list index out of range on line 5", "bug_location": {"function": "binary_search", "line_start": 2}, "test_cases": [{"input": [[1, 3, 5, 7, 9], 5], "expected_output": 2}, {"input": [[1, 3, 5, 7, 9], 1], "expected_output": 0}, {"input": [[1, 3, 5, 7, 9], 9], "expected_output": 4}, {"input": [[1, 3, 5, 7, 9], 4], "expected_output": -1}, {"input": [[1, 3, 5, 7, 9], 10], "expected_output": -1}]}
|
| 2 |
+
{"id": "t1_002", "difficulty": 1, "bug_type": "wrong_operator", "function_name": "is_palindrome", "buggy_code": "def is_palindrome(s):\n return s == s[::-1] and len(s) > 0", "original_code": "def is_palindrome(s):\n return s == s[::-1]", "initial_error": "AssertionError: is_palindrome('') expected True, got False", "bug_location": {"function": "is_palindrome", "line_start": 2}, "test_cases": [{"input": "racecar", "expected_output": true}, {"input": "hello", "expected_output": false}, {"input": "", "expected_output": true}, {"input": "a", "expected_output": true}]}
|
| 3 |
+
{"id": "t1_003", "difficulty": 1, "bug_type": "off_by_one", "function_name": "find_max", "buggy_code": "def find_max(nums):\n max_val = nums[0]\n for i in range(1, len(nums) + 1):\n if nums[i] > max_val:\n max_val = nums[i]\n return max_val", "original_code": "def find_max(nums):\n max_val = nums[0]\n for i in range(1, len(nums)):\n if nums[i] > max_val:\n max_val = nums[i]\n return max_val", "initial_error": "IndexError: list index out of range on line 4", "bug_location": {"function": "find_max", "line_start": 3}, "test_cases": [{"input": [3, 1, 4, 1, 5, 9], "expected_output": 9}, {"input": [1], "expected_output": 1}, {"input": [-5, -1, -3], "expected_output": -1}, {"input": [7, 7, 7], "expected_output": 7}]}
|
| 4 |
+
{"id": "t1_004", "difficulty": 1, "bug_type": "wrong_operator", "function_name": "count_vowels", "buggy_code": "def count_vowels(s):\n    count = 0\n    for ch in s:\n        if ch in 'aeiou':\n            count += 1\n    return count", "original_code": "def count_vowels(s):\n    count = 0\n    for ch in s.lower():\n        if ch in 'aeiou':\n            count += 1\n    return count", "initial_error": "AssertionError: count_vowels('AEIOU') expected 5, got 0", "bug_location": {"function": "count_vowels", "line_start": 3}, "test_cases": [{"input": "hello", "expected_output": 2}, {"input": "Hello", "expected_output": 2}, {"input": "AEIOU", "expected_output": 5}, {"input": "xyz", "expected_output": 0}]}
|
| 5 |
+
{"id": "t1_005", "difficulty": 1, "bug_type": "off_by_one", "function_name": "sum_list", "buggy_code": "def sum_list(nums):\n total = 0\n for i in range(len(nums) - 1):\n total += nums[i]\n return total", "original_code": "def sum_list(nums):\n total = 0\n for i in range(len(nums)):\n total += nums[i]\n return total", "initial_error": "AssertionError: sum_list([1,2,3]) expected 6, got 3", "bug_location": {"function": "sum_list", "line_start": 3}, "test_cases": [{"input": [1, 2, 3], "expected_output": 6}, {"input": [0], "expected_output": 0}, {"input": [10, 20, 30, 40], "expected_output": 100}, {"input": [], "expected_output": 0}]}
|
| 6 |
+
{"id": "t1_006", "difficulty": 1, "bug_type": "wrong_comparison", "function_name": "is_sorted", "buggy_code": "def is_sorted(lst):\n for i in range(len(lst) - 1):\n if lst[i] > lst[i + 1]:\n return True\n return False", "original_code": "def is_sorted(lst):\n for i in range(len(lst) - 1):\n if lst[i] > lst[i + 1]:\n return False\n return True", "initial_error": "AssertionError: is_sorted([1,2,3]) expected True, got False", "bug_location": {"function": "is_sorted", "line_start": 4}, "test_cases": [{"input": [1, 2, 3], "expected_output": true}, {"input": [3, 1, 2], "expected_output": false}, {"input": [1], "expected_output": true}, {"input": [2, 2, 2], "expected_output": true}]}
|
| 7 |
+
{"id": "t1_007", "difficulty": 1, "bug_type": "wrong_operator", "function_name": "factorial", "buggy_code": "def factorial(n):\n if n == 0:\n return 0\n result = 1\n for i in range(1, n + 1):\n result *= i\n return result", "original_code": "def factorial(n):\n if n == 0:\n return 1\n result = 1\n for i in range(1, n + 1):\n result *= i\n return result", "initial_error": "AssertionError: factorial(0) expected 1, got 0", "bug_location": {"function": "factorial", "line_start": 3}, "test_cases": [{"input": 0, "expected_output": 1}, {"input": 1, "expected_output": 1}, {"input": 5, "expected_output": 120}, {"input": 3, "expected_output": 6}]}
|
| 8 |
+
{"id": "t1_008", "difficulty": 1, "bug_type": "logic_inversion", "function_name": "is_even", "buggy_code": "def is_even(n):\n return n % 2 != 0", "original_code": "def is_even(n):\n return n % 2 == 0", "initial_error": "AssertionError: is_even(4) expected True, got False", "bug_location": {"function": "is_even", "line_start": 2}, "test_cases": [{"input": 4, "expected_output": true}, {"input": 3, "expected_output": false}, {"input": 0, "expected_output": true}, {"input": -2, "expected_output": true}]}
|
data/bugs_tier2.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"id": "t2_001", "difficulty": 2, "bug_type": "wrong_variable", "function_name": "two_sum", "buggy_code": "def two_sum(nums, target):\n seen = {}\n for i, num in enumerate(nums):\n complement = target - num\n if complement in seen:\n return [seen[complement], i]\n seen[num] = num\n return []", "original_code": "def two_sum(nums, target):\n seen = {}\n for i, num in enumerate(nums):\n complement = target - num\n if complement in seen:\n return [seen[complement], i]\n seen[num] = i\n return []", "initial_error": "AssertionError: two_sum([2,7,11,15], 9) expected [0,1], got [2,1]", "bug_location": {"function": "two_sum", "line_start": 7}, "test_cases": [{"input": [[2, 7, 11, 15], 9], "expected_output": [0, 1]}, {"input": [[3, 2, 4], 6], "expected_output": [1, 2]}, {"input": [[3, 3], 6], "expected_output": [0, 1]}]}
|
| 2 |
+
{"id": "t2_002", "difficulty": 2, "bug_type": "missing_base_case", "function_name": "fibonacci", "buggy_code": "def fibonacci(n):\n if n == 0:\n return 0\n return fibonacci(n - 1) + fibonacci(n - 2)", "original_code": "def fibonacci(n):\n if n == 0:\n return 0\n if n == 1:\n return 1\n return fibonacci(n - 1) + fibonacci(n - 2)", "initial_error": "RecursionError: maximum recursion depth exceeded", "bug_location": {"function": "fibonacci", "line_start": 4}, "test_cases": [{"input": 0, "expected_output": 0}, {"input": 1, "expected_output": 1}, {"input": 5, "expected_output": 5}, {"input": 7, "expected_output": 13}]}
|
| 3 |
+
{"id": "t2_003", "difficulty": 2, "bug_type": "wrong_accumulator", "function_name": "flatten", "buggy_code": "def flatten(lst):\n    result = []\n    for item in lst:\n        if isinstance(item, list):\n            result.append(flatten(item))\n        else:\n            result.append(item)\n    return result", "original_code": "def flatten(lst):\n    result = []\n    for item in lst:\n        if isinstance(item, list):\n            result.extend(flatten(item))\n        else:\n            result.append(item)\n    return result", "initial_error": "AssertionError: flatten([[1,[2]],3]) expected [1,2,3], got [[1,[2]],3]", "bug_location": {"function": "flatten", "line_start": 5}, "test_cases": [{"input": [[1, [2]], 3], "expected_output": [1, 2, 3]}, {"input": [1, 2, 3], "expected_output": [1, 2, 3]}, {"input": [[1, 2], [3, [4, 5]]], "expected_output": [1, 2, 3, 4, 5]}]}
|
data/bugs_tier3.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"id": "t3_001", "difficulty": 3, "bug_type": "edge_case_only", "function_name": "merge_sorted", "buggy_code": "def merge_sorted(a, b):\n result = []\n i = j = 0\n while i < len(a) and j < len(b):\n if a[i] <= b[j]:\n result.append(a[i])\n i += 1\n else:\n result.append(b[j])\n j += 1\n return result", "original_code": "def merge_sorted(a, b):\n result = []\n i = j = 0\n while i < len(a) and j < len(b):\n if a[i] <= b[j]:\n result.append(a[i])\n i += 1\n else:\n result.append(b[j])\n j += 1\n result.extend(a[i:])\n result.extend(b[j:])\n return result", "initial_error": "AssertionError: merge_sorted([1,3],[2,4,5]) expected [1,2,3,4,5], got [1,2,3]", "bug_location": {"function": "merge_sorted", "line_start": 11}, "test_cases": [{"input": [[1, 3], [2, 4, 5]], "expected_output": [1, 2, 3, 4, 5]}, {"input": [[], [1, 2]], "expected_output": [1, 2]}, {"input": [[1, 2], []], "expected_output": [1, 2]}, {"input": [[1], [2]], "expected_output": [1, 2]}]}
|
| 2 |
+
{"id": "t3_002", "difficulty": 3, "bug_type": "subtle_logic", "function_name": "rotate_matrix", "buggy_code": "def rotate_matrix(matrix):\n n = len(matrix)\n for i in range(n):\n for j in range(i, n):\n matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]\n return matrix", "original_code": "def rotate_matrix(matrix):\n n = len(matrix)\n for i in range(n):\n for j in range(i, n):\n matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]\n for row in matrix:\n row.reverse()\n return matrix", "initial_error": "AssertionError: rotate_matrix([[1,2],[3,4]]) expected [[3,1],[4,2]], got [[1,3],[2,4]]", "bug_location": {"function": "rotate_matrix", "line_start": 6}, "test_cases": [{"input": [[1, 2], [3, 4]], "expected_output": [[3, 1], [4, 2]]}, {"input": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "expected_output": [[7, 4, 1], [8, 5, 2], [9, 6, 3]]}]}
|
data/generate_bugs.py
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AgentDebuggerEnv — Bug Dataset Generator
|
| 3 |
+
|
| 4 |
+
Generates three tiers of buggy Python functions for curriculum learning:
|
| 5 |
+
Tier 1 (easy): Off-by-one errors, wrong operators, simple logic inversions
|
| 6 |
+
Tier 2 (medium): Incorrect algorithm logic, wrong variable references, subtle type errors
|
| 7 |
+
Tier 3 (hard): Multi-bug interactions, concurrency, edge-case-only failures
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python data/generate_bugs.py
|
| 11 |
+
|
| 12 |
+
Outputs:
|
| 13 |
+
data/bugs_tier1.jsonl (target ~40 bugs; 8 defined so far)
|
| 14 |
+
data/bugs_tier2.jsonl (target ~30 bugs; 3 defined so far)
|
| 15 |
+
data/bugs_tier3.jsonl (target ~20 bugs; 2 defined so far)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import os
|
| 20 |
+
|
| 21 |
+
# Tier 1 (easy) bug records.
#
# Every record carries:
#   id / difficulty / bug_type / function_name -- identifying metadata
#   buggy_code     -- source of the function with exactly one injected defect
#   original_code  -- the reference (correct) implementation
#   initial_error  -- the first failure the buggy code produces on test_cases
#   bug_location   -- function name and 1-based line of the defect in buggy_code
#   test_cases     -- inputs with the output the *original* code returns
#
# NOTE(review): "input" is a list of positional arguments for multi-parameter
# functions (binary_search) but the single argument itself for one-parameter
# functions (find_max, sum_list). Confirm how the consuming harness tells the
# two apart.
TIER1_BUGS = [
    {
        "id": "t1_001",
        "difficulty": 1,
        "bug_type": "off_by_one",
        "function_name": "binary_search",
        # Only line 2 differs from the original: the inclusive upper bound is
        # one past the last valid index, so a target larger than every element
        # eventually probes arr[len(arr)].
        "buggy_code": (
            "def binary_search(arr, target):\n"
            "    left, right = 0, len(arr)\n"
            "    while left <= right:\n"
            "        mid = (left + right) // 2\n"
            "        if arr[mid] == target:\n"
            "            return mid\n"
            "        elif arr[mid] < target:\n"
            "            left = mid + 1\n"
            "        else:\n"
            "            right = mid - 1\n"
            "    return -1"
        ),
        "original_code": (
            "def binary_search(arr, target):\n"
            "    left, right = 0, len(arr) - 1\n"
            "    while left <= right:\n"
            "        mid = (left + right) // 2\n"
            "        if arr[mid] == target:\n"
            "            return mid\n"
            "        elif arr[mid] < target:\n"
            "            left = mid + 1\n"
            "        else:\n"
            "            right = mid - 1\n"
            "    return -1"
        ),
        "initial_error": "IndexError: list index out of range on line 5",
        "bug_location": {"function": "binary_search", "line_start": 2},
        "test_cases": [
            {"input": [[1, 3, 5, 7, 9], 5], "expected_output": 2},
            {"input": [[1, 3, 5, 7, 9], 1], "expected_output": 0},
            {"input": [[1, 3, 5, 7, 9], 9], "expected_output": 4},
            {"input": [[1, 3, 5, 7, 9], 4], "expected_output": -1},
            # Target above the maximum: the only case that exposes the defect.
            {"input": [[1, 3, 5, 7, 9], 10], "expected_output": -1},
        ],
    },
    {
        "id": "t1_002",
        "difficulty": 1,
        "bug_type": "wrong_operator",
        "function_name": "is_palindrome",
        "buggy_code": (
            "def is_palindrome(s):\n"
            "    return s == s[::-1] and len(s) > 0"
        ),
        "original_code": (
            "def is_palindrome(s):\n"
            "    return s == s[::-1]"
        ),
        "initial_error": "AssertionError: is_palindrome('') expected True, got False",
        "bug_location": {"function": "is_palindrome", "line_start": 2},
        "test_cases": [
            {"input": "racecar", "expected_output": True},
            {"input": "hello", "expected_output": False},
            {"input": "", "expected_output": True},
            {"input": "a", "expected_output": True},
        ],
    },
    {
        "id": "t1_003",
        "difficulty": 1,
        "bug_type": "off_by_one",
        "function_name": "find_max",
        "buggy_code": (
            "def find_max(nums):\n"
            "    max_val = nums[0]\n"
            "    for i in range(1, len(nums) + 1):\n"
            "        if nums[i] > max_val:\n"
            "            max_val = nums[i]\n"
            "    return max_val"
        ),
        "original_code": (
            "def find_max(nums):\n"
            "    max_val = nums[0]\n"
            "    for i in range(1, len(nums)):\n"
            "        if nums[i] > max_val:\n"
            "            max_val = nums[i]\n"
            "    return max_val"
        ),
        "initial_error": "IndexError: list index out of range on line 4",
        "bug_location": {"function": "find_max", "line_start": 3},
        "test_cases": [
            {"input": [3, 1, 4, 1, 5, 9], "expected_output": 9},
            {"input": [1], "expected_output": 1},
            {"input": [-5, -1, -3], "expected_output": -1},
            {"input": [7, 7, 7], "expected_output": 7},
        ],
    },
    {
        "id": "t1_004",
        "difficulty": 1,
        "bug_type": "wrong_operator",
        "function_name": "count_vowels",
        "buggy_code": (
            "def count_vowels(s):\n"
            "    count = 0\n"
            "    for ch in s:\n"
            "        if ch in 'aeiou':\n"
            "            count += 1\n"
            "    return count"
        ),
        "original_code": (
            "def count_vowels(s):\n"
            "    count = 0\n"
            "    for ch in s.lower():\n"
            "        if ch in 'aeiou':\n"
            "            count += 1\n"
            "    return count"
        ),
        # 'Hello' still yields 2 with the buggy code (its vowels are already
        # lowercase); the first failing case is the all-uppercase one.
        "initial_error": "AssertionError: count_vowels('AEIOU') expected 5, got 0",
        "bug_location": {"function": "count_vowels", "line_start": 3},
        "test_cases": [
            {"input": "hello", "expected_output": 2},
            {"input": "Hello", "expected_output": 2},
            {"input": "AEIOU", "expected_output": 5},
            {"input": "xyz", "expected_output": 0},
        ],
    },
    {
        "id": "t1_005",
        "difficulty": 1,
        "bug_type": "off_by_one",
        "function_name": "sum_list",
        "buggy_code": (
            "def sum_list(nums):\n"
            "    total = 0\n"
            "    for i in range(len(nums) - 1):\n"
            "        total += nums[i]\n"
            "    return total"
        ),
        "original_code": (
            "def sum_list(nums):\n"
            "    total = 0\n"
            "    for i in range(len(nums)):\n"
            "        total += nums[i]\n"
            "    return total"
        ),
        "initial_error": "AssertionError: sum_list([1,2,3]) expected 6, got 3",
        "bug_location": {"function": "sum_list", "line_start": 3},
        "test_cases": [
            {"input": [1, 2, 3], "expected_output": 6},
            {"input": [0], "expected_output": 0},
            {"input": [10, 20, 30, 40], "expected_output": 100},
            {"input": [], "expected_output": 0},
        ],
    },
    {
        "id": "t1_006",
        "difficulty": 1,
        "bug_type": "wrong_comparison",
        "function_name": "is_sorted",
        "buggy_code": (
            "def is_sorted(lst):\n"
            "    for i in range(len(lst) - 1):\n"
            "        if lst[i] > lst[i + 1]:\n"
            "            return True\n"
            "    return False"
        ),
        "original_code": (
            "def is_sorted(lst):\n"
            "    for i in range(len(lst) - 1):\n"
            "        if lst[i] > lst[i + 1]:\n"
            "            return False\n"
            "    return True"
        ),
        "initial_error": "AssertionError: is_sorted([1,2,3]) expected True, got False",
        "bug_location": {"function": "is_sorted", "line_start": 4},
        "test_cases": [
            {"input": [1, 2, 3], "expected_output": True},
            {"input": [3, 1, 2], "expected_output": False},
            {"input": [1], "expected_output": True},
            {"input": [2, 2, 2], "expected_output": True},
        ],
    },
    {
        "id": "t1_007",
        "difficulty": 1,
        "bug_type": "wrong_operator",
        "function_name": "factorial",
        "buggy_code": (
            "def factorial(n):\n"
            "    if n == 0:\n"
            "        return 0\n"
            "    result = 1\n"
            "    for i in range(1, n + 1):\n"
            "        result *= i\n"
            "    return result"
        ),
        "original_code": (
            "def factorial(n):\n"
            "    if n == 0:\n"
            "        return 1\n"
            "    result = 1\n"
            "    for i in range(1, n + 1):\n"
            "        result *= i\n"
            "    return result"
        ),
        "initial_error": "AssertionError: factorial(0) expected 1, got 0",
        "bug_location": {"function": "factorial", "line_start": 3},
        "test_cases": [
            {"input": 0, "expected_output": 1},
            {"input": 1, "expected_output": 1},
            {"input": 5, "expected_output": 120},
            {"input": 3, "expected_output": 6},
        ],
    },
    {
        "id": "t1_008",
        "difficulty": 1,
        "bug_type": "logic_inversion",
        "function_name": "is_even",
        "buggy_code": (
            "def is_even(n):\n"
            "    return n % 2 != 0"
        ),
        "original_code": (
            "def is_even(n):\n"
            "    return n % 2 == 0"
        ),
        "initial_error": "AssertionError: is_even(4) expected True, got False",
        "bug_location": {"function": "is_even", "line_start": 2},
        "test_cases": [
            {"input": 4, "expected_output": True},
            {"input": 3, "expected_output": False},
            {"input": 0, "expected_output": True},
            {"input": -2, "expected_output": True},
        ],
    },
]
|
| 255 |
+
|
| 256 |
+
# Tier 2 (medium) bug records: wrong variable references, missing base cases
# and wrong accumulator calls. Each record has the same keys as the other
# tiers: metadata, buggy_code, original_code, initial_error, bug_location
# (1-based line within buggy_code) and test_cases.
#
# NOTE(review): "input" is a list of positional arguments for two_sum but the
# single argument itself for fibonacci and flatten. Confirm how the consuming
# harness tells the two apart.
TIER2_BUGS = [
    {
        "id": "t2_001",
        "difficulty": 2,
        "bug_type": "wrong_variable",
        "function_name": "two_sum",
        # Stores the value instead of its index, so the first element of the
        # returned pair is a number from the list rather than a position.
        "buggy_code": (
            "def two_sum(nums, target):\n"
            "    seen = {}\n"
            "    for i, num in enumerate(nums):\n"
            "        complement = target - num\n"
            "        if complement in seen:\n"
            "            return [seen[complement], i]\n"
            "        seen[num] = num\n"
            "    return []"
        ),
        "original_code": (
            "def two_sum(nums, target):\n"
            "    seen = {}\n"
            "    for i, num in enumerate(nums):\n"
            "        complement = target - num\n"
            "        if complement in seen:\n"
            "            return [seen[complement], i]\n"
            "        seen[num] = i\n"
            "    return []"
        ),
        "initial_error": "AssertionError: two_sum([2,7,11,15], 9) expected [0,1], got [2,1]",
        "bug_location": {"function": "two_sum", "line_start": 7},
        "test_cases": [
            {"input": [[2, 7, 11, 15], 9], "expected_output": [0, 1]},
            {"input": [[3, 2, 4], 6], "expected_output": [1, 2]},
            {"input": [[3, 3], 6], "expected_output": [0, 1]},
        ],
    },
    {
        "id": "t2_002",
        "difficulty": 2,
        "bug_type": "missing_base_case",
        "function_name": "fibonacci",
        # Without the n == 1 base case, fibonacci(1) recurses into negative n
        # and never terminates.
        "buggy_code": (
            "def fibonacci(n):\n"
            "    if n == 0:\n"
            "        return 0\n"
            "    return fibonacci(n - 1) + fibonacci(n - 2)"
        ),
        "original_code": (
            "def fibonacci(n):\n"
            "    if n == 0:\n"
            "        return 0\n"
            "    if n == 1:\n"
            "        return 1\n"
            "    return fibonacci(n - 1) + fibonacci(n - 2)"
        ),
        "initial_error": "RecursionError: maximum recursion depth exceeded",
        "bug_location": {"function": "fibonacci", "line_start": 4},
        "test_cases": [
            {"input": 0, "expected_output": 0},
            {"input": 1, "expected_output": 1},
            {"input": 5, "expected_output": 5},
            {"input": 7, "expected_output": 13},
        ],
    },
    {
        "id": "t2_003",
        "difficulty": 2,
        "bug_type": "wrong_accumulator",
        "function_name": "flatten",
        "buggy_code": (
            "def flatten(lst):\n"
            "    result = []\n"
            "    for item in lst:\n"
            "        if isinstance(item, list):\n"
            "            result.append(flatten(item))\n"
            "        else:\n"
            "            result.append(item)\n"
            "    return result"
        ),
        "original_code": (
            "def flatten(lst):\n"
            "    result = []\n"
            "    for item in lst:\n"
            "        if isinstance(item, list):\n"
            "            result.extend(flatten(item))\n"
            "        else:\n"
            "            result.append(item)\n"
            "    return result"
        ),
        # append() re-nests every sub-list, so the buggy function returns the
        # input structure unchanged.
        "initial_error": "AssertionError: flatten([[1,[2]],3]) expected [1,2,3], got [[1,[2]],3]",
        "bug_location": {"function": "flatten", "line_start": 5},
        "test_cases": [
            {"input": [[1, [2]], 3], "expected_output": [1, 2, 3]},
            {"input": [1, 2, 3], "expected_output": [1, 2, 3]},
            {"input": [[1, 2], [3, [4, 5]]], "expected_output": [1, 2, 3, 4, 5]},
        ],
    },
]
|
| 352 |
+
|
| 353 |
+
# Tier 3 (hard) bug records: failures that only show up on edge cases or that
# come from a missing step of the algorithm. Each record has the same keys as
# the other tiers: metadata, buggy_code, original_code, initial_error,
# bug_location (1-based line within buggy_code) and test_cases.
#
# NOTE(review): "input" is a list of two positional arguments for
# merge_sorted but the single matrix argument for rotate_matrix, and both look
# like a list of lists. Confirm how the consuming harness tells them apart.
TIER3_BUGS = [
    {
        "id": "t3_001",
        "difficulty": 3,
        "bug_type": "edge_case_only",
        "function_name": "merge_sorted",
        # The tails left over once one list is exhausted are never appended.
        "buggy_code": (
            "def merge_sorted(a, b):\n"
            "    result = []\n"
            "    i = j = 0\n"
            "    while i < len(a) and j < len(b):\n"
            "        if a[i] <= b[j]:\n"
            "            result.append(a[i])\n"
            "            i += 1\n"
            "        else:\n"
            "            result.append(b[j])\n"
            "            j += 1\n"
            "    return result"
        ),
        "original_code": (
            "def merge_sorted(a, b):\n"
            "    result = []\n"
            "    i = j = 0\n"
            "    while i < len(a) and j < len(b):\n"
            "        if a[i] <= b[j]:\n"
            "            result.append(a[i])\n"
            "            i += 1\n"
            "        else:\n"
            "            result.append(b[j])\n"
            "            j += 1\n"
            "    result.extend(a[i:])\n"
            "    result.extend(b[j:])\n"
            "    return result"
        ),
        "initial_error": "AssertionError: merge_sorted([1,3],[2,4,5]) expected [1,2,3,4,5], got [1,2,3]",
        "bug_location": {"function": "merge_sorted", "line_start": 11},
        "test_cases": [
            {"input": [[1, 3], [2, 4, 5]], "expected_output": [1, 2, 3, 4, 5]},
            {"input": [[], [1, 2]], "expected_output": [1, 2]},
            {"input": [[1, 2], []], "expected_output": [1, 2]},
            {"input": [[1], [2]], "expected_output": [1, 2]},
        ],
    },
    {
        "id": "t3_002",
        "difficulty": 3,
        "bug_type": "subtle_logic",
        "function_name": "rotate_matrix",
        # Transposes in place but skips the row reversal that completes the
        # rotation. Both variants mutate and return the matrix passed in.
        "buggy_code": (
            "def rotate_matrix(matrix):\n"
            "    n = len(matrix)\n"
            "    for i in range(n):\n"
            "        for j in range(i, n):\n"
            "            matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]\n"
            "    return matrix"
        ),
        "original_code": (
            "def rotate_matrix(matrix):\n"
            "    n = len(matrix)\n"
            "    for i in range(n):\n"
            "        for j in range(i, n):\n"
            "            matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]\n"
            "    for row in matrix:\n"
            "        row.reverse()\n"
            "    return matrix"
        ),
        "initial_error": "AssertionError: rotate_matrix([[1,2],[3,4]]) expected [[3,1],[4,2]], got [[1,3],[2,4]]",
        "bug_location": {"function": "rotate_matrix", "line_start": 6},
        "test_cases": [
            {"input": [[1, 2], [3, 4]], "expected_output": [[3, 1], [4, 2]]},
            {"input": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "expected_output": [[7, 4, 1], [8, 5, 2], [9, 6, 3]]},
        ],
    },
]
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def write_jsonl(bugs: list, path: str):
    """Write *bugs* to *path* as JSON Lines and print how many were written.

    Each element of *bugs* is serialized with ``json.dumps`` and followed by a
    newline, giving one record per line. The file is opened in write mode, so
    any existing content at *path* is replaced.
    """
    # Serialize lazily; writelines consumes the generator record by record.
    serialized = (json.dumps(record) + "\n" for record in bugs)
    with open(path, "w") as out_file:
        out_file.writelines(serialized)
    print(f"Wrote {len(bugs)} bugs to {path}")
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
if __name__ == "__main__":
    # The output directory must exist before any tier file is opened.
    os.makedirs("data", exist_ok=True)
    # One JSONL file per tier, written easiest tier first.
    for tier_bugs, out_path in (
        (TIER1_BUGS, "data/bugs_tier1.jsonl"),
        (TIER2_BUGS, "data/bugs_tier2.jsonl"),
        (TIER3_BUGS, "data/bugs_tier3.jsonl"),
    ):
        write_jsonl(tier_bugs, out_path)
    print("\nDone. Run training/train_grpo.py to start training.")
|
env/__pycache__/models.cpython-313.pyc
CHANGED
|
Binary files a/env/__pycache__/models.cpython-313.pyc and b/env/__pycache__/models.cpython-313.pyc differ
|
|
|
env/environment.py
CHANGED
|
@@ -6,20 +6,31 @@ debugging episode lifecycle including task initialization, action
|
|
| 6 |
processing, and reward calculation.
|
| 7 |
"""
|
| 8 |
|
|
|
|
|
|
|
| 9 |
import re
|
| 10 |
import math
|
|
|
|
| 11 |
from typing import Dict, Any, Optional, Tuple
|
| 12 |
|
| 13 |
-
from env.models import Observation, Action, Reward, FixAttempt
|
| 14 |
from env.sandbox import execute_code
|
| 15 |
from env.tasks.registry import get_task, list_tasks
|
| 16 |
from env.graders import get_grader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
class DebuggerEnvironment:
|
| 20 |
"""Core debugging environment implementing the OpenEnv interface."""
|
| 21 |
|
| 22 |
-
def __init__(self):
|
| 23 |
self._task_config: Optional[dict] = None
|
| 24 |
self._observation: Optional[Observation] = None
|
| 25 |
self._cumulative_reward: float = 0.0
|
|
@@ -32,6 +43,14 @@ class DebuggerEnvironment:
|
|
| 32 |
self._step_number: int = 0
|
| 33 |
self._prev_tests_passed: int = 0
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def reset(self, task_id: str) -> dict:
|
| 36 |
"""
|
| 37 |
Start a fresh episode. Clears all state.
|
|
@@ -150,6 +169,228 @@ class DebuggerEnvironment:
|
|
| 150 |
"hint_used": self._observation.hint_used,
|
| 151 |
}
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
# ── Action Handlers ──────────────────────────────────────────────────────
|
| 154 |
|
| 155 |
def _handle_submit_fix(self, action: Action) -> Dict[str, Any]:
|
|
|
|
| 6 |
processing, and reward calculation.
|
| 7 |
"""
|
| 8 |
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
import re
|
| 12 |
import math
|
| 13 |
+
import random
|
| 14 |
from typing import Dict, Any, Optional, Tuple
|
| 15 |
|
| 16 |
+
from env.models import Observation, Action, Reward, FixAttempt, parse_agent_output, StructuredAgentOutput
|
| 17 |
from env.sandbox import execute_code
|
| 18 |
from env.tasks.registry import get_task, list_tasks
|
| 19 |
from env.graders import get_grader
|
| 20 |
+
from server.reward_calculator import DebugRewardCalculator
|
| 21 |
+
|
| 22 |
+
# Optional W&B — only activates if key is present
|
| 23 |
+
try:
|
| 24 |
+
import wandb
|
| 25 |
+
WANDB_AVAILABLE = os.environ.get("WANDB_API_KEY") is not None
|
| 26 |
+
except ImportError:
|
| 27 |
+
WANDB_AVAILABLE = False
|
| 28 |
|
| 29 |
|
| 30 |
class DebuggerEnvironment:
|
| 31 |
"""Core debugging environment implementing the OpenEnv interface."""
|
| 32 |
|
| 33 |
+
def __init__(self, curriculum_step: int = 0):
|
| 34 |
self._task_config: Optional[dict] = None
|
| 35 |
self._observation: Optional[Observation] = None
|
| 36 |
self._cumulative_reward: float = 0.0
|
|
|
|
| 43 |
self._step_number: int = 0
|
| 44 |
self._prev_tests_passed: int = 0
|
| 45 |
|
| 46 |
+
# Curriculum learning state
|
| 47 |
+
self.curriculum_step: int = curriculum_step
|
| 48 |
+
self.reward_calculator: DebugRewardCalculator = DebugRewardCalculator()
|
| 49 |
+
self.current_episode_trajectory: list[dict] = []
|
| 50 |
+
self.current_bug: Optional[dict] = None
|
| 51 |
+
self.turn_number: int = 0
|
| 52 |
+
self.bugs: list[dict] = self._load_bugs_for_curriculum(curriculum_step)
|
| 53 |
+
|
| 54 |
def reset(self, task_id: str) -> dict:
|
| 55 |
"""
|
| 56 |
Start a fresh episode. Clears all state.
|
|
|
|
| 169 |
"hint_used": self._observation.hint_used,
|
| 170 |
}
|
| 171 |
|
| 172 |
+
# ── Curriculum Learning ──────────────────────────────────────────────────
|
| 173 |
+
|
| 174 |
+
def _load_bugs_for_curriculum(self, step: int) -> list[dict]:
|
| 175 |
+
"""
|
| 176 |
+
Curriculum schedule:
|
| 177 |
+
Steps 0-299: Tier 1 only (easy — off-by-one, wrong operator)
|
| 178 |
+
Steps 300-599: Tier 1 + Tier 2 (70/30 split)
|
| 179 |
+
Steps 600+: Tier 1 + Tier 2 + Tier 3 (40/40/20 split)
|
| 180 |
+
"""
|
| 181 |
+
def load_tier(tier: int) -> list[dict]:
|
| 182 |
+
path = f"data/bugs_tier{tier}.jsonl"
|
| 183 |
+
if not os.path.exists(path):
|
| 184 |
+
return []
|
| 185 |
+
bugs = []
|
| 186 |
+
with open(path) as f:
|
| 187 |
+
for line in f:
|
| 188 |
+
line = line.strip()
|
| 189 |
+
if line:
|
| 190 |
+
bugs.append(json.loads(line))
|
| 191 |
+
return bugs
|
| 192 |
+
|
| 193 |
+
tier1 = load_tier(1)
|
| 194 |
+
|
| 195 |
+
if step < 300:
|
| 196 |
+
return tier1
|
| 197 |
+
elif step < 600:
|
| 198 |
+
tier2 = load_tier(2)
|
| 199 |
+
n2 = int(len(tier2) * 0.43) # ~70/30 split
|
| 200 |
+
return tier1 + tier2[:n2]
|
| 201 |
+
else:
|
| 202 |
+
tier2 = load_tier(2)
|
| 203 |
+
tier3 = load_tier(3)
|
| 204 |
+
return tier1 + tier2 + tier3
|
| 205 |
+
|
| 206 |
+
def advance_curriculum(self, step: int):
|
| 207 |
+
"""Call from training loop at steps 300 and 600."""
|
| 208 |
+
self.curriculum_step = step
|
| 209 |
+
self.bugs = self._load_bugs_for_curriculum(step)
|
| 210 |
+
|
| 211 |
+
def _active_tiers(self) -> list[int]:
|
| 212 |
+
if self.curriculum_step < 300:
|
| 213 |
+
return [1]
|
| 214 |
+
elif self.curriculum_step < 600:
|
| 215 |
+
return [1, 2]
|
| 216 |
+
return [1, 2, 3]
|
| 217 |
+
|
| 218 |
+
# ── Curriculum Step / GRPO-Compatible Methods ────────────────────────────
|
| 219 |
+
|
| 220 |
+
def reset_curriculum(self) -> dict:
|
| 221 |
+
"""
|
| 222 |
+
Start a fresh curriculum episode. Selects a random bug from the
|
| 223 |
+
curriculum-appropriate pool. Returns initial observation dict.
|
| 224 |
+
"""
|
| 225 |
+
if not self.bugs:
|
| 226 |
+
raise ValueError("No bugs loaded. Run data/generate_bugs.py first.")
|
| 227 |
+
|
| 228 |
+
self.current_bug = random.choice(self.bugs)
|
| 229 |
+
self.current_episode_trajectory = []
|
| 230 |
+
self.turn_number = 0
|
| 231 |
+
|
| 232 |
+
return {
|
| 233 |
+
"buggy_code": self.current_bug.get("buggy_code", ""),
|
| 234 |
+
"error_message": self.current_bug.get("initial_error", "Some tests are failing."),
|
| 235 |
+
"test_results": {"passed": 0, "failed": 0, "total": len(self.current_bug.get("test_cases", []))},
|
| 236 |
+
"turn_number": 0,
|
| 237 |
+
"history": [],
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
def step_curriculum(self, raw_text: str) -> dict:
    """
    Advance the curriculum episode by one agent turn.

    Parses ``raw_text`` into a structured agent output, runs the proposed
    fix against the current bug's test cases when the action is
    ``propose_fix``, scores the turn, appends it to the episode trajectory
    and increments the turn counter.

    The episode is done when this turn's ``fix_quality`` is at least 0.35,
    when the turn counter reaches the reward calculator's ``MAX_TURNS``, or
    when the agent's action is ``give_up``. A finished episode is logged to
    W&B when W&B is available.

    Returns:
        Dict with keys ``observation``, ``reward``, ``done`` and ``info``.
    """
    parsed = parse_agent_output(raw_text)
    bug = self.current_bug
    # Empty stand-in so every lookup below falls back to its default when
    # there is no (or an empty) current bug.
    facts = bug if bug else {}

    # Only an explicit fix proposal triggers test execution.
    outcome = {"passed": 0, "failed": 0, "total": 0, "newly_broken": 0}
    if parsed.action == "propose_fix" and bug:
        outcome = self._run_fix_safely(proposed_code=parsed.detail, bug=bug)

    location = facts.get("bug_location", {})
    ground_truth = {
        "bug_function": location.get("function", ""),
        "bug_line": location.get("line_start", -1),
        "bug_type": facts.get("bug_type", ""),
        "canonical_fix_code": facts.get("original_code", ""),
    }
    scored = self.reward_calculator.compute_turn_reward(
        agent_output=parsed,
        ground_truth=ground_truth,
        test_results=outcome,
        turn_number=self.turn_number,
    )

    # The trajectory entry carries the pre-increment turn index.
    self.current_episode_trajectory.append(
        {
            "turn": self.turn_number,
            "agent_output": parsed,
            "test_results": outcome,
            "reward": scored,
        }
    )
    self.turn_number += 1

    solved = scored.fix_quality >= 0.35
    out_of_turns = self.turn_number >= self.reward_calculator.MAX_TURNS
    surrendered = parsed.action == "give_up"
    done = any((solved, out_of_turns, surrendered))

    if done and WANDB_AVAILABLE:
        self._log_episode_to_wandb(scored, solved)

    history = []
    for entry in self.current_episode_trajectory:
        history.append(
            {
                "turn": entry["turn"],
                "action": entry["agent_output"].action,
                "reward": entry["reward"].total,
            }
        )

    observation = {
        "buggy_code": facts.get("buggy_code", ""),
        "error_message": facts.get("initial_error", ""),
        "test_results": outcome,
        "turn_number": self.turn_number,
        "history": history,
    }
    info = {
        "reward_breakdown": scored.__dict__,
        "turn_number": self.turn_number,
        "solved": solved,
        "bug_tier": facts.get("difficulty", 0),
    }
    return {
        "observation": observation,
        "reward": scored.total,
        "done": done,
        "info": info,
    }
|
| 312 |
+
|
| 313 |
+
def _run_fix_safely(self, proposed_code: str, bug: dict) -> dict:
|
| 314 |
+
"""Run proposed fix against test cases with timeout. NEVER execute without timeout."""
|
| 315 |
+
import subprocess
|
| 316 |
+
import tempfile
|
| 317 |
+
|
| 318 |
+
if not proposed_code or not bug.get("test_cases"):
|
| 319 |
+
return {"passed": 0, "failed": 0, "total": 0, "newly_broken": 0}
|
| 320 |
+
|
| 321 |
+
test_cases = bug["test_cases"]
|
| 322 |
+
func_name = bug.get("function_name", "")
|
| 323 |
+
passed = 0
|
| 324 |
+
|
| 325 |
+
for test in test_cases:
|
| 326 |
+
inp = test["input"]
|
| 327 |
+
expected = test["expected_output"]
|
| 328 |
+
|
| 329 |
+
if isinstance(inp, (list, tuple)):
|
| 330 |
+
args_str = ", ".join(repr(x) for x in inp)
|
| 331 |
+
else:
|
| 332 |
+
args_str = repr(inp)
|
| 333 |
+
|
| 334 |
+
script = f"""
|
| 335 |
+
{proposed_code}
|
| 336 |
+
|
| 337 |
+
try:
|
| 338 |
+
result = {func_name}({args_str})
|
| 339 |
+
expected = {repr(expected)}
|
| 340 |
+
print("PASS" if result == expected else f"FAIL: got {{result}}, expected {{expected}}")
|
| 341 |
+
except Exception as e:
|
| 342 |
+
print(f"ERROR: {{type(e).__name__}}: {{e}}")
|
| 343 |
+
"""
|
| 344 |
+
try:
|
| 345 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
| 346 |
+
f.write(script)
|
| 347 |
+
fname = f.name
|
| 348 |
+
|
| 349 |
+
result = subprocess.run(
|
| 350 |
+
["python", fname],
|
| 351 |
+
capture_output=True, text=True, timeout=5
|
| 352 |
+
)
|
| 353 |
+
|
| 354 |
+
try:
|
| 355 |
+
os.unlink(fname)
|
| 356 |
+
except Exception:
|
| 357 |
+
pass
|
| 358 |
+
|
| 359 |
+
if "PASS" in result.stdout:
|
| 360 |
+
passed += 1
|
| 361 |
+
except subprocess.TimeoutExpired:
|
| 362 |
+
pass # timeout = failed test
|
| 363 |
+
except Exception:
|
| 364 |
+
pass
|
| 365 |
+
|
| 366 |
+
failed = len(test_cases) - passed
|
| 367 |
+
return {
|
| 368 |
+
"passed": passed,
|
| 369 |
+
"failed": failed,
|
| 370 |
+
"total": len(test_cases),
|
| 371 |
+
"newly_broken": 0,
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
def _log_episode_to_wandb(self, final_reward, solved: bool):
    """
    Send the finished episode's metrics to W&B.

    Does nothing when ``WANDB_AVAILABLE`` is false. Logs the aggregated
    episode reward, the solved flag, the number of turns used, the bug's
    ``difficulty`` and the curriculum step, together with the per-component
    figures returned by the reward calculator. ``final_reward`` is accepted
    but not used.
    """
    if not WANDB_AVAILABLE:
        return

    calculator = self.reward_calculator
    trajectory = self.current_episode_trajectory
    component_stats = calculator.get_reward_breakdown_for_logging(trajectory)
    aggregated_reward = calculator.compute_episode_reward(trajectory)

    bug = self.current_bug
    metrics = {
        "episode/reward_total": aggregated_reward,
        "episode/solved": int(solved),
        "episode/turns_used": self.turn_number,
        "episode/bug_tier": bug.get("difficulty", 0) if bug else 0,
        "episode/curriculum_step": self.curriculum_step,
    }
    # Per-component entries go last, as in a trailing ``**`` expansion.
    metrics.update(component_stats)
    wandb.log(metrics)
|
| 393 |
+
|
| 394 |
# ── Action Handlers ──────────────────────────────────────────────────────
|
| 395 |
|
| 396 |
def _handle_submit_fix(self, action: Action) -> Dict[str, Any]:
|
env/models.py
CHANGED
|
@@ -5,8 +5,9 @@ Pydantic v2 data models for structured interaction between the agent
|
|
| 5 |
and the environment, ensuring strict type safety and schema compliance.
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 8 |
from pydantic import BaseModel
|
| 9 |
-
from typing import List, Dict, Optional
|
| 10 |
|
| 11 |
|
| 12 |
class FixAttempt(BaseModel):
|
|
@@ -69,3 +70,65 @@ class Reward(BaseModel):
|
|
| 69 |
cumulative_reward: float # Sum of all step_rewards this episode
|
| 70 |
grader_score: float # 0.0 during episode. Set ONLY on terminal step (done=True).
|
| 71 |
breakdown: Dict[str, float] # Itemized components
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
and the environment, ensuring strict type safety and schema compliance.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import re
|
| 9 |
from pydantic import BaseModel
|
| 10 |
+
from typing import List, Dict, Optional, Literal
|
| 11 |
|
| 12 |
|
| 13 |
class FixAttempt(BaseModel):
|
|
|
|
| 70 |
cumulative_reward: float # Sum of all step_rewards this episode
|
| 71 |
grader_score: float # 0.0 during episode. Set ONLY on terminal step (done=True).
|
| 72 |
breakdown: Dict[str, float] # Itemized components
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# ── STRUCTURED AGENT OUTPUT ────────────────────────────────────────────────
|
| 76 |
+
|
| 77 |
+
VALID_ACTIONS = {"inspect_lines", "run_tests", "propose_fix", "request_context", "give_up"}
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class StructuredAgentOutput(BaseModel):
    """One agent response split into its labelled fields (built by ``parse_agent_output``)."""

    # Text of the OBSERVATION field; "" when the parser found none.
    observation: str
    # Text of the HYPOTHESIS field; "" when the parser found none.
    hypothesis: str
    # Self-reported confidence; the parser falls back to "low" for any
    # missing or unrecognised value.
    confidence: Literal["low", "medium", "high"]
    # Lower-cased action name; the parser stores "invalid" when the value is
    # not in VALID_ACTIONS.
    action: str
    # Text of the DETAIL field; "" when the parser found none.
    detail: str
    # Whether the parser considered all five fields present and acceptable.
    valid: bool
    # The unparsed response exactly as received.
    raw_text: str
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def parse_agent_output(raw_text: str) -> StructuredAgentOutput:
    """
    Parse agent's structured response. Robust to minor formatting variations.
    Sets valid=False if any required field is missing or action is not in VALID_ACTIONS.

    Expected format:
        OBSERVATION: [text]
        HYPOTHESIS: [text]
        CONFIDENCE: [low|medium|high]
        ACTION: [inspect_lines|run_tests|propose_fix|request_context|give_up]
        DETAIL: [text]

    Labels are matched case-insensitively. A label at the start of a line
    wins; only if none is found is a label elsewhere in the text accepted,
    and never as the tail of a longer word. This stops prose such as
    "subtraction:" or "reaction:" from being read as the ACTION label.

    Args:
        raw_text: The agent's raw response.

    Returns:
        StructuredAgentOutput with the extracted fields. Missing text fields
        are "", an unrecognised confidence becomes "low", and an
        unrecognised action becomes "invalid".
    """
    labels = "OBSERVATION|HYPOTHESIS|CONFIDENCE|ACTION|DETAIL"
    # A value runs until the next label that starts a line, or to the end of
    # the text. \Z rather than $ because MULTILINE is enabled below.
    value = rf"\s*:\s*(.*?)(?=\n(?:{labels})\s*:|\Z)"
    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    # Tried in order: label at line start, then label anywhere provided it
    # is not glued to a preceding word character.
    label_prefixes = (r"^[ \t]*", r"(?<![A-Za-z0-9_])")

    def extract_field(text: str, field: str) -> Optional[str]:
        for prefix in label_prefixes:
            match = re.search(prefix + field + value, text, flags)
            if match:
                return match.group(1).strip()
        return None

    observation = extract_field(raw_text, "OBSERVATION") or ""
    hypothesis = extract_field(raw_text, "HYPOTHESIS") or ""
    confidence_raw = (extract_field(raw_text, "CONFIDENCE") or "").lower().strip()
    action_raw = (extract_field(raw_text, "ACTION") or "").lower().strip()
    detail = extract_field(raw_text, "DETAIL") or ""

    # Fall back to safe placeholders so the model can always be constructed;
    # `valid` records whether the response really was well-formed.
    confidence = confidence_raw if confidence_raw in {"low", "medium", "high"} else "low"
    action = action_raw if action_raw in VALID_ACTIONS else "invalid"

    valid = all([
        len(observation) > 5,
        len(hypothesis) > 10,
        confidence in {"low", "medium", "high"},
        action in VALID_ACTIONS,
        len(detail) > 0,
    ])

    return StructuredAgentOutput(
        observation=observation,
        hypothesis=hypothesis,
        confidence=confidence,
        action=action,
        detail=detail,
        valid=valid,
        raw_text=raw_text,
    )
|
openenv.yaml
CHANGED
|
@@ -1,21 +1,61 @@
|
|
| 1 |
-
name:
|
| 2 |
-
version: 1.0.0
|
| 3 |
description: >
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
domain: software_engineering
|
| 9 |
tags:
|
|
|
|
| 10 |
- debugging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
- agentic-reasoning
|
| 12 |
- code-repair
|
| 13 |
-
- openenv
|
| 14 |
- software-engineering
|
| 15 |
observation_type: structured
|
| 16 |
action_type: structured
|
| 17 |
reward_type: dense
|
| 18 |
episode_termination: action_or_step_limit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
inference_script: inference.py
|
| 20 |
tasks:
|
| 21 |
- id: easy
|
|
|
|
| 1 |
+
name: AgentDebuggerEnv
|
| 2 |
+
version: "1.0.0"
|
| 3 |
description: >
|
| 4 |
+
An OpenEnv-compliant RL training environment where LLM agents learn to debug
|
| 5 |
+
Python code through structured multi-turn hypothesis-driven reasoning.
|
| 6 |
+
The agent forms hypotheses, tests them, and refines iteratively over up to 5 turns.
|
| 7 |
+
Trained via GRPO on Qwen2.5-Coder-7B-Instruct with curriculum learning across
|
| 8 |
+
3 bug difficulty tiers. Reward design follows Masud et al. (2026) execution-based
|
| 9 |
+
+ process-based taxonomy and Ibrahim et al. (2024) potential-based shaping.
|
| 10 |
domain: software_engineering
|
| 11 |
tags:
|
| 12 |
+
- openenv
|
| 13 |
- debugging
|
| 14 |
+
- reinforcement-learning
|
| 15 |
+
- grpo
|
| 16 |
+
- curriculum-learning
|
| 17 |
+
- python
|
| 18 |
+
- code-reasoning
|
| 19 |
+
- hypothesis-driven
|
| 20 |
- agentic-reasoning
|
| 21 |
- code-repair
|
|
|
|
| 22 |
- software-engineering
|
| 23 |
observation_type: structured
|
| 24 |
action_type: structured
|
| 25 |
reward_type: dense
|
| 26 |
episode_termination: action_or_step_limit
|
| 27 |
+
observation_space:
|
| 28 |
+
type: object
|
| 29 |
+
properties:
|
| 30 |
+
buggy_code:
|
| 31 |
+
type: string
|
| 32 |
+
description: The Python function containing the bug
|
| 33 |
+
error_message:
|
| 34 |
+
type: string
|
| 35 |
+
description: Error output or test failure description seen at episode start
|
| 36 |
+
test_results:
|
| 37 |
+
type: object
|
| 38 |
+
description: Results of running current test suite
|
| 39 |
+
turn_number:
|
| 40 |
+
type: integer
|
| 41 |
+
description: Current turn within episode (0-indexed, max 4)
|
| 42 |
+
history:
|
| 43 |
+
type: array
|
| 44 |
+
description: Previous turns with agent outputs and rewards
|
| 45 |
+
action_space:
|
| 46 |
+
type: object
|
| 47 |
+
properties:
|
| 48 |
+
structured_response:
|
| 49 |
+
type: string
|
| 50 |
+
description: >
|
| 51 |
+
Agent response in required format:
|
| 52 |
+
OBSERVATION: [text]
|
| 53 |
+
HYPOTHESIS: [text]
|
| 54 |
+
CONFIDENCE: [low|medium|high]
|
| 55 |
+
ACTION: [inspect_lines|run_tests|propose_fix|request_context|give_up]
|
| 56 |
+
DETAIL: [text]
|
| 57 |
+
reward_range: [-0.5, 1.0]
|
| 58 |
+
max_episode_steps: 5
|
| 59 |
inference_script: inference.py
|
| 60 |
tasks:
|
| 61 |
- id: easy
|
pyproject.toml
CHANGED
|
@@ -11,7 +11,7 @@ requires-python = ">=3.10"
|
|
| 11 |
dependencies = [
|
| 12 |
"fastapi==0.110.0",
|
| 13 |
"uvicorn==0.29.0",
|
| 14 |
-
"pydantic=
|
| 15 |
"openai==2.7.2",
|
| 16 |
"openenv-core>=0.2.0",
|
| 17 |
"requests==2.31.0",
|
|
@@ -21,5 +21,8 @@ dependencies = [
|
|
| 21 |
"RestrictedPython==7.0"
|
| 22 |
]
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
[project.scripts]
|
| 25 |
server = "server.app:main"
|
|
|
|
| 11 |
dependencies = [
|
| 12 |
"fastapi==0.110.0",
|
| 13 |
"uvicorn==0.29.0",
|
| 14 |
+
"pydantic>=2.9.0",
|
| 15 |
"openai==2.7.2",
|
| 16 |
"openenv-core>=0.2.0",
|
| 17 |
"requests==2.31.0",
|
|
|
|
| 21 |
"RestrictedPython==7.0"
|
| 22 |
]
|
| 23 |
|
| 24 |
+
[tool.setuptools.packages.find]
|
| 25 |
+
include = ["env*", "server*"]
|
| 26 |
+
|
| 27 |
[project.scripts]
|
| 28 |
server = "server.app:main"
|
server/models.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
server/models.py — Re-exports structured agent types for training scripts.
|
| 3 |
+
All core types live in env/models.py; this module exposes them under the
|
| 4 |
+
`server` namespace so training/train_grpo.py can import without path changes.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from env.models import ( # noqa: F401
|
| 8 |
+
StructuredAgentOutput,
|
| 9 |
+
parse_agent_output,
|
| 10 |
+
VALID_ACTIONS,
|
| 11 |
+
)
|
server/reward_calculator.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
DebugRewardCalculator — Multi-component reward system for AgentDebuggerEnv.
|
| 3 |
+
|
| 4 |
+
Reward taxonomy follows:
|
| 5 |
+
- Masud et al. (2026) "Reward Engineering for RL in Software Tasks"
|
| 6 |
+
→ Uses their execution-based + process-based + semantic similarity taxonomy
|
| 7 |
+
- Ibrahim et al. (2024) "Comprehensive Overview of Reward Engineering and Shaping"
|
| 8 |
+
→ Uses potential-based shaping for efficiency component to preserve policy invariance
|
| 9 |
+
|
| 10 |
+
Design principle: GRPO learns by comparing completions WITHIN a group.
|
| 11 |
+
Relative reward differences matter more than absolute values.
|
| 12 |
+
Therefore: be generous with partial credit so the model gets differentiated signal
|
| 13 |
+
even when nothing fully works.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import difflib
|
| 17 |
+
import re
|
| 18 |
+
from dataclasses import dataclass
|
| 19 |
+
from typing import Optional
|
| 20 |
+
from server.models import StructuredAgentOutput
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
|
| 24 |
+
class RewardBreakdown:
|
| 25 |
+
format_compliance: float # fires every turn — gives early training signal
|
| 26 |
+
hypothesis_quality: float # process-based reward (Paper 2 taxonomy)
|
| 27 |
+
localization: float # execution-based proxy
|
| 28 |
+
fix_quality: float # execution-based reward (primary terminal signal)
|
| 29 |
+
semantic_similarity: float # semantic reward (Paper 2 taxonomy)
|
| 30 |
+
efficiency_potential: float # potential-based shaping (Paper 1)
|
| 31 |
+
penalties: float
|
| 32 |
+
total: float
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DebugRewardCalculator:
|
| 36 |
+
"""
|
| 37 |
+
Reward weights (must sum to 1.0 excluding penalties):
|
| 38 |
+
format_compliance: 0.10 — fires every turn, drives early curve movement
|
| 39 |
+
hypothesis_quality: 0.20 — process-based, independent of fix success
|
| 40 |
+
localization: 0.15 — did agent find the right place?
|
| 41 |
+
fix_quality: 0.35 — execution-based, primary terminal signal (sparse)
|
| 42 |
+
semantic_similarity: 0.10 — how close to canonical fix?
|
| 43 |
+
efficiency_potential: 0.10 — potential-based shaping across turns
|
| 44 |
+
|
| 45 |
+
IMPORTANT NOTE ON SPARSITY vs DENSITY:
|
| 46 |
+
The fix_quality reward (0.35) is sparse — it only fires when tests pass.
|
| 47 |
+
The format, hypothesis, localization rewards are dense — they fire every turn.
|
| 48 |
+
This combination is intentional: dense rewards carry gradient signal while the
|
| 49 |
+
model is still learning to fix bugs; sparse rewards dominate once it gets good.
|
| 50 |
+
This directly implements Ibrahim et al.'s recommendation to combine reward
|
| 51 |
+
shaping with terminal rewards to solve the sparse reward problem.
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
MAX_TURNS = 5
|
| 55 |
+
|
| 56 |
+
def compute_turn_reward(
|
| 57 |
+
self,
|
| 58 |
+
agent_output: StructuredAgentOutput,
|
| 59 |
+
ground_truth: dict,
|
| 60 |
+
test_results: dict,
|
| 61 |
+
turn_number: int,
|
| 62 |
+
) -> RewardBreakdown:
|
| 63 |
+
"""
|
| 64 |
+
Compute reward for a single agent turn.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
agent_output: parsed structured output from the agent
|
| 68 |
+
ground_truth: {
|
| 69 |
+
"bug_function": str, # name of function containing the bug
|
| 70 |
+
"bug_line": int, # line number of the bug
|
| 71 |
+
"bug_type": str, # category of bug
|
| 72 |
+
"canonical_fix_code": str, # the correct minimal fix
|
| 73 |
+
}
|
| 74 |
+
test_results: {
|
| 75 |
+
"passed": int,
|
| 76 |
+
"failed": int,
|
| 77 |
+
"total": int,
|
| 78 |
+
"newly_broken": int, # tests that passed before but fail after fix
|
| 79 |
+
}
|
| 80 |
+
turn_number: 0-indexed turn number within the episode
|
| 81 |
+
|
| 82 |
+
Returns:
|
| 83 |
+
RewardBreakdown with total and all component scores
|
| 84 |
+
"""
|
| 85 |
+
|
| 86 |
+
# ── COMPONENT 1: FORMAT COMPLIANCE ────────────────────────────────
|
| 87 |
+
# This fires EVERY turn. Gives the model early training signal before
|
| 88 |
+
# it learns to fix bugs. Drives curve movement in first 50-100 steps.
|
| 89 |
+
if agent_output.valid:
|
| 90 |
+
format_score = 0.10
|
| 91 |
+
else:
|
| 92 |
+
# Partial credit: how many fields were present?
|
| 93 |
+
fields_present = sum([
|
| 94 |
+
len(agent_output.observation) > 5,
|
| 95 |
+
len(agent_output.hypothesis) > 10,
|
| 96 |
+
agent_output.confidence in {"low", "medium", "high"},
|
| 97 |
+
agent_output.action in {"inspect_lines", "run_tests", "propose_fix",
|
| 98 |
+
"request_context", "give_up"},
|
| 99 |
+
len(agent_output.detail) > 0,
|
| 100 |
+
])
|
| 101 |
+
format_score = -0.25 + (fields_present * 0.04) # -0.25 to -0.05
|
| 102 |
+
|
| 103 |
+
# ── COMPONENT 2: HYPOTHESIS QUALITY (Process-based, Paper 2) ──────
|
| 104 |
+
# Score reasoning quality INDEPENDENTLY from whether the fix works.
|
| 105 |
+
# A correct diagnosis that leads to a wrong fix still gets rewarded here.
|
| 106 |
+
# This trains the model to reason carefully even when uncertain.
|
| 107 |
+
hypothesis_score = 0.0
|
| 108 |
+
hypothesis = agent_output.hypothesis
|
| 109 |
+
|
| 110 |
+
if len(hypothesis.split()) >= 20:
|
| 111 |
+
hypothesis_score += 0.05 # not a one-liner
|
| 112 |
+
|
| 113 |
+
# References specific code elements (backticks, quotes, or operators)
|
| 114 |
+
if re.search(r'[`\'"<>!=+\-*/]', hypothesis):
|
| 115 |
+
hypothesis_score += 0.05
|
| 116 |
+
|
| 117 |
+
# Mentions line numbers
|
| 118 |
+
if re.search(r'\bline\s+\d+\b|\b\d+\b', hypothesis):
|
| 119 |
+
hypothesis_score += 0.05
|
| 120 |
+
|
| 121 |
+
# Logically consistent: OBSERVATION and HYPOTHESIS reference same code area
|
| 122 |
+
obs_words = set(agent_output.observation.lower().split())
|
| 123 |
+
hyp_words = set(hypothesis.lower().split())
|
| 124 |
+
overlap = len(obs_words & hyp_words) / max(len(obs_words), 1)
|
| 125 |
+
if overlap > 0.15:
|
| 126 |
+
hypothesis_score += 0.05
|
| 127 |
+
|
| 128 |
+
# Confidence calibration: rewards correct confidence, penalizes overconfidence
|
| 129 |
+
# High confidence + correct = bonus, High confidence + wrong = penalty
|
| 130 |
+
if agent_output.action == "propose_fix":
|
| 131 |
+
tests_pass = test_results.get("passed", 0) == test_results.get("total", 1)
|
| 132 |
+
if agent_output.confidence == "high" and tests_pass:
|
| 133 |
+
hypothesis_score += 0.05 # well-calibrated
|
| 134 |
+
elif agent_output.confidence == "high" and not tests_pass:
|
| 135 |
+
hypothesis_score -= 0.05 # overconfident
|
| 136 |
+
elif agent_output.confidence == "low" and tests_pass:
|
| 137 |
+
hypothesis_score += 0.02 # humble but correct
|
| 138 |
+
|
| 139 |
+
hypothesis_score = max(0.0, min(hypothesis_score, 0.20))
|
| 140 |
+
|
| 141 |
+
# ── COMPONENT 3: LOCALIZATION (Execution-based proxy) ─────────────
|
| 142 |
+
# Did the agent identify WHERE the bug is, independently of fixing it?
|
| 143 |
+
localization_score = 0.0
|
| 144 |
+
bug_function = ground_truth.get("bug_function", "").lower()
|
| 145 |
+
bug_line = str(ground_truth.get("bug_line", -1))
|
| 146 |
+
|
| 147 |
+
combined_text = (agent_output.hypothesis + " " + agent_output.detail).lower()
|
| 148 |
+
|
| 149 |
+
if bug_function and bug_function in combined_text:
|
| 150 |
+
localization_score += 0.08
|
| 151 |
+
|
| 152 |
+
if bug_line != "-1" and bug_line in agent_output.hypothesis:
|
| 153 |
+
localization_score += 0.07
|
| 154 |
+
|
| 155 |
+
localization_score = min(localization_score, 0.15)
|
| 156 |
+
|
| 157 |
+
# ── COMPONENT 4: FIX QUALITY (Execution-based, Paper 2 primary) ───
|
| 158 |
+
# This is the dominant signal. Sparse but high value.
|
| 159 |
+
# Paper 1: combine with shaping (components 1-3) to solve sparse problem.
|
| 160 |
+
total_tests = test_results.get("total", 0)
|
| 161 |
+
passed_tests = test_results.get("passed", 0)
|
| 162 |
+
fix_score = 0.0
|
| 163 |
+
|
| 164 |
+
if total_tests > 0 and agent_output.action == "propose_fix":
|
| 165 |
+
pass_rate = passed_tests / total_tests
|
| 166 |
+
if pass_rate == 1.0:
|
| 167 |
+
fix_score = 0.35 # full solve — this is what we're training for
|
| 168 |
+
elif pass_rate >= 0.75:
|
| 169 |
+
fix_score = 0.20 # most tests pass
|
| 170 |
+
elif pass_rate >= 0.50:
|
| 171 |
+
fix_score = 0.12 # more than half pass
|
| 172 |
+
elif pass_rate > 0.0:
|
| 173 |
+
fix_score = 0.05 # at least something works
|
| 174 |
+
# 0.0 if nothing passes — no credit for non-fix actions
|
| 175 |
+
|
| 176 |
+
# ── COMPONENT 5: SEMANTIC SIMILARITY (Paper 2 taxonomy) ───────────
|
| 177 |
+
# How structurally close is the proposed fix to the canonical fix?
|
| 178 |
+
# Uses difflib — no heavy NLP dependencies needed.
|
| 179 |
+
semantic_score = 0.0
|
| 180 |
+
proposed = agent_output.detail
|
| 181 |
+
canonical = ground_truth.get("canonical_fix_code", "")
|
| 182 |
+
|
| 183 |
+
if proposed and canonical and agent_output.action == "propose_fix":
|
| 184 |
+
similarity = difflib.SequenceMatcher(None, proposed, canonical).ratio()
|
| 185 |
+
if similarity >= 0.85:
|
| 186 |
+
semantic_score = 0.10
|
| 187 |
+
elif similarity >= 0.65:
|
| 188 |
+
semantic_score = 0.05
|
| 189 |
+
elif similarity >= 0.40:
|
| 190 |
+
semantic_score = 0.02
|
| 191 |
+
# No reward below 0.40 similarity — prevents gaming with partial matches
|
| 192 |
+
|
| 193 |
+
# ── COMPONENT 6: EFFICIENCY POTENTIAL (Potential-based, Paper 1) ──
|
| 194 |
+
# Implements potential-based reward shaping: F(s,a,s') = γΦ(s') - Φ(s)
|
| 195 |
+
# where Φ(state) = value of remaining turns
|
| 196 |
+
# This is PROVEN to not change the optimal policy (Ibrahim et al. Theorem 1)
|
| 197 |
+
# while still accelerating convergence.
|
| 198 |
+
remaining_turns = self.MAX_TURNS - turn_number
|
| 199 |
+
efficiency_potential = 0.02 * remaining_turns # max 0.10 on turn 0
|
| 200 |
+
|
| 201 |
+
# ── PENALTIES ─────────────────────────────────────────────────────
|
| 202 |
+
penalties = 0.0
|
| 203 |
+
|
| 204 |
+
# Regression: fix breaks previously-passing tests — severe
|
| 205 |
+
if test_results.get("newly_broken", 0) > 0:
|
| 206 |
+
penalties -= 0.20
|
| 207 |
+
|
| 208 |
+
# Give up: agent chose to give_up
|
| 209 |
+
if agent_output.action == "give_up":
|
| 210 |
+
penalties -= 0.15
|
| 211 |
+
|
| 212 |
+
# Invalid action: not one of the 5 valid actions
|
| 213 |
+
if agent_output.action == "invalid":
|
| 214 |
+
penalties -= 0.10
|
| 215 |
+
|
| 216 |
+
# Invalid format (already captured in format_score, add extra penalty)
|
| 217 |
+
if not agent_output.valid:
|
| 218 |
+
penalties -= 0.10
|
| 219 |
+
|
| 220 |
+
# ── TOTAL ─────────────────────────────────────────────────────────
|
| 221 |
+
raw_total = (
|
| 222 |
+
format_score
|
| 223 |
+
+ hypothesis_score
|
| 224 |
+
+ localization_score
|
| 225 |
+
+ fix_score
|
| 226 |
+
+ semantic_score
|
| 227 |
+
+ efficiency_potential
|
| 228 |
+
+ penalties
|
| 229 |
+
)
|
| 230 |
+
|
| 231 |
+
# Floor at -0.5 to prevent reward death spiral (Ibrahim et al.)
|
| 232 |
+
total = max(raw_total, -0.5)
|
| 233 |
+
|
| 234 |
+
return RewardBreakdown(
|
| 235 |
+
format_compliance=round(format_score, 4),
|
| 236 |
+
hypothesis_quality=round(hypothesis_score, 4),
|
| 237 |
+
localization=round(localization_score, 4),
|
| 238 |
+
fix_quality=round(fix_score, 4),
|
| 239 |
+
semantic_similarity=round(semantic_score, 4),
|
| 240 |
+
efficiency_potential=round(efficiency_potential, 4),
|
| 241 |
+
penalties=round(penalties, 4),
|
| 242 |
+
total=round(total, 4),
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
def compute_episode_reward(self, trajectory: list[dict]) -> float:
    """
    Collapse the per-turn rewards of one episode into a single scalar.

    Every entry of ``trajectory`` is a dict whose ``"reward"`` value exposes
    ``total`` and ``fix_quality``. Turn ``i`` is weighted by ``0.9 ** i`` (the
    weight is accumulated by repeated multiplication). A flat 0.20 solve bonus
    is added when at least one turn reached ``fix_quality >= 0.35``.

    Returns 0.0 for an empty trajectory; otherwise the sum rounded to 4 places.
    """
    if not trajectory:
        return 0.0

    weight = 1.0
    episode_sum = 0.0
    for step in trajectory:
        episode_sum += weight * step["reward"].total
        weight *= 0.9

    # Solve bonus: granted once, as soon as any turn shows a high enough fix score.
    for step in trajectory:
        if step["reward"].fix_quality >= 0.35:
            episode_sum += 0.20
            break

    return round(episode_sum, 4)
|
| 267 |
+
|
| 268 |
+
def get_reward_breakdown_for_logging(self, trajectory: list[dict]) -> dict:
    """
    Return per-component reward averages across an episode for W&B logging.

    Args:
        trajectory: list of turn dicts; each ``turn["reward"]`` must expose the
            seven component attributes listed in ``components`` below.

    Returns:
        A dict mapping ``"reward/<component>"`` to the mean of that component
        over all turns, rounded to 4 places. Empty dict for an empty trajectory.
    """
    if not trajectory:
        return {}

    components = (
        "format_compliance", "hypothesis_quality", "localization",
        "fix_quality", "semantic_similarity", "efficiency_potential", "penalties",
    )
    n_turns = len(trajectory)

    # getattr (rather than indexing __dict__) also works for reward objects that
    # use __slots__ or expose a component through a property.
    return {
        f"reward/{name}": round(
            sum(getattr(turn["reward"], name) for turn in trajectory) / n_turns, 4
        )
        for name in components
    }
|
training/train_grpo.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AgentDebuggerEnv — GRPO Training Script
|
| 3 |
+
Model: Qwen2.5-Coder-7B-Instruct (4-bit quantized via Unsloth)
|
| 4 |
+
Algorithm: GRPO (Group Relative Policy Optimization) via HuggingFace TRL
|
| 5 |
+
GPU: HuggingFace ZeroGPU H200 (free) or paid HF Spaces A10G
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
# Test run (10 steps; a CUDA GPU is still required to load and run the model):
|
| 9 |
+
python training/train_grpo.py --test
|
| 10 |
+
|
| 11 |
+
# Full training run:
|
| 12 |
+
python training/train_grpo.py
|
| 13 |
+
|
| 14 |
+
# Resume from checkpoint:
|
| 15 |
+
python training/train_grpo.py --resume ./checkpoints/checkpoint-400
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import os
|
| 19 |
+
import sys
|
| 20 |
+
import json
|
| 21 |
+
import argparse
|
| 22 |
+
import random
|
| 23 |
+
import subprocess
|
| 24 |
+
import tempfile
|
| 25 |
+
import torch
|
| 26 |
+
|
| 27 |
+
# ── Parse args ────────────────────────────────────────────────────────────────
|
| 28 |
+
parser = argparse.ArgumentParser()
|
| 29 |
+
parser.add_argument("--test", action="store_true", help="Run 10 steps for testing")
|
| 30 |
+
parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint")
|
| 31 |
+
parser.add_argument("--max_steps", type=int, default=1000)
|
| 32 |
+
args = parser.parse_args()
|
| 33 |
+
|
| 34 |
+
# ── Install dependencies (for Colab/HF Spaces) ───────────────────────────────
|
| 35 |
+
# If running locally with venv, comment these out
|
| 36 |
+
if os.environ.get("COLAB_RELEASE_TAG") or os.environ.get("SPACE_ID"):
    # Install through the interpreter that is running this script so the
    # packages land in the environment the imports below will read from.
    _pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q",
         "unsloth", "trl", "wandb", "datasets"],
        check=False,
    )
    # Best-effort: packages may already be present, so only warn on failure.
    if _pip_result.returncode != 0:
        print(f"WARNING: pip install exited with status {_pip_result.returncode}")
|
| 38 |
+
|
| 39 |
+
# ── Imports ───────────────────────────────────────────────────────────────────
|
| 40 |
+
import wandb
|
| 41 |
+
from datasets import Dataset
|
| 42 |
+
from unsloth import FastLanguageModel
|
| 43 |
+
from trl import GRPOTrainer, GRPOConfig
|
| 44 |
+
from transformers import TrainerCallback
|
| 45 |
+
|
| 46 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 47 |
+
from server.reward_calculator import DebugRewardCalculator
|
| 48 |
+
from server.models import parse_agent_output
|
| 49 |
+
|
| 50 |
+
# ── Configuration ─────────────────────────────────────────────────────────────
|
| 51 |
+
MODEL_NAME = "Qwen/Qwen2.5-Coder-7B-Instruct"
|
| 52 |
+
HF_REPO = "shashaank0707/AgentDebugger-trained"
|
| 53 |
+
MAX_STEPS = 10 if args.test else args.max_steps
|
| 54 |
+
CHECKPOINT_DIR = "./checkpoints"
|
| 55 |
+
|
| 56 |
+
# W&B — optional but strongly recommended for judging
|
| 57 |
+
WANDB_API_KEY = os.environ.get("WANDB_API_KEY", "")
|
| 58 |
+
if WANDB_API_KEY:
|
| 59 |
+
wandb.init(
|
| 60 |
+
project="AgentDebuggerEnv",
|
| 61 |
+
name=f"grpo-qwen-7b-{'test' if args.test else 'full'}",
|
| 62 |
+
config={
|
| 63 |
+
"model": MODEL_NAME,
|
| 64 |
+
"algorithm": "GRPO",
|
| 65 |
+
"curriculum": "tier1->tier2->tier3",
|
| 66 |
+
"max_steps": MAX_STEPS,
|
| 67 |
+
"reward_components": ["format", "hypothesis", "localization", "fix", "semantic", "efficiency"],
|
| 68 |
+
"paper_citations": ["Masud et al. 2026", "Ibrahim et al. 2024"],
|
| 69 |
+
}
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
# ── System prompt ─────────────────────────────────────────────────────────────
|
| 73 |
+
SYSTEM_PROMPT = """You are an expert Python debugger. You reason through bugs systematically.
|
| 74 |
+
|
| 75 |
+
You MUST respond in EXACTLY this format — no exceptions, no extra text:
|
| 76 |
+
|
| 77 |
+
OBSERVATION: [Specific observations about the code and error. Reference exact line numbers.]
|
| 78 |
+
HYPOTHESIS: [Your theory about the root cause. Must be at least 2 sentences. Reference specific variable names, operators, or logic.]
|
| 79 |
+
CONFIDENCE: [low | medium | high]
|
| 80 |
+
ACTION: [One of: inspect_lines | run_tests | propose_fix | request_context | give_up]
|
| 81 |
+
DETAIL: [For propose_fix: the complete corrected function code. For inspect_lines: line numbers. For others: specific details.]
|
| 82 |
+
|
| 83 |
+
Rules:
|
| 84 |
+
- Never omit any field
|
| 85 |
+
- HYPOTHESIS must explain WHY the bug causes the observed failure
|
| 86 |
+
- If proposing a fix, DETAIL must contain the complete function, not just the changed line
|
| 87 |
+
- Give up only if you have exhausted all reasonable hypotheses"""
|
| 88 |
+
|
| 89 |
+
# ── Load bugs ─────────────────────────────────────────────────────────────────
|
| 90 |
+
def load_bugs(tier: int) -> list[dict]:
    """
    Load the bug records for one curriculum tier.

    Reads ``data/bugs_tier<tier>.jsonl`` (relative to the current working
    directory), one JSON object per non-blank line.

    Returns:
        The parsed records, or an empty list (after printing a warning) when
        the file does not exist.
    """
    path = f"data/bugs_tier{tier}.jsonl"
    if not os.path.exists(path):
        print(f"WARNING: {path} not found. Run data/generate_bugs.py first.")
        return []
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default text encoding.
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
| 97 |
+
|
| 98 |
+
def get_bugs_for_step(step: int) -> list[dict]:
    """
    Select the curriculum bug pool for a given training step.

    - step < 300: tier 1 only
    - 300 <= step < 600: tier 1 plus the first 43% of tier 2
    - step >= 600: tiers 1, 2 and 3 in full
    """
    pool = load_bugs(1)

    if step >= 600:
        return pool + load_bugs(2) + load_bugs(3)

    if step >= 300:
        second_tier = load_bugs(2)
        cutoff = int(len(second_tier) * 0.43)
        return pool + second_tier[:cutoff]

    return pool
|
| 106 |
+
|
| 107 |
+
def bug_to_prompt(bug: dict) -> str:
    """
    Render one bug record as a chat-formatted prompt string.

    The prompt has a system turn holding SYSTEM_PROMPT, a user turn with the
    buggy code and the initial failure (a generic message when the record has
    no ``initial_error``), and an opened assistant turn for the model to fill.
    """
    failure = bug.get('initial_error', 'Some tests are failing.')
    system_turn = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
    user_turn = (
        "<|im_start|>user\n"
        f"Debug this Python function:\n\n```python\n{bug['buggy_code']}\n```\n\n"
        f"Initial failure: {failure}\n"
        "<|im_end|>\n"
    )
    assistant_open = "<|im_start|>assistant\n"
    return system_turn + user_turn + assistant_open
|
| 116 |
+
|
| 117 |
+
# ── Load model ────────────────────────────────────────────────────────────────
|
| 118 |
+
print(f"Loading {MODEL_NAME}...")
|
| 119 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 120 |
+
model_name=MODEL_NAME,
|
| 121 |
+
max_seq_length=4096,
|
| 122 |
+
load_in_4bit=True,
|
| 123 |
+
dtype=None,
|
| 124 |
+
)
|
| 125 |
+
model = FastLanguageModel.get_peft_model(
|
| 126 |
+
model,
|
| 127 |
+
r=16,
|
| 128 |
+
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
|
| 129 |
+
"gate_proj", "up_proj", "down_proj"],
|
| 130 |
+
lora_alpha=16,
|
| 131 |
+
lora_dropout=0,
|
| 132 |
+
bias="none",
|
| 133 |
+
use_gradient_checkpointing="unsloth",
|
| 134 |
+
random_state=42,
|
| 135 |
+
)
|
| 136 |
+
print(f"Trainable params: {model.num_parameters(only_trainable=True):,}")
|
| 137 |
+
|
| 138 |
+
# ── Reward function ───────────────────────────────────────────────────────────
|
| 139 |
+
calculator = DebugRewardCalculator()
|
| 140 |
+
|
| 141 |
+
def reward_fn(completions: list[str], prompts: list[str], **kwargs) -> list[float]:
    """
    Score a batch of completions for the GRPO trainer.

    Completions are paired positionally with the ``bug_metadata`` keyword
    argument (empty dicts when it is not supplied). Each completion is parsed,
    its proposed fix (if any) is executed against the bug's tests, and the
    turn-reward total is returned. Any exception while scoring a completion is
    printed and that completion receives -0.3.
    """
    fallback_metadata = [{}] * len(completions)
    bug_records = kwargs.get("bug_metadata", fallback_metadata)
    scores = []

    for text, record in zip(completions, bug_records):
        try:
            parsed = parse_agent_output(text)

            # Tests are only executed when the agent actually proposes a fix.
            outcome = {"passed": 0, "failed": 0, "total": 0, "newly_broken": 0}
            if parsed.action == "propose_fix" and record:
                outcome = _run_fix(parsed.detail, record)

            truth = {
                "bug_function": record.get("bug_location", {}).get("function", ""),
                "bug_line": record.get("bug_location", {}).get("line_start", -1),
                "bug_type": record.get("bug_type", ""),
                "canonical_fix_code": record.get("original_code", ""),
            }
            breakdown = calculator.compute_turn_reward(
                agent_output=parsed,
                ground_truth=truth,
                test_results=outcome,
                turn_number=0,
            )

            if WANDB_API_KEY:
                wandb.log(dict(breakdown.__dict__))

            score = breakdown.total
        except Exception as e:
            print(f"Reward error: {e}")
            score = -0.3

        scores.append(score)

    return scores
|
| 180 |
+
|
| 181 |
+
def _run_fix(proposed_code: str, bug: dict) -> dict:
|
| 182 |
+
"""Safely run proposed fix with subprocess timeout."""
|
| 183 |
+
test_cases = bug.get("test_cases", [])
|
| 184 |
+
func_name = bug.get("function_name", "")
|
| 185 |
+
if not proposed_code or not test_cases or not func_name:
|
| 186 |
+
return {"passed": 0, "failed": 0, "total": len(test_cases), "newly_broken": 0}
|
| 187 |
+
|
| 188 |
+
passed = 0
|
| 189 |
+
for test in test_cases:
|
| 190 |
+
inp = test["input"]
|
| 191 |
+
args_str = ", ".join(repr(x) for x in inp) if isinstance(inp, (list, tuple)) else repr(inp)
|
| 192 |
+
script = (
|
| 193 |
+
f"{proposed_code}\n"
|
| 194 |
+
f"try:\n"
|
| 195 |
+
f" r={func_name}({args_str})\n"
|
| 196 |
+
f" print('PASS' if r=={repr(test['expected_output'])} else 'FAIL')\n"
|
| 197 |
+
f"except Exception as e:\n"
|
| 198 |
+
f" print(f'ERROR: {{e}}')\n"
|
| 199 |
+
)
|
| 200 |
+
try:
|
| 201 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
| 202 |
+
f.write(script)
|
| 203 |
+
fname = f.name
|
| 204 |
+
r = subprocess.run(["python", fname], capture_output=True, text=True, timeout=5)
|
| 205 |
+
os.unlink(fname)
|
| 206 |
+
if "PASS" in r.stdout:
|
| 207 |
+
passed += 1
|
| 208 |
+
except Exception:
|
| 209 |
+
pass
|
| 210 |
+
|
| 211 |
+
return {"passed": passed, "failed": len(test_cases) - passed, "total": len(test_cases), "newly_broken": 0}
|
| 212 |
+
|
| 213 |
+
# ── Baseline evaluation (run BEFORE training) ─────────────────────────────────
|
| 214 |
+
def run_baseline(n: int = 20) -> dict:
    """
    Evaluate the current model on the first ``n`` tier-1 bugs.

    Generates one greedy completion per bug, scores it with ``reward_fn``,
    writes the summary to ``baseline_results.json`` and, when W&B is
    configured, logs the solve rate and average reward.

    Returns:
        ``{"solve_rate", "avg_reward", "rewards"}``.
    """
    print("\nRunning baseline evaluation on UNTRAINED model...")
    FastLanguageModel.for_inference(model)
    bugs = load_bugs(1)[:n]
    rewards = []
    solved = 0
    for bug in bugs:
        prompt = bug_to_prompt(bug)
        # Follow the model's own device instead of hard-coding "cuda".
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            # Greedy decoding: temperature only applies when sampling, so it is not passed.
            out = model.generate(**inputs, max_new_tokens=400, do_sample=False)
        # Drop the prompt tokens and decode only the newly generated part.
        completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        r = reward_fn([completion], [prompt], bug_metadata=[bug])
        rewards.append(r[0])
        # NOTE(review): "solved" is a reward threshold, not a check that the
        # tests passed — confirm 0.30 matches the intended metric.
        if r[0] > 0.30:
            solved += 1

    result = {"solve_rate": solved / max(len(bugs), 1), "avg_reward": sum(rewards) / max(len(rewards), 1), "rewards": rewards}
    with open("baseline_results.json", "w", encoding="utf-8") as f:
        json.dump(result, f)
    print(f"Baseline: solve_rate={result['solve_rate']:.1%}, avg_reward={result['avg_reward']:.3f}")
    if WANDB_API_KEY:
        wandb.log({"baseline/solve_rate": result["solve_rate"], "baseline/avg_reward": result["avg_reward"]})
    return result
|
| 238 |
+
|
| 239 |
+
baseline = run_baseline()
|
| 240 |
+
FastLanguageModel.for_training(model)
|
| 241 |
+
|
| 242 |
+
# ── Build initial dataset ─────────────────────────────────────────────────────
|
| 243 |
+
def make_dataset(step: int) -> Dataset:
    """
    Build the training dataset for the curriculum stage active at ``step``.

    Each row carries the rendered ``prompt`` and the full bug record under
    ``bug_metadata``.
    """
    # NOTE(review): rows embed the raw bug dict; if test-case inputs/outputs mix
    # types across bugs, schema inference in Dataset.from_list may fail — confirm.
    rows = []
    for record in get_bugs_for_step(step):
        rows.append({"prompt": bug_to_prompt(record), "bug_metadata": record})
    return Dataset.from_list(rows)
|
| 246 |
+
|
| 247 |
+
# ── Training config ───────────────────────────────────────────────────────────
|
| 248 |
+
# NOTE(review): warmup_steps (20) exceeds MAX_STEPS (10) in --test mode, so the
# learning rate never reaches its peak there — confirm this is intended.
# NOTE(review): some TRL versions require the per-step batch to be divisible by
# num_generations — verify 2 x 4 accumulation vs. 4 generations on the pinned version.
config = GRPOConfig(
    output_dir=CHECKPOINT_DIR,
    max_steps=MAX_STEPS,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    warmup_steps=20 if args.test else 50,
    num_generations=4,
    # GRPOConfig names the completion budget max_completion_length;
    # max_new_tokens is a generate() argument, not a config field.
    max_completion_length=400,
    temperature=0.8,
    logging_steps=5 if args.test else 10,
    save_steps=50 if args.test else 100,
    report_to="wandb" if WANDB_API_KEY else "none",
)
|
| 263 |
+
|
| 264 |
+
# Starts on the tier-1 dataset (step 0 of the curriculum).
trainer = GRPOTrainer(
    model=model,
    args=config,
    train_dataset=make_dataset(0),
    reward_funcs=reward_fn,
    # GRPOTrainer takes the tokenizer through `processing_class`.
    processing_class=tokenizer,
)
|
| 271 |
+
|
| 272 |
+
# ── Curriculum callback ───────────────────────────────────────────────────────
|
| 273 |
+
class CurriculumCallback(TrainerCallback):
    """Swap in the next curriculum dataset when training reaches step 300 or 600."""

    def on_step_end(self, args, state, control, **kwargs):
        """Rebuild the dataset at a curriculum boundary; otherwise do nothing."""
        current = state.global_step
        if current not in (300, 600):
            return

        # NOTE(review): this rebinds trainer.train_dataset while training is
        # running; confirm the trainer's dataloader actually picks it up.
        trainer.train_dataset = make_dataset(current)
        print(f"\nCurriculum advanced at step {current}!")
        if WANDB_API_KEY:
            wandb.log({"curriculum/step": current})
|
| 281 |
+
|
| 282 |
+
trainer.add_callback(CurriculumCallback())
|
| 283 |
+
|
| 284 |
+
# ── Train ─────────────────────────────────────────────────────────────────────
|
| 285 |
+
print(f"\nStarting GRPO training. Max steps: {MAX_STEPS}")
|
| 286 |
+
print(f"Baseline solve rate: {baseline['solve_rate']:.1%} — target: >60% after training")
|
| 287 |
+
trainer.train(resume_from_checkpoint=args.resume)
|
| 288 |
+
|
| 289 |
+
# ── Post-training evaluation ──────────────────────────────────────────────────
|
| 290 |
+
# Re-run the tier-1 evaluation on the trained model and compare with the baseline.
FastLanguageModel.for_inference(model)
bugs = load_bugs(1)[:20]
post_rewards = []
post_solved = 0
for bug in bugs:
    prompt = bug_to_prompt(bug)
    # Follow the model's own device instead of hard-coding "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding: temperature only applies when sampling, so it is not passed.
        out = model.generate(**inputs, max_new_tokens=400, do_sample=False)
    # Drop the prompt tokens and decode only the newly generated part.
    completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    r = reward_fn([completion], [prompt], bug_metadata=[bug])
    post_rewards.append(r[0])
    if r[0] > 0.30:
        post_solved += 1

post_solve_rate = post_solved / max(len(bugs), 1)
print(f"\n{'='*60}")
print(f"RESULTS:")
print(f"Before training: {baseline['solve_rate']:.1%} solve rate")
print(f"After training:  {post_solve_rate:.1%} solve rate")
# Signed format so a regression prints "-5.0%" rather than "+-5.0%".
print(f"Improvement: {post_solve_rate - baseline['solve_rate']:+.1%}")
print(f"{'='*60}")

if WANDB_API_KEY:
    wandb.log({"final/solve_rate": post_solve_rate, "final/improvement": post_solve_rate - baseline["solve_rate"]})
    wandb.finish()
|
| 316 |
+
|
| 317 |
+
# ── Save and push ─────────────────────────────────────────────────────────────
|
| 318 |
+
model.save_pretrained("./final_model")
|
| 319 |
+
tokenizer.save_pretrained("./final_model")
|
| 320 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 321 |
+
if HF_TOKEN and not args.test:
|
| 322 |
+
model.push_to_hub(HF_REPO, token=HF_TOKEN)
|
| 323 |
+
tokenizer.push_to_hub(HF_REPO, token=HF_TOKEN)
|
| 324 |
+
print(f"Pushed to https://huggingface.co/{HF_REPO}")
|
uv.lock
CHANGED
|
@@ -25,11 +25,11 @@ requires-dist = [
|
|
| 25 |
{ name = "httpx", specifier = "==0.27.0" },
|
| 26 |
{ name = "openai", specifier = "==2.7.2" },
|
| 27 |
{ name = "openenv-core", specifier = ">=0.2.0" },
|
| 28 |
-
{ name = "pydantic", specifier = "=
|
| 29 |
{ name = "pytest", specifier = "==8.1.0" },
|
| 30 |
{ name = "python-dotenv", specifier = "==1.0.1" },
|
| 31 |
{ name = "requests", specifier = "==2.31.0" },
|
| 32 |
-
{ name = "restrictedpython", specifier = "==7.
|
| 33 |
{ name = "uvicorn", specifier = "==0.29.0" },
|
| 34 |
]
|
| 35 |
|
|
@@ -541,73 +541,133 @@ wheels = [
|
|
| 541 |
|
| 542 |
[[package]]
|
| 543 |
name = "pydantic"
|
| 544 |
-
version = "2.
|
| 545 |
source = { registry = "https://pypi.org/simple" }
|
| 546 |
dependencies = [
|
| 547 |
{ name = "annotated-types" },
|
| 548 |
{ name = "pydantic-core" },
|
| 549 |
{ name = "typing-extensions" },
|
|
|
|
| 550 |
]
|
| 551 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 552 |
wheels = [
|
| 553 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 554 |
]
|
| 555 |
|
| 556 |
[[package]]
|
| 557 |
name = "pydantic-core"
|
| 558 |
-
version = "2.
|
| 559 |
source = { registry = "https://pypi.org/simple" }
|
| 560 |
dependencies = [
|
| 561 |
{ name = "typing-extensions" },
|
| 562 |
]
|
| 563 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 564 |
-
wheels = [
|
| 565 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 566 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 567 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 568 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 569 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 570 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 571 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 572 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 573 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 574 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 575 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 576 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 577 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 578 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 579 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 580 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 581 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 582 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 583 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 584 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 585 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 586 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 587 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 588 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 589 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 590 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 591 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 592 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 593 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 594 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 595 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 596 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 597 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 598 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 599 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 600 |
-
{ url = "https://files.pythonhosted.org/packages/d5/
|
| 601 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 602 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 603 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 604 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 605 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 606 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 607 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 608 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 609 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 610 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
]
|
| 612 |
|
| 613 |
[[package]]
|
|
@@ -726,11 +786,11 @@ wheels = [
|
|
| 726 |
|
| 727 |
[[package]]
|
| 728 |
name = "restrictedpython"
|
| 729 |
-
version = "7.
|
| 730 |
source = { registry = "https://pypi.org/simple" }
|
| 731 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 732 |
wheels = [
|
| 733 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 734 |
]
|
| 735 |
|
| 736 |
[[package]]
|
|
@@ -875,6 +935,18 @@ wheels = [
|
|
| 875 |
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
| 876 |
]
|
| 877 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 878 |
[[package]]
|
| 879 |
name = "urllib3"
|
| 880 |
version = "2.6.3"
|
|
|
|
| 25 |
{ name = "httpx", specifier = "==0.27.0" },
|
| 26 |
{ name = "openai", specifier = "==2.7.2" },
|
| 27 |
{ name = "openenv-core", specifier = ">=0.2.0" },
|
| 28 |
+
{ name = "pydantic", specifier = ">=2.9.0" },
|
| 29 |
{ name = "pytest", specifier = "==8.1.0" },
|
| 30 |
{ name = "python-dotenv", specifier = "==1.0.1" },
|
| 31 |
{ name = "requests", specifier = "==2.31.0" },
|
| 32 |
+
{ name = "restrictedpython", specifier = "==7.0" },
|
| 33 |
{ name = "uvicorn", specifier = "==0.29.0" },
|
| 34 |
]
|
| 35 |
|
|
|
|
| 541 |
|
| 542 |
[[package]]
|
| 543 |
name = "pydantic"
|
| 544 |
+
version = "2.13.3"
|
| 545 |
source = { registry = "https://pypi.org/simple" }
|
| 546 |
dependencies = [
|
| 547 |
{ name = "annotated-types" },
|
| 548 |
{ name = "pydantic-core" },
|
| 549 |
{ name = "typing-extensions" },
|
| 550 |
+
{ name = "typing-inspection" },
|
| 551 |
]
|
| 552 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d9/e4/40d09941a2cebcb20609b86a559817d5b9291c49dd6f8c87e5feffbe703a/pydantic-2.13.3.tar.gz", hash = "sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d", size = 844068, upload-time = "2026-04-20T14:46:43.632Z" }
|
| 553 |
wheels = [
|
| 554 |
+
{ url = "https://files.pythonhosted.org/packages/f3/0a/fd7d723f8f8153418fb40cf9c940e82004fce7e987026b08a68a36dd3fe7/pydantic-2.13.3-py3-none-any.whl", hash = "sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927", size = 471981, upload-time = "2026-04-20T14:46:41.402Z" },
|
| 555 |
]
|
| 556 |
|
| 557 |
[[package]]
|
| 558 |
name = "pydantic-core"
|
| 559 |
+
version = "2.46.3"
|
| 560 |
source = { registry = "https://pypi.org/simple" }
|
| 561 |
dependencies = [
|
| 562 |
{ name = "typing-extensions" },
|
| 563 |
]
|
| 564 |
+
sdist = { url = "https://files.pythonhosted.org/packages/2a/ef/f7abb56c49382a246fd2ce9c799691e3c3e7175ec74b14d99e798bcddb1a/pydantic_core-2.46.3.tar.gz", hash = "sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c", size = 471412, upload-time = "2026-04-20T14:40:56.672Z" }
|
| 565 |
+
wheels = [
|
| 566 |
+
{ url = "https://files.pythonhosted.org/packages/22/98/b50eb9a411e87483b5c65dba4fa430a06bac4234d3403a40e5a9905ebcd0/pydantic_core-2.46.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:1da3786b8018e60349680720158cc19161cc3b4bdd815beb0a321cd5ce1ad5b1", size = 2108971, upload-time = "2026-04-20T14:43:51.945Z" },
|
| 567 |
+
{ url = "https://files.pythonhosted.org/packages/08/4b/f364b9d161718ff2217160a4b5d41ce38de60aed91c3689ebffa1c939d23/pydantic_core-2.46.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc0988cb29d21bf4a9d5cf2ef970b5c0e38d8d8e107a493278c05dc6c1dda69f", size = 1949588, upload-time = "2026-04-20T14:44:10.386Z" },
|
| 568 |
+
{ url = "https://files.pythonhosted.org/packages/8f/8b/30bd03ee83b2f5e29f5ba8e647ab3c456bf56f2ec72fdbcc0215484a0854/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f9067c3bfadd04c55484b89c0d267981b2f3512850f6f66e1e74204a4e4ce3", size = 1975986, upload-time = "2026-04-20T14:43:57.106Z" },
|
| 569 |
+
{ url = "https://files.pythonhosted.org/packages/3c/54/13ccf954d84ec275d5d023d5786e4aa48840bc9f161f2838dc98e1153518/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a642ac886ecf6402d9882d10c405dcf4b902abeb2972cd5fb4a48c83cd59279a", size = 2055830, upload-time = "2026-04-20T14:44:15.499Z" },
|
| 570 |
+
{ url = "https://files.pythonhosted.org/packages/be/0e/65f38125e660fdbd72aa858e7dfae893645cfa0e7b13d333e174a367cd23/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79f561438481f28681584b89e2effb22855e2179880314bcddbf5968e935e807", size = 2222340, upload-time = "2026-04-20T14:41:51.353Z" },
|
| 571 |
+
{ url = "https://files.pythonhosted.org/packages/d1/88/f3ab7739efe0e7e80777dbb84c59eb98518e3f57ea433206194c2e425272/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57a973eae4665352a47cf1a99b4ee864620f2fe663a217d7a8da68a1f3a5bfda", size = 2280727, upload-time = "2026-04-20T14:41:30.461Z" },
|
| 572 |
+
{ url = "https://files.pythonhosted.org/packages/2a/6d/c228219080817bec4982f9531cadb18da6aaa770fdeb114f49c237ac2c9f/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83d002b97072a53ea150d63e0a3adfae5670cef5aa8a6e490240e482d3b22e57", size = 2092158, upload-time = "2026-04-20T14:44:07.305Z" },
|
| 573 |
+
{ url = "https://files.pythonhosted.org/packages/0f/b1/525a16711e7c6d61635fac3b0bd54600b5c5d9f60c6fc5aaab26b64a2297/pydantic_core-2.46.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:b40ddd51e7c44b28cfaef746c9d3c506d658885e0a46f9eeef2ee815cbf8e045", size = 2116626, upload-time = "2026-04-20T14:42:34.118Z" },
|
| 574 |
+
{ url = "https://files.pythonhosted.org/packages/ef/7c/17d30673351439a6951bf54f564cf2443ab00ae264ec9df00e2efd710eb5/pydantic_core-2.46.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac5ec7fb9b87f04ee839af2d53bcadea57ded7d229719f56c0ed895bff987943", size = 2160691, upload-time = "2026-04-20T14:41:14.023Z" },
|
| 575 |
+
{ url = "https://files.pythonhosted.org/packages/86/66/af8adbcbc0886ead7f1a116606a534d75a307e71e6e08226000d51b880d2/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a3b11c812f61b3129c4905781a2601dfdfdea5fe1e6c1cfb696b55d14e9c054f", size = 2182543, upload-time = "2026-04-20T14:40:48.886Z" },
|
| 576 |
+
{ url = "https://files.pythonhosted.org/packages/b0/37/6de71e0f54c54a4190010f57deb749e1ddf75c568ada3b1320b70067f121/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:1108da631e602e5b3c38d6d04fe5bb3bfa54349e6918e3ca6cf570b2e2b2f9d4", size = 2324513, upload-time = "2026-04-20T14:42:36.121Z" },
|
| 577 |
+
{ url = "https://files.pythonhosted.org/packages/51/b1/9fc74ce94f603d5ef59ff258ca9c2c8fb902fb548d340a96f77f4d1c3b7f/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de885175515bcfa98ae618c1df7a072f13d179f81376c8007112af20567fd08a", size = 2361853, upload-time = "2026-04-20T14:43:24.886Z" },
|
| 578 |
+
{ url = "https://files.pythonhosted.org/packages/40/d0/4c652fc592db35f100279ee751d5a145aca1b9a7984b9684ba7c1b5b0535/pydantic_core-2.46.3-cp310-cp310-win32.whl", hash = "sha256:d11058e3201527d41bc6b545c79187c9e4bf85e15a236a6007f0e991518882b7", size = 1980465, upload-time = "2026-04-20T14:44:46.239Z" },
|
| 579 |
+
{ url = "https://files.pythonhosted.org/packages/27/b8/a920453c38afbe1f355e1ea0b0d94a0a3e0b0879d32d793108755fa171d5/pydantic_core-2.46.3-cp310-cp310-win_amd64.whl", hash = "sha256:3612edf65c8ea67ac13616c4d23af12faef1ae435a8a93e5934c2a0cbbdd1fd6", size = 2073884, upload-time = "2026-04-20T14:43:01.201Z" },
|
| 580 |
+
{ url = "https://files.pythonhosted.org/packages/22/a2/1ba90a83e85a3f94c796b184f3efde9c72f2830dcda493eea8d59ba78e6d/pydantic_core-2.46.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ab124d49d0459b2373ecf54118a45c28a1e6d4192a533fbc915e70f556feb8e5", size = 2106740, upload-time = "2026-04-20T14:41:20.932Z" },
|
| 581 |
+
{ url = "https://files.pythonhosted.org/packages/b6/f6/99ae893c89a0b9d3daec9f95487aa676709aa83f67643b3f0abaf4ab628a/pydantic_core-2.46.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cca67d52a5c7a16aed2b3999e719c4bcf644074eac304a5d3d62dd70ae7d4b2c", size = 1948293, upload-time = "2026-04-20T14:43:42.115Z" },
|
| 582 |
+
{ url = "https://files.pythonhosted.org/packages/3e/b8/2e8e636dc9e3f16c2e16bf0849e24be82c5ee82c603c65fc0326666328fc/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c024e08c0ba23e6fd68c771a521e9d6a792f2ebb0fa734296b36394dc30390e", size = 1973222, upload-time = "2026-04-20T14:41:57.841Z" },
|
| 583 |
+
{ url = "https://files.pythonhosted.org/packages/34/36/0e730beec4d83c5306f417afbd82ff237d9a21e83c5edf675f31ed84c1fe/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6645ce7eec4928e29a1e3b3d5c946621d105d3e79f0c9cddf07c2a9770949287", size = 2053852, upload-time = "2026-04-20T14:40:43.077Z" },
|
| 584 |
+
{ url = "https://files.pythonhosted.org/packages/4b/f0/3071131f47e39136a17814576e0fada9168569f7f8c0e6ac4d1ede6a4958/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a712c7118e6c5ea96562f7b488435172abb94a3c53c22c9efc1412264a45cbbe", size = 2221134, upload-time = "2026-04-20T14:43:03.349Z" },
|
| 585 |
+
{ url = "https://files.pythonhosted.org/packages/2f/a9/a2dc023eec5aa4b02a467874bad32e2446957d2adcab14e107eab502e978/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a868ef3ff206343579021c40faf3b1edc64b1cc508ff243a28b0a514ccb050", size = 2279785, upload-time = "2026-04-20T14:41:19.285Z" },
|
| 586 |
+
{ url = "https://files.pythonhosted.org/packages/0a/44/93f489d16fb63fbd41c670441536541f6e8cfa1e5a69f40bc9c5d30d8c90/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc7e8c32db809aa0f6ea1d6869ebc8518a65d5150fdfad8bcae6a49ae32a22e2", size = 2089404, upload-time = "2026-04-20T14:43:10.108Z" },
|
| 587 |
+
{ url = "https://files.pythonhosted.org/packages/2a/78/8692e3aa72b2d004f7a5d937f1dfdc8552ba26caf0bec75f342c40f00dec/pydantic_core-2.46.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:3481bd1341dc85779ee506bc8e1196a277ace359d89d28588a9468c3ecbe63fa", size = 2114898, upload-time = "2026-04-20T14:44:51.475Z" },
|
| 588 |
+
{ url = "https://files.pythonhosted.org/packages/6a/62/e83133f2e7832532060175cebf1f13748f4c7e7e7165cdd1f611f174494b/pydantic_core-2.46.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8690eba565c6d68ffd3a8655525cbdd5246510b44a637ee2c6c03a7ebfe64d3c", size = 2157856, upload-time = "2026-04-20T14:43:46.64Z" },
|
| 589 |
+
{ url = "https://files.pythonhosted.org/packages/6d/ec/6a500e3ad7718ee50583fae79c8651f5d37e3abce1fa9ae177ae65842c53/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4de88889d7e88d50d40ee5b39d5dac0bcaef9ba91f7e536ac064e6b2834ecccf", size = 2180168, upload-time = "2026-04-20T14:42:00.302Z" },
|
| 590 |
+
{ url = "https://files.pythonhosted.org/packages/d8/53/8267811054b1aa7fc1dc7ded93812372ef79a839f5e23558136a6afbfde1/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:e480080975c1ef7f780b8f99ed72337e7cc5efea2e518a20a692e8e7b278eb8b", size = 2322885, upload-time = "2026-04-20T14:41:05.253Z" },
|
| 591 |
+
{ url = "https://files.pythonhosted.org/packages/c8/c1/1c0acdb3aa0856ddc4ecc55214578f896f2de16f400cf51627eb3c26c1c4/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de3a5c376f8cd94da9a1b8fd3dd1c16c7a7b216ed31dc8ce9fd7a22bf13b836e", size = 2360328, upload-time = "2026-04-20T14:41:43.991Z" },
|
| 592 |
+
{ url = "https://files.pythonhosted.org/packages/f0/d0/ef39cd0f4a926814f360e71c1adeab48ad214d9727e4deb48eedfb5bce1a/pydantic_core-2.46.3-cp311-cp311-win32.whl", hash = "sha256:fc331a5314ffddd5385b9ee9d0d2fee0b13c27e0e02dad71b1ae5d6561f51eeb", size = 1979464, upload-time = "2026-04-20T14:43:12.215Z" },
|
| 593 |
+
{ url = "https://files.pythonhosted.org/packages/18/9c/f41951b0d858e343f1cf09398b2a7b3014013799744f2c4a8ad6a3eec4f2/pydantic_core-2.46.3-cp311-cp311-win_amd64.whl", hash = "sha256:b5b9c6cf08a8a5e502698f5e153056d12c34b8fb30317e0c5fd06f45162a6346", size = 2070837, upload-time = "2026-04-20T14:41:47.707Z" },
|
| 594 |
+
{ url = "https://files.pythonhosted.org/packages/9f/1e/264a17cd582f6ed50950d4d03dd5fefd84e570e238afe1cb3e25cf238769/pydantic_core-2.46.3-cp311-cp311-win_arm64.whl", hash = "sha256:5dfd51cf457482f04ec49491811a2b8fd5b843b64b11eecd2d7a1ee596ea78a6", size = 2053647, upload-time = "2026-04-20T14:42:27.535Z" },
|
| 595 |
+
{ url = "https://files.pythonhosted.org/packages/4b/cb/5b47425556ecc1f3fe18ed2a0083188aa46e1dd812b06e406475b3a5d536/pydantic_core-2.46.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b11b59b3eee90a80a36701ddb4576d9ae31f93f05cb9e277ceaa09e6bf074a67", size = 2101946, upload-time = "2026-04-20T14:40:52.581Z" },
|
| 596 |
+
{ url = "https://files.pythonhosted.org/packages/a1/4f/2fb62c2267cae99b815bbf4a7b9283812c88ca3153ef29f7707200f1d4e5/pydantic_core-2.46.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af8653713055ea18a3abc1537fe2ebc42f5b0bbb768d1eb79fd74eb47c0ac089", size = 1951612, upload-time = "2026-04-20T14:42:42.996Z" },
|
| 597 |
+
{ url = "https://files.pythonhosted.org/packages/50/6e/b7348fd30d6556d132cddd5bd79f37f96f2601fe0608afac4f5fb01ec0b3/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75a519dab6d63c514f3a81053e5266c549679e4aa88f6ec57f2b7b854aceb1b0", size = 1977027, upload-time = "2026-04-20T14:42:02.001Z" },
|
| 598 |
+
{ url = "https://files.pythonhosted.org/packages/82/11/31d60ee2b45540d3fb0b29302a393dbc01cd771c473f5b5147bcd353e593/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6cd87cb1575b1ad05ba98894c5b5c96411ef678fa2f6ed2576607095b8d9789", size = 2063008, upload-time = "2026-04-20T14:44:17.952Z" },
|
| 599 |
+
{ url = "https://files.pythonhosted.org/packages/8a/db/3a9d1957181b59258f44a2300ab0f0be9d1e12d662a4f57bb31250455c52/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f80a55484b8d843c8ada81ebf70a682f3f00a3d40e378c06cf17ecb44d280d7d", size = 2233082, upload-time = "2026-04-20T14:40:57.934Z" },
|
| 600 |
+
{ url = "https://files.pythonhosted.org/packages/9c/e1/3277c38792aeb5cfb18c2f0c5785a221d9ff4e149abbe1184d53d5f72273/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3861f1731b90c50a3266316b9044f5c9b405eecb8e299b0a7120596334e4fe9c", size = 2304615, upload-time = "2026-04-20T14:42:12.584Z" },
|
| 601 |
+
{ url = "https://files.pythonhosted.org/packages/5e/d5/e3d9717c9eba10855325650afd2a9cba8e607321697f18953af9d562da2f/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb528e295ed31570ac3dcc9bfdd6e0150bc11ce6168ac87a8082055cf1a67395", size = 2094380, upload-time = "2026-04-20T14:43:05.522Z" },
|
| 602 |
+
{ url = "https://files.pythonhosted.org/packages/a1/20/abac35dedcbfd66c6f0b03e4e3564511771d6c9b7ede10a362d03e110d9b/pydantic_core-2.46.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:367508faa4973b992b271ba1494acaab36eb7e8739d1e47be5035fb1ea225396", size = 2135429, upload-time = "2026-04-20T14:41:55.549Z" },
|
| 603 |
+
{ url = "https://files.pythonhosted.org/packages/6c/a5/41bfd1df69afad71b5cf0535055bccc73022715ad362edbc124bc1e021d7/pydantic_core-2.46.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ad3c826fe523e4becf4fe39baa44286cff85ef137c729a2c5e269afbfd0905d", size = 2174582, upload-time = "2026-04-20T14:41:45.96Z" },
|
| 604 |
+
{ url = "https://files.pythonhosted.org/packages/79/65/38d86ea056b29b2b10734eb23329b7a7672ca604df4f2b6e9c02d4ee22fe/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ec638c5d194ef8af27db69f16c954a09797c0dc25015ad6123eb2c73a4d271ca", size = 2187533, upload-time = "2026-04-20T14:40:55.367Z" },
|
| 605 |
+
{ url = "https://files.pythonhosted.org/packages/b6/55/a1129141678a2026badc539ad1dee0a71d06f54c2f06a4bd68c030ac781b/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:28ed528c45446062ee66edb1d33df5d88828ae167de76e773a3c7f64bd14e976", size = 2332985, upload-time = "2026-04-20T14:44:13.05Z" },
|
| 606 |
+
{ url = "https://files.pythonhosted.org/packages/d7/60/cb26f4077719f709e54819f4e8e1d43f4091f94e285eb6bd21e1190a7b7c/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aed19d0c783886d5bd86d80ae5030006b45e28464218747dcf83dabfdd092c7b", size = 2373670, upload-time = "2026-04-20T14:41:53.421Z" },
|
| 607 |
+
{ url = "https://files.pythonhosted.org/packages/6b/7e/c3f21882bdf1d8d086876f81b5e296206c69c6082551d776895de7801fa0/pydantic_core-2.46.3-cp312-cp312-win32.whl", hash = "sha256:06d5d8820cbbdb4147578c1fe7ffcd5b83f34508cb9f9ab76e807be7db6ff0a4", size = 1966722, upload-time = "2026-04-20T14:44:30.588Z" },
|
| 608 |
+
{ url = "https://files.pythonhosted.org/packages/57/be/6b5e757b859013ebfbd7adba02f23b428f37c86dcbf78b5bb0b4ffd36e99/pydantic_core-2.46.3-cp312-cp312-win_amd64.whl", hash = "sha256:c3212fda0ee959c1dd04c60b601ec31097aaa893573a3a1abd0a47bcac2968c1", size = 2072970, upload-time = "2026-04-20T14:42:54.248Z" },
|
| 609 |
+
{ url = "https://files.pythonhosted.org/packages/bf/f8/a989b21cc75e9a32d24192ef700eea606521221a89faa40c919ce884f2b1/pydantic_core-2.46.3-cp312-cp312-win_arm64.whl", hash = "sha256:f1f8338dd7a7f31761f1f1a3c47503a9a3b34eea3c8b01fa6ee96408affb5e72", size = 2035963, upload-time = "2026-04-20T14:44:20.4Z" },
|
| 610 |
+
{ url = "https://files.pythonhosted.org/packages/9b/3c/9b5e8eb9821936d065439c3b0fb1490ffa64163bfe7e1595985a47896073/pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37", size = 2102109, upload-time = "2026-04-20T14:41:24.219Z" },
|
| 611 |
+
{ url = "https://files.pythonhosted.org/packages/91/97/1c41d1f5a19f241d8069f1e249853bcce378cdb76eec8ab636d7bc426280/pydantic_core-2.46.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f", size = 1951820, upload-time = "2026-04-20T14:42:14.236Z" },
|
| 612 |
+
{ url = "https://files.pythonhosted.org/packages/30/b4/d03a7ae14571bc2b6b3c7b122441154720619afe9a336fa3a95434df5e2f/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8", size = 1977785, upload-time = "2026-04-20T14:42:31.648Z" },
|
| 613 |
+
{ url = "https://files.pythonhosted.org/packages/ae/0c/4086f808834b59e3c8f1aa26df8f4b6d998cdcf354a143d18ef41529d1fe/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad", size = 2062761, upload-time = "2026-04-20T14:40:37.093Z" },
|
| 614 |
+
{ url = "https://files.pythonhosted.org/packages/fa/71/a649be5a5064c2df0db06e0a512c2281134ed2fcc981f52a657936a7527c/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c", size = 2232989, upload-time = "2026-04-20T14:42:59.254Z" },
|
| 615 |
+
{ url = "https://files.pythonhosted.org/packages/a2/84/7756e75763e810b3a710f4724441d1ecc5883b94aacb07ca71c5fb5cfb69/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f", size = 2303975, upload-time = "2026-04-20T14:41:32.287Z" },
|
| 616 |
+
{ url = "https://files.pythonhosted.org/packages/6c/35/68a762e0c1e31f35fa0dac733cbd9f5b118042853698de9509c8e5bf128b/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35", size = 2095325, upload-time = "2026-04-20T14:42:47.685Z" },
|
| 617 |
+
{ url = "https://files.pythonhosted.org/packages/77/bf/1bf8c9a8e91836c926eae5e3e51dce009bf495a60ca56060689d3df3f340/pydantic_core-2.46.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687", size = 2133368, upload-time = "2026-04-20T14:41:22.766Z" },
|
| 618 |
+
{ url = "https://files.pythonhosted.org/packages/e5/50/87d818d6bab915984995157ceb2380f5aac4e563dddbed6b56f0ed057aba/pydantic_core-2.46.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3", size = 2173908, upload-time = "2026-04-20T14:42:52.044Z" },
|
| 619 |
+
{ url = "https://files.pythonhosted.org/packages/91/88/a311fb306d0bd6185db41fa14ae888fb81d0baf648a761ae760d30819d33/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022", size = 2186422, upload-time = "2026-04-20T14:43:29.55Z" },
|
| 620 |
+
{ url = "https://files.pythonhosted.org/packages/8f/79/28fd0d81508525ab2054fef7c77a638c8b5b0afcbbaeee493cf7c3fef7e1/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23", size = 2332709, upload-time = "2026-04-20T14:42:16.134Z" },
|
| 621 |
+
{ url = "https://files.pythonhosted.org/packages/b3/21/795bf5fe5c0f379308b8ef19c50dedab2e7711dbc8d0c2acf08f1c7daa05/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7", size = 2372428, upload-time = "2026-04-20T14:41:10.974Z" },
|
| 622 |
+
{ url = "https://files.pythonhosted.org/packages/45/b3/ed14c659cbe7605e3ef063077680a64680aec81eb1a04763a05190d49b7f/pydantic_core-2.46.3-cp313-cp313-win32.whl", hash = "sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13", size = 1965601, upload-time = "2026-04-20T14:41:42.128Z" },
|
| 623 |
+
{ url = "https://files.pythonhosted.org/packages/ef/bb/adb70d9a762ddd002d723fbf1bd492244d37da41e3af7b74ad212609027e/pydantic_core-2.46.3-cp313-cp313-win_amd64.whl", hash = "sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0", size = 2071517, upload-time = "2026-04-20T14:43:36.096Z" },
|
| 624 |
+
{ url = "https://files.pythonhosted.org/packages/52/eb/66faefabebfe68bd7788339c9c9127231e680b11906368c67ce112fdb47f/pydantic_core-2.46.3-cp313-cp313-win_arm64.whl", hash = "sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec", size = 2035802, upload-time = "2026-04-20T14:43:38.507Z" },
|
| 625 |
+
{ url = "https://files.pythonhosted.org/packages/7f/db/a7bcb4940183fda36022cd18ba8dd12f2dff40740ec7b58ce7457befa416/pydantic_core-2.46.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b", size = 2097614, upload-time = "2026-04-20T14:44:38.374Z" },
|
| 626 |
+
{ url = "https://files.pythonhosted.org/packages/24/35/e4066358a22e3e99519db370494c7528f5a2aa1367370e80e27e20283543/pydantic_core-2.46.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018", size = 1951896, upload-time = "2026-04-20T14:40:53.996Z" },
|
| 627 |
+
{ url = "https://files.pythonhosted.org/packages/87/92/37cf4049d1636996e4b888c05a501f40a43ff218983a551d57f9d5e14f0d/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34", size = 1979314, upload-time = "2026-04-20T14:41:49.446Z" },
|
| 628 |
+
{ url = "https://files.pythonhosted.org/packages/d8/36/9ff4d676dfbdfb2d591cf43f3d90ded01e15b1404fd101180ed2d62a2fd3/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7", size = 2056133, upload-time = "2026-04-20T14:42:23.574Z" },
|
| 629 |
+
{ url = "https://files.pythonhosted.org/packages/bc/f0/405b442a4d7ba855b06eec8b2bf9c617d43b8432d099dfdc7bf999293495/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2", size = 2228726, upload-time = "2026-04-20T14:44:22.816Z" },
|
| 630 |
+
{ url = "https://files.pythonhosted.org/packages/e7/f8/65cd92dd5a0bd89ba277a98ecbfaf6fc36bbd3300973c7a4b826d6ab1391/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba", size = 2301214, upload-time = "2026-04-20T14:44:48.792Z" },
|
| 631 |
+
{ url = "https://files.pythonhosted.org/packages/fd/86/ef96a4c6e79e7a2d0410826a68fbc0eccc0fd44aa733be199d5fcac3bb87/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f", size = 2099927, upload-time = "2026-04-20T14:41:40.196Z" },
|
| 632 |
+
{ url = "https://files.pythonhosted.org/packages/6d/53/269caf30e0096e0a8a8f929d1982a27b3879872cca2d917d17c2f9fdf4fe/pydantic_core-2.46.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22", size = 2128789, upload-time = "2026-04-20T14:41:15.868Z" },
|
| 633 |
+
{ url = "https://files.pythonhosted.org/packages/00/b0/1a6d9b6a587e118482910c244a1c5acf4d192604174132efd12bf0ac486f/pydantic_core-2.46.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f", size = 2173815, upload-time = "2026-04-20T14:44:25.152Z" },
|
| 634 |
+
{ url = "https://files.pythonhosted.org/packages/87/56/e7e00d4041a7e62b5a40815590114db3b535bf3ca0bf4dca9f16cef25246/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127", size = 2181608, upload-time = "2026-04-20T14:41:28.933Z" },
|
| 635 |
+
{ url = "https://files.pythonhosted.org/packages/e8/22/4bd23c3d41f7c185d60808a1de83c76cf5aeabf792f6c636a55c3b1ec7f9/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c", size = 2326968, upload-time = "2026-04-20T14:42:03.962Z" },
|
| 636 |
+
{ url = "https://files.pythonhosted.org/packages/24/ac/66cd45129e3915e5ade3b292cb3bc7fd537f58f8f8dbdaba6170f7cabb74/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1", size = 2369842, upload-time = "2026-04-20T14:41:35.52Z" },
|
| 637 |
+
{ url = "https://files.pythonhosted.org/packages/a2/51/dd4248abb84113615473aa20d5545b7c4cd73c8644003b5259686f93996c/pydantic_core-2.46.3-cp314-cp314-win32.whl", hash = "sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505", size = 1959661, upload-time = "2026-04-20T14:41:00.042Z" },
|
| 638 |
+
{ url = "https://files.pythonhosted.org/packages/20/eb/59980e5f1ae54a3b86372bd9f0fa373ea2d402e8cdcd3459334430f91e91/pydantic_core-2.46.3-cp314-cp314-win_amd64.whl", hash = "sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e", size = 2071686, upload-time = "2026-04-20T14:43:16.471Z" },
|
| 639 |
+
{ url = "https://files.pythonhosted.org/packages/8c/db/1cf77e5247047dfee34bc01fa9bca134854f528c8eb053e144298893d370/pydantic_core-2.46.3-cp314-cp314-win_arm64.whl", hash = "sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df", size = 2026907, upload-time = "2026-04-20T14:43:31.732Z" },
|
| 640 |
+
{ url = "https://files.pythonhosted.org/packages/57/c0/b3df9f6a543276eadba0a48487b082ca1f201745329d97dbfa287034a230/pydantic_core-2.46.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf", size = 2095047, upload-time = "2026-04-20T14:42:37.982Z" },
|
| 641 |
+
{ url = "https://files.pythonhosted.org/packages/66/57/886a938073b97556c168fd99e1a7305bb363cd30a6d2c76086bf0587b32a/pydantic_core-2.46.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee", size = 1934329, upload-time = "2026-04-20T14:43:49.655Z" },
|
| 642 |
+
{ url = "https://files.pythonhosted.org/packages/0b/7c/b42eaa5c34b13b07ecb51da21761297a9b8eb43044c864a035999998f328/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a", size = 1974847, upload-time = "2026-04-20T14:42:10.737Z" },
|
| 643 |
+
{ url = "https://files.pythonhosted.org/packages/e6/9b/92b42db6543e7de4f99ae977101a2967b63122d4b6cf7773812da2d7d5b5/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c", size = 2041742, upload-time = "2026-04-20T14:40:44.262Z" },
|
| 644 |
+
{ url = "https://files.pythonhosted.org/packages/0f/19/46fbe1efabb5aa2834b43b9454e70f9a83ad9c338c1291e48bdc4fecf167/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1", size = 2236235, upload-time = "2026-04-20T14:41:27.307Z" },
|
| 645 |
+
{ url = "https://files.pythonhosted.org/packages/77/da/b3f95bc009ad60ec53120f5d16c6faa8cabdbe8a20d83849a1f2b8728148/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64", size = 2282633, upload-time = "2026-04-20T14:44:33.271Z" },
|
| 646 |
+
{ url = "https://files.pythonhosted.org/packages/cc/6e/401336117722e28f32fb8220df676769d28ebdf08f2f4469646d404c43a3/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb", size = 2109679, upload-time = "2026-04-20T14:44:41.065Z" },
|
| 647 |
+
{ url = "https://files.pythonhosted.org/packages/fc/53/b289f9bc8756a32fe718c46f55afaeaf8d489ee18d1a1e7be1db73f42cc4/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6", size = 2108342, upload-time = "2026-04-20T14:42:50.144Z" },
|
| 648 |
+
{ url = "https://files.pythonhosted.org/packages/10/5b/8292fc7c1f9111f1b2b7c1b0dcf1179edcd014fc3ea4517499f50b829d71/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c", size = 2157208, upload-time = "2026-04-20T14:42:08.133Z" },
|
| 649 |
+
{ url = "https://files.pythonhosted.org/packages/2b/9e/f80044e9ec07580f057a89fc131f78dda7a58751ddf52bbe05eaf31db50f/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47", size = 2167237, upload-time = "2026-04-20T14:42:25.412Z" },
|
| 650 |
+
{ url = "https://files.pythonhosted.org/packages/f8/84/6781a1b037f3b96be9227edbd1101f6d3946746056231bf4ac48cdff1a8d/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab", size = 2312540, upload-time = "2026-04-20T14:40:40.313Z" },
|
| 651 |
+
{ url = "https://files.pythonhosted.org/packages/3e/db/19c0839feeb728e7df03255581f198dfdf1c2aeb1e174a8420b63c5252e5/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba", size = 2369556, upload-time = "2026-04-20T14:41:09.427Z" },
|
| 652 |
+
{ url = "https://files.pythonhosted.org/packages/e0/15/3228774cb7cd45f5f721ddf1b2242747f4eb834d0c491f0c02d606f09fed/pydantic_core-2.46.3-cp314-cp314t-win32.whl", hash = "sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56", size = 1949756, upload-time = "2026-04-20T14:41:25.717Z" },
|
| 653 |
+
{ url = "https://files.pythonhosted.org/packages/b8/2a/c79cf53fd91e5a87e30d481809f52f9a60dd221e39de66455cf04deaad37/pydantic_core-2.46.3-cp314-cp314t-win_amd64.whl", hash = "sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8", size = 2051305, upload-time = "2026-04-20T14:43:18.627Z" },
|
| 654 |
+
{ url = "https://files.pythonhosted.org/packages/0b/db/d8182a7f1d9343a032265aae186eb063fe26ca4c40f256b21e8da4498e89/pydantic_core-2.46.3-cp314-cp314t-win_arm64.whl", hash = "sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374", size = 2026310, upload-time = "2026-04-20T14:41:01.778Z" },
|
| 655 |
+
{ url = "https://files.pythonhosted.org/packages/66/7f/03dbad45cd3aa9083fbc93c210ae8b005af67e4136a14186950a747c6874/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:9715525891ed524a0a1eb6d053c74d4d4ad5017677fb00af0b7c2644a31bae46", size = 2105683, upload-time = "2026-04-20T14:42:19.779Z" },
|
| 656 |
+
{ url = "https://files.pythonhosted.org/packages/26/22/4dc186ac8ea6b257e9855031f51b62a9637beac4d68ac06bee02f046f836/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:9d2f400712a99a013aff420ef1eb9be077f8189a36c1e3ef87660b4e1088a874", size = 1940052, upload-time = "2026-04-20T14:43:59.274Z" },
|
| 657 |
+
{ url = "https://files.pythonhosted.org/packages/0d/ca/d376391a5aff1f2e8188960d7873543608130a870961c2b6b5236627c116/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd2aab0e2e9dc2daf36bd2686c982535d5e7b1d930a1344a7bb6e82baab42a76", size = 1988172, upload-time = "2026-04-20T14:41:17.469Z" },
|
| 658 |
+
{ url = "https://files.pythonhosted.org/packages/0e/6b/523b9f85c23788755d6ab949329de692a2e3a584bc6beb67fef5e035aa9d/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e9d76736da5f362fabfeea6a69b13b7f2be405c6d6966f06b2f6bfff7e64531", size = 2128596, upload-time = "2026-04-20T14:40:41.707Z" },
|
| 659 |
+
{ url = "https://files.pythonhosted.org/packages/34/42/f426db557e8ab2791bc7562052299944a118655496fbff99914e564c0a94/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:b12dd51f1187c2eb489af8e20f880362db98e954b54ab792fa5d92e8bcc6b803", size = 2091877, upload-time = "2026-04-20T14:43:27.091Z" },
|
| 660 |
+
{ url = "https://files.pythonhosted.org/packages/5c/4f/86a832a9d14df58e663bfdf4627dc00d3317c2bd583c4fb23390b0f04b8e/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:f00a0961b125f1a47af7bcc17f00782e12f4cd056f83416006b30111d941dfa3", size = 1932428, upload-time = "2026-04-20T14:40:45.781Z" },
|
| 661 |
+
{ url = "https://files.pythonhosted.org/packages/11/1a/fe857968954d93fb78e0d4b6df5c988c74c4aaa67181c60be7cfe327c0ca/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57697d7c056aca4bbb680200f96563e841a6386ac1129370a0102592f4dddff5", size = 1997550, upload-time = "2026-04-20T14:44:02.425Z" },
|
| 662 |
+
{ url = "https://files.pythonhosted.org/packages/17/eb/9d89ad2d9b0ba8cd65393d434471621b98912abb10fbe1df08e480ba57b5/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd35aa21299def8db7ef4fe5c4ff862941a9a158ca7b63d61e66fe67d30416b4", size = 2137657, upload-time = "2026-04-20T14:42:45.149Z" },
|
| 663 |
+
{ url = "https://files.pythonhosted.org/packages/1f/da/99d40830684f81dec901cac521b5b91c095394cc1084b9433393cde1c2df/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:13afdd885f3d71280cf286b13b310ee0f7ccfefd1dbbb661514a474b726e2f25", size = 2107973, upload-time = "2026-04-20T14:42:06.175Z" },
|
| 664 |
+
{ url = "https://files.pythonhosted.org/packages/99/a5/87024121818d75bbb2a98ddbaf638e40e7a18b5e0f5492c9ca4b1b316107/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f91c0aff3e3ee0928edd1232c57f643a7a003e6edf1860bc3afcdc749cb513f3", size = 1947191, upload-time = "2026-04-20T14:43:14.319Z" },
|
| 665 |
+
{ url = "https://files.pythonhosted.org/packages/60/62/0c1acfe10945b83a6a59d19fbaa92f48825381509e5701b855c08f13db76/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6529d1d128321a58d30afcc97b49e98836542f68dd41b33c2e972bb9e5290536", size = 2123791, upload-time = "2026-04-20T14:43:22.766Z" },
|
| 666 |
+
{ url = "https://files.pythonhosted.org/packages/75/3e/3b2393b4c8f44285561dc30b00cf307a56a2eff7c483a824db3b8221ca51/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:975c267cff4f7e7272eacbe50f6cc03ca9a3da4c4fbd66fffd89c94c1e311aa1", size = 2153197, upload-time = "2026-04-20T14:44:27.932Z" },
|
| 667 |
+
{ url = "https://files.pythonhosted.org/packages/ba/75/5af02fb35505051eee727c061f2881c555ab4f8ddb2d42da715a42c9731b/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2b8e4f2bbdf71415c544b4b1138b8060db7b6611bc927e8064c769f64bed651c", size = 2181073, upload-time = "2026-04-20T14:43:20.729Z" },
|
| 668 |
+
{ url = "https://files.pythonhosted.org/packages/10/92/7e0e1bd9ca3c68305db037560ca2876f89b2647deb2f8b6319005de37505/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e61ea8e9fff9606d09178f577ff8ccdd7206ff73d6552bcec18e1033c4254b85", size = 2315886, upload-time = "2026-04-20T14:44:04.826Z" },
|
| 669 |
+
{ url = "https://files.pythonhosted.org/packages/b8/d8/101655f27eaf3e44558ead736b2795d12500598beed4683f279396fa186e/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b504bda01bafc69b6d3c7a0c7f039dcf60f47fab70e06fe23f57b5c75bdc82b8", size = 2360528, upload-time = "2026-04-20T14:40:47.431Z" },
|
| 670 |
+
{ url = "https://files.pythonhosted.org/packages/07/0f/1c34a74c8d07136f0d729ffe5e1fdab04fbdaa7684f61a92f92511a84a15/pydantic_core-2.46.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff", size = 2184144, upload-time = "2026-04-20T14:42:57Z" },
|
| 671 |
]
|
| 672 |
|
| 673 |
[[package]]
|
|
|
|
| 786 |
|
| 787 |
[[package]]
|
| 788 |
name = "restrictedpython"
|
| 789 |
+
version = "7.0"
|
| 790 |
source = { registry = "https://pypi.org/simple" }
|
| 791 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ce/7c/19254deb8d2e1a0eea74fe92c3dbd250b400aa853e027de6734fce7ea143/RestrictedPython-7.0.tar.gz", hash = "sha256:53704afbbc350fdc8fb245441367be671c9f8380869201b2e8452e74fce3db14", size = 447152, upload-time = "2023-11-17T07:19:15.173Z" }
|
| 792 |
wheels = [
|
| 793 |
+
{ url = "https://files.pythonhosted.org/packages/5b/85/f40474f97f71e4b7745641635157870f232ce9b7614814d7ce8b82586cb6/RestrictedPython-7.0-py3-none-any.whl", hash = "sha256:8bb40a822090bed9c7b814d69345b0796db70cc86715d141efc937862f37c6d2", size = 26693, upload-time = "2023-11-17T07:19:12.674Z" },
|
| 794 |
]
|
| 795 |
|
| 796 |
[[package]]
|
|
|
|
| 935 |
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
| 936 |
]
|
| 937 |
|
| 938 |
+
[[package]]
|
| 939 |
+
name = "typing-inspection"
|
| 940 |
+
version = "0.4.2"
|
| 941 |
+
source = { registry = "https://pypi.org/simple" }
|
| 942 |
+
dependencies = [
|
| 943 |
+
{ name = "typing-extensions" },
|
| 944 |
+
]
|
| 945 |
+
sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" }
|
| 946 |
+
wheels = [
|
| 947 |
+
{ url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
|
| 948 |
+
]
|
| 949 |
+
|
| 950 |
[[package]]
|
| 951 |
name = "urllib3"
|
| 952 |
version = "2.6.3"
|