Update app.py
Browse files
app.py
CHANGED
|
@@ -806,126 +806,157 @@ stack_opcodes_functionalities = "\n".join([
|
|
| 806 |
ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
|
| 807 |
|
| 808 |
# Helper function to extract JSON from LLM response
|
| 809 |
-
def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
|
| 854 |
-
|
| 855 |
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
|
| 865 |
-
|
| 866 |
|
| 867 |
-
|
| 868 |
-
|
| 869 |
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
|
| 874 |
-
|
| 875 |
-
|
| 876 |
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
|
| 881 |
-
|
| 882 |
-
|
| 883 |
|
| 884 |
-
|
| 885 |
-
|
| 886 |
|
| 887 |
-
|
| 888 |
-
|
| 889 |
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
|
| 917 |
-
|
| 918 |
-
|
| 919 |
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 928 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
# def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 930 |
# # --- 1) Pull out the JSON code‑block if present ---
|
| 931 |
# md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
|
|
|
|
| 806 |
ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
|
| 807 |
|
| 808 |
# Helper function to extract JSON from LLM response
|
| 809 |
+
# def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 810 |
+
# """
|
| 811 |
+
# Improved JSON extraction with better error handling and validation
|
| 812 |
+
# """
|
| 813 |
+
# print(f"Raw LLM response: {raw_response[:200]}...")
|
| 814 |
|
| 815 |
+
# # Try to find JSON in code blocks first
|
| 816 |
+
# md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
|
| 817 |
+
# if md:
|
| 818 |
+
# json_string = md.group(1).strip()
|
| 819 |
+
# else:
|
| 820 |
+
# json_string = raw_response.strip()
|
| 821 |
+
|
| 822 |
+
# # Find the first complete JSON object (handle cases with multiple objects/arrays)
|
| 823 |
+
# first_brace = json_string.find('{')
|
| 824 |
+
# if first_brace == -1:
|
| 825 |
+
# print("No JSON object found in response")
|
| 826 |
+
# return {
|
| 827 |
+
# "refined_logic": {
|
| 828 |
+
# "name_variable": "No Code-blocks",
|
| 829 |
+
# "pseudocode": "No Code-blocks"
|
| 830 |
+
# }
|
| 831 |
+
# }
|
| 832 |
|
| 833 |
+
# # Find the matching closing brace for the first opening brace
|
| 834 |
+
# brace_count = 0
|
| 835 |
+
# last_brace = -1
|
| 836 |
+
# for i, char in enumerate(json_string[first_brace:], first_brace):
|
| 837 |
+
# if char == '{':
|
| 838 |
+
# brace_count += 1
|
| 839 |
+
# elif char == '}':
|
| 840 |
+
# brace_count -= 1
|
| 841 |
+
# if brace_count == 0:
|
| 842 |
+
# last_brace = i
|
| 843 |
+
# break
|
| 844 |
|
| 845 |
+
# if last_brace == -1:
|
| 846 |
+
# print("No matching closing brace found")
|
| 847 |
+
# return {
|
| 848 |
+
# "refined_logic": {
|
| 849 |
+
# "name_variable": "Parse Error",
|
| 850 |
+
# "pseudocode": "Malformed JSON"
|
| 851 |
+
# }
|
| 852 |
+
# }
|
| 853 |
|
| 854 |
+
# json_string = json_string[first_brace:last_brace+1]
|
| 855 |
|
| 856 |
+
# # Simple cleanup - just handle the most common issues
|
| 857 |
+
# # 1. Remove trailing commas
|
| 858 |
+
# json_string = re.sub(r',\s*}', '}', json_string)
|
| 859 |
+
# json_string = re.sub(r',\s*]', ']', json_string)
|
| 860 |
|
| 861 |
+
# # 2. Fix single quotes around simple values (not containing quotes)
|
| 862 |
+
# json_string = re.sub(r"'([^'\"]*)'(\s*:)", r'"\1"\2', json_string) # Keys
|
| 863 |
+
# json_string = re.sub(r"(:\s*)'([^'\"]*)'(\s*[,}])", r'\1"\2"\3', json_string) # Simple values
|
| 864 |
|
| 865 |
+
# print(f"Cleaned JSON string: {json_string[:200]}...")
|
| 866 |
|
| 867 |
+
# try:
|
| 868 |
+
# parsed = json.loads(json_string)
|
| 869 |
|
| 870 |
+
# # Validate the expected structure
|
| 871 |
+
# if not isinstance(parsed, dict):
|
| 872 |
+
# raise ValueError("Response is not a JSON object")
|
| 873 |
|
| 874 |
+
# if "refined_logic" not in parsed:
|
| 875 |
+
# raise ValueError("Missing 'refined_logic' key")
|
| 876 |
|
| 877 |
+
# refined_logic = parsed["refined_logic"]
|
| 878 |
+
# if not isinstance(refined_logic, dict):
|
| 879 |
+
# raise ValueError("'refined_logic' is not an object")
|
| 880 |
|
| 881 |
+
# if "name_variable" not in refined_logic or "pseudocode" not in refined_logic:
|
| 882 |
+
# raise ValueError("Missing required keys in 'refined_logic'")
|
| 883 |
|
| 884 |
+
# print("Successfully parsed and validated JSON")
|
| 885 |
+
# return parsed
|
| 886 |
|
| 887 |
+
# except (json.JSONDecodeError, ValueError) as e:
|
| 888 |
+
# print(f"JSON parsing failed: {e}")
|
| 889 |
|
| 890 |
+
# # Try to extract meaningful data even from malformed JSON using regex
|
| 891 |
+
# try:
|
| 892 |
+
# # Look for name_variable and pseudocode patterns with more flexible matching
|
| 893 |
+
# name_match = re.search(r'"name_variable":\s*["\']([^"\']*)["\']', raw_response)
|
| 894 |
+
# pseudo_match = re.search(r'"pseudocode":\s*["\']([^"\']*)["\']', raw_response)
|
| 895 |
|
| 896 |
+
# if name_match and pseudo_match:
|
| 897 |
+
# print("Extracted data using regex fallback")
|
| 898 |
+
# return {
|
| 899 |
+
# "refined_logic": {
|
| 900 |
+
# "name_variable": name_match.group(1),
|
| 901 |
+
# "pseudocode": pseudo_match.group(1)
|
| 902 |
+
# }
|
| 903 |
+
# }
|
| 904 |
|
| 905 |
+
# # Try to find any valid JSON-like structure in the response
|
| 906 |
+
# # Look for patterns like {'refined_logic': 'pseudocode', 'block_relationships': [...]}
|
| 907 |
+
# alt_match = re.search(r"'name_variable':\s*'([^']*)'.*?'pseudocode':\s*'([^']*)'", raw_response, re.DOTALL)
|
| 908 |
+
# if alt_match:
|
| 909 |
+
# print("Extracted data using alternative pattern")
|
| 910 |
+
# return {
|
| 911 |
+
# "refined_logic": {
|
| 912 |
+
# "name_variable": alt_match.group(1),
|
| 913 |
+
# "pseudocode": alt_match.group(2)
|
| 914 |
+
# }
|
| 915 |
+
# }
|
| 916 |
|
| 917 |
+
# except Exception as regex_error:
|
| 918 |
+
# print(f"Regex extraction also failed: {regex_error}")
|
| 919 |
|
| 920 |
+
# # Return a default structure on parsing failure
|
| 921 |
+
# return {
|
| 922 |
+
# "refined_logic": {
|
| 923 |
+
# "name_variable": "Parse Error",
|
| 924 |
+
# "pseudocode": "Failed to parse response"
|
| 925 |
+
# }
|
| 926 |
+
# }
|
| 927 |
|
| 928 |
+
def extract_json_from_llm_response(raw_response: str) -> dict:
    """
    Find and parse the first valid JSON object in a raw LLM response string.

    Two strategies are tried in order:

    1. A fenced markdown block (``` or ```json) whose content is a JSON
       object.
    2. The JSON object that starts at the first '{' in the raw text. Any
       text after that object's closing brace (commentary, a second
       object, stray braces) is ignored.

    Args:
        raw_response: The unprocessed text returned by the LLM.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: If the response contains no '{' or the object
            starting at the first '{' is not valid JSON. The exception is
            propagated on purpose so the calling logic can react to it
            (e.g. by invoking the corrector agent).
    """
    logger.debug("Attempting to extract JSON from raw LLM response...")

    # 1. Look for a JSON markdown block first
    match = re.search(r"```(?:json)?\s*({[\s\S]*?})\s*```", raw_response)
    if match:
        logger.debug("Found JSON inside a markdown block.")
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError as e:
            logger.warning("Failed to parse JSON from markdown block: %s", e)
            # Fall through to the next method if parsing fails

    # 2. If no block is found (or it failed), decode the object that starts
    #    at the first opening brace.
    logger.debug("Markdown block not found or failed. Searching for outermost braces.")
    try:
        first_brace = raw_response.find('{')
        if first_brace == -1:
            logger.error("Could not find a valid JSON structure (outermost braces).")
            raise json.JSONDecodeError("No valid JSON object found in the response.", raw_response, 0)

        # raw_decode stops at the end of the first complete JSON value, so
        # trailing text that contains braces no longer breaks parsing the
        # way slicing up to the last '}' did.
        parsed, _end = json.JSONDecoder().raw_decode(raw_response, first_brace)
        return parsed
    except json.JSONDecodeError as e:
        logger.error("Final JSON parsing attempt failed: %s", e)
        # Re-raise the exception to be caught by the calling logic (to invoke the corrector agent)
        raise
|
| 960 |
# def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 961 |
# # --- 1) Pull out the JSON code‑block if present ---
|
| 962 |
# md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
|