prthm11 committed on
Commit
c16de1d
·
verified ·
1 Parent(s): 5e4aebd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -99
app.py CHANGED
@@ -806,126 +806,157 @@ stack_opcodes_functionalities = "\n".join([
806
  ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
807
 
808
  # Helper function to extract JSON from LLM response
809
- def extract_json_from_llm_response(raw_response: str) -> dict:
810
- """
811
- Improved JSON extraction with better error handling and validation
812
- """
813
- print(f"Raw LLM response: {raw_response[:200]}...")
814
 
815
- # Try to find JSON in code blocks first
816
- md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
817
- if md:
818
- json_string = md.group(1).strip()
819
- else:
820
- json_string = raw_response.strip()
821
-
822
- # Find the first complete JSON object (handle cases with multiple objects/arrays)
823
- first_brace = json_string.find('{')
824
- if first_brace == -1:
825
- print("No JSON object found in response")
826
- return {
827
- "refined_logic": {
828
- "name_variable": "No Code-blocks",
829
- "pseudocode": "No Code-blocks"
830
- }
831
- }
832
 
833
- # Find the matching closing brace for the first opening brace
834
- brace_count = 0
835
- last_brace = -1
836
- for i, char in enumerate(json_string[first_brace:], first_brace):
837
- if char == '{':
838
- brace_count += 1
839
- elif char == '}':
840
- brace_count -= 1
841
- if brace_count == 0:
842
- last_brace = i
843
- break
844
 
845
- if last_brace == -1:
846
- print("No matching closing brace found")
847
- return {
848
- "refined_logic": {
849
- "name_variable": "Parse Error",
850
- "pseudocode": "Malformed JSON"
851
- }
852
- }
853
 
854
- json_string = json_string[first_brace:last_brace+1]
855
 
856
- # Simple cleanup - just handle the most common issues
857
- # 1. Remove trailing commas
858
- json_string = re.sub(r',\s*}', '}', json_string)
859
- json_string = re.sub(r',\s*]', ']', json_string)
860
 
861
- # 2. Fix single quotes around simple values (not containing quotes)
862
- json_string = re.sub(r"'([^'\"]*)'(\s*:)", r'"\1"\2', json_string) # Keys
863
- json_string = re.sub(r"(:\s*)'([^'\"]*)'(\s*[,}])", r'\1"\2"\3', json_string) # Simple values
864
 
865
- print(f"Cleaned JSON string: {json_string[:200]}...")
866
 
867
- try:
868
- parsed = json.loads(json_string)
869
 
870
- # Validate the expected structure
871
- if not isinstance(parsed, dict):
872
- raise ValueError("Response is not a JSON object")
873
 
874
- if "refined_logic" not in parsed:
875
- raise ValueError("Missing 'refined_logic' key")
876
 
877
- refined_logic = parsed["refined_logic"]
878
- if not isinstance(refined_logic, dict):
879
- raise ValueError("'refined_logic' is not an object")
880
 
881
- if "name_variable" not in refined_logic or "pseudocode" not in refined_logic:
882
- raise ValueError("Missing required keys in 'refined_logic'")
883
 
884
- print("Successfully parsed and validated JSON")
885
- return parsed
886
 
887
- except (json.JSONDecodeError, ValueError) as e:
888
- print(f"JSON parsing failed: {e}")
889
 
890
- # Try to extract meaningful data even from malformed JSON using regex
891
- try:
892
- # Look for name_variable and pseudocode patterns with more flexible matching
893
- name_match = re.search(r'"name_variable":\s*["\']([^"\']*)["\']', raw_response)
894
- pseudo_match = re.search(r'"pseudocode":\s*["\']([^"\']*)["\']', raw_response)
895
 
896
- if name_match and pseudo_match:
897
- print("Extracted data using regex fallback")
898
- return {
899
- "refined_logic": {
900
- "name_variable": name_match.group(1),
901
- "pseudocode": pseudo_match.group(1)
902
- }
903
- }
904
 
905
- # Try to find any valid JSON-like structure in the response
906
- # Look for patterns like {'refined_logic': 'pseudocode', 'block_relationships': [...]}
907
- alt_match = re.search(r"'name_variable':\s*'([^']*)'.*?'pseudocode':\s*'([^']*)'", raw_response, re.DOTALL)
908
- if alt_match:
909
- print("Extracted data using alternative pattern")
910
- return {
911
- "refined_logic": {
912
- "name_variable": alt_match.group(1),
913
- "pseudocode": alt_match.group(2)
914
- }
915
- }
916
 
917
- except Exception as regex_error:
918
- print(f"Regex extraction also failed: {regex_error}")
919
 
920
- # Return a default structure on parsing failure
921
- return {
922
- "refined_logic": {
923
- "name_variable": "Parse Error",
924
- "pseudocode": "Failed to parse response"
925
- }
926
- }
927
 
 
 
 
 
 
928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  # def extract_json_from_llm_response(raw_response: str) -> dict:
930
  # # --- 1) Pull out the JSON code‑block if present ---
931
  # md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
 
806
  ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
807
 
808
  # Helper function to extract JSON from LLM response
809
+ # def extract_json_from_llm_response(raw_response: str) -> dict:
810
+ # """
811
+ # Improved JSON extraction with better error handling and validation
812
+ # """
813
+ # print(f"Raw LLM response: {raw_response[:200]}...")
814
 
815
+ # # Try to find JSON in code blocks first
816
+ # md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
817
+ # if md:
818
+ # json_string = md.group(1).strip()
819
+ # else:
820
+ # json_string = raw_response.strip()
821
+
822
+ # # Find the first complete JSON object (handle cases with multiple objects/arrays)
823
+ # first_brace = json_string.find('{')
824
+ # if first_brace == -1:
825
+ # print("No JSON object found in response")
826
+ # return {
827
+ # "refined_logic": {
828
+ # "name_variable": "No Code-blocks",
829
+ # "pseudocode": "No Code-blocks"
830
+ # }
831
+ # }
832
 
833
+ # # Find the matching closing brace for the first opening brace
834
+ # brace_count = 0
835
+ # last_brace = -1
836
+ # for i, char in enumerate(json_string[first_brace:], first_brace):
837
+ # if char == '{':
838
+ # brace_count += 1
839
+ # elif char == '}':
840
+ # brace_count -= 1
841
+ # if brace_count == 0:
842
+ # last_brace = i
843
+ # break
844
 
845
+ # if last_brace == -1:
846
+ # print("No matching closing brace found")
847
+ # return {
848
+ # "refined_logic": {
849
+ # "name_variable": "Parse Error",
850
+ # "pseudocode": "Malformed JSON"
851
+ # }
852
+ # }
853
 
854
+ # json_string = json_string[first_brace:last_brace+1]
855
 
856
+ # # Simple cleanup - just handle the most common issues
857
+ # # 1. Remove trailing commas
858
+ # json_string = re.sub(r',\s*}', '}', json_string)
859
+ # json_string = re.sub(r',\s*]', ']', json_string)
860
 
861
+ # # 2. Fix single quotes around simple values (not containing quotes)
862
+ # json_string = re.sub(r"'([^'\"]*)'(\s*:)", r'"\1"\2', json_string) # Keys
863
+ # json_string = re.sub(r"(:\s*)'([^'\"]*)'(\s*[,}])", r'\1"\2"\3', json_string) # Simple values
864
 
865
+ # print(f"Cleaned JSON string: {json_string[:200]}...")
866
 
867
+ # try:
868
+ # parsed = json.loads(json_string)
869
 
870
+ # # Validate the expected structure
871
+ # if not isinstance(parsed, dict):
872
+ # raise ValueError("Response is not a JSON object")
873
 
874
+ # if "refined_logic" not in parsed:
875
+ # raise ValueError("Missing 'refined_logic' key")
876
 
877
+ # refined_logic = parsed["refined_logic"]
878
+ # if not isinstance(refined_logic, dict):
879
+ # raise ValueError("'refined_logic' is not an object")
880
 
881
+ # if "name_variable" not in refined_logic or "pseudocode" not in refined_logic:
882
+ # raise ValueError("Missing required keys in 'refined_logic'")
883
 
884
+ # print("Successfully parsed and validated JSON")
885
+ # return parsed
886
 
887
+ # except (json.JSONDecodeError, ValueError) as e:
888
+ # print(f"JSON parsing failed: {e}")
889
 
890
+ # # Try to extract meaningful data even from malformed JSON using regex
891
+ # try:
892
+ # # Look for name_variable and pseudocode patterns with more flexible matching
893
+ # name_match = re.search(r'"name_variable":\s*["\']([^"\']*)["\']', raw_response)
894
+ # pseudo_match = re.search(r'"pseudocode":\s*["\']([^"\']*)["\']', raw_response)
895
 
896
+ # if name_match and pseudo_match:
897
+ # print("Extracted data using regex fallback")
898
+ # return {
899
+ # "refined_logic": {
900
+ # "name_variable": name_match.group(1),
901
+ # "pseudocode": pseudo_match.group(1)
902
+ # }
903
+ # }
904
 
905
+ # # Try to find any valid JSON-like structure in the response
906
+ # # Look for patterns like {'refined_logic': 'pseudocode', 'block_relationships': [...]}
907
+ # alt_match = re.search(r"'name_variable':\s*'([^']*)'.*?'pseudocode':\s*'([^']*)'", raw_response, re.DOTALL)
908
+ # if alt_match:
909
+ # print("Extracted data using alternative pattern")
910
+ # return {
911
+ # "refined_logic": {
912
+ # "name_variable": alt_match.group(1),
913
+ # "pseudocode": alt_match.group(2)
914
+ # }
915
+ # }
916
 
917
+ # except Exception as regex_error:
918
+ # print(f"Regex extraction also failed: {regex_error}")
919
 
920
+ # # Return a default structure on parsing failure
921
+ # return {
922
+ # "refined_logic": {
923
+ # "name_variable": "Parse Error",
924
+ # "pseudocode": "Failed to parse response"
925
+ # }
926
+ # }
927
 
928
def extract_json_from_llm_response(raw_response: str) -> dict:
    """
    Find and parse the first valid JSON object in a raw LLM response string.

    Two strategies are tried in order:

    1. A fenced markdown block (```json ... ``` or ``` ... ```) whose content
       is a JSON object.
    2. The first JSON object that starts at the first ``{`` in the response.
       Anything after that object (extra prose, further objects, stray
       braces) is ignored.

    Args:
        raw_response: The unprocessed text returned by the LLM.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: If neither strategy yields a parseable JSON
            object. The exception is deliberately propagated so the calling
            logic can react to it (e.g. invoke the corrector agent).
    """
    logger.debug("Attempting to extract JSON from raw LLM response...")

    # 1. Look for a JSON markdown block first
    match = re.search(r"```(?:json)?\s*({[\s\S]*?})\s*```", raw_response)
    if match:
        logger.debug("Found JSON inside a markdown block.")
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError as e:
            # Fall through to the brace-based method if parsing fails
            logger.warning("Failed to parse JSON from markdown block: %s", e)

    # 2. If no block is found (or it failed), decode from the first brace
    logger.debug("Markdown block not found or failed. Searching for outermost braces.")
    first_brace = raw_response.find('{')
    try:
        if first_brace == -1:
            logger.error("Could not find a valid JSON structure (outermost braces).")
            raise json.JSONDecodeError("No valid JSON object found in the response.", raw_response, 0)
        # raw_decode stops at the end of the first complete JSON document, so
        # trailing text that happens to contain '}' no longer breaks parsing
        # the way slicing up to the last '}' did.
        parsed, _end = json.JSONDecoder().raw_decode(raw_response[first_brace:])
        return parsed
    except json.JSONDecodeError as e:
        logger.error("Final JSON parsing attempt failed: %s", e)
        # Re-raise the exception to be caught by the calling logic (to invoke the corrector agent)
        raise
960
  # def extract_json_from_llm_response(raw_response: str) -> dict:
961
  # # --- 1) Pull out the JSON code‑block if present ---
962
  # md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)