finagent / app /utils /json_parser.py
omgy's picture
Create json_parser.py
61f8894 verified
"""
JSON parsing utilities for handling malformed LLM outputs.
Provides robust parsing with fallback strategies.
"""
import json
import re
from typing import Any, Dict, Optional
def _load_json_container(candidate: str) -> Any:
    """Return ``candidate`` parsed as a JSON object/array, or None otherwise."""
    try:
        parsed = json.loads(candidate)
    except (json.JSONDecodeError, RecursionError):
        # RecursionError: pathologically nested input should fall through to
        # the next strategy instead of crashing the caller.
        return None
    # Bare scalars (numbers, strings, true/false/null) are valid JSON but are
    # not the structured payload callers expect.
    return parsed if isinstance(parsed, (dict, list)) else None


def parse_llm_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Parse JSON from LLM output with multiple fallback strategies.

    Strategies are tried in order and the first success wins:
    1. Parse the whole text as JSON.
    2. Parse the span from the first "{" to the last "}".
    3. Parse the span from the first "[" to the last "]".
    4. Repair the "{...}" span step by step (collapse line breaks, drop
       trailing commas, swap single quotes for double quotes), re-parsing
       after every step so the least invasive repair wins.
    5. Scrape flat ``key: value`` pairs with a regex.

    Args:
        text: Raw text from LLM that should contain JSON

    Returns:
        Parsed JSON dictionary, or None if parsing fails. When the text holds
        a JSON array rather than an object, the parsed list is returned
        instead. A bare JSON scalar (e.g. ``123`` or ``"abc"``) is never
        returned as-is.
    """
    if not text or not isinstance(text, str):
        return None

    # Strategy 1: direct parsing.
    # Compare against None explicitly: "{}" and "[]" are valid, falsy results.
    whole = _load_json_container(text)
    if whole is not None:
        return whole

    # Strategies 2 and 3: extract the outermost {...} span, then [...] span.
    for opener, closer in (("{", "}"), ("[", "]")):
        start = text.find(opener)
        end = text.rfind(closer)
        if start != -1 and end > start:
            extracted = _load_json_container(text[start : end + 1])
            if extracted is not None:
                return extracted

    # Strategy 4: fix common issues and retry.
    # Raw line breaks are illegal inside JSON strings, so collapse them first.
    flattened = re.sub(r"\n\s*", " ", text)
    start = flattened.find("{")
    end = flattened.rfind("}")
    if start != -1 and end > start:
        span = flattened[start : end + 1]
        # Remove trailing commas before } or ]
        without_trailing_commas = re.sub(r",(\s*[}\]])", r"\1", span)
        # Blunt heuristic: also rewrites apostrophes inside values, which is
        # why it is only attempted after the gentler repairs have failed.
        requoted = without_trailing_commas.replace("'", '"')
        for candidate in (span, without_trailing_commas, requoted):
            repaired = _load_json_container(candidate)
            if repaired is not None:
                return repaired

    # Strategy 5: scrape key: value pairs (flat values only, structure is lost).
    pairs = re.findall(r'"?(\w+)"?\s*:\s*"?([^",}\]]+)"?', text)
    scraped: Dict[str, Any] = {}
    for key, raw_value in pairs:
        # Try to parse value as number if possible
        try:
            scraped[key] = float(raw_value) if "." in raw_value else int(raw_value)
        except ValueError:
            scraped[key] = raw_value.strip()
    return scraped or None
def parse_tool_input(input_str: str) -> Dict[str, Any]:
    """
    Parse tool input from LLM, handling both string and JSON inputs.

    A dict is passed through untouched. A string is parsed with
    parse_llm_json; if that does not yield a non-empty dict, a plain value
    (no brackets, braces or colons) is treated as a user ID and wrapped as
    ``{"user_id": value}``. This includes purely numeric IDs such as
    ``"12345"``, which are kept as strings.

    Args:
        input_str: Input string from LLM (may be JSON or plain string)

    Returns:
        Dictionary with parsed values, or an empty dict when the input is
        empty, is not a string/dict, or looks like JSON that failed to parse.
    """
    # If it's already a dict, return it
    if isinstance(input_str, dict):
        return input_str

    # Accept only a non-empty dict. Any other parser result (for example a
    # list for array input) would break the Dict return contract, so fall
    # through to the plain-string handling instead.
    parsed = parse_llm_json(input_str)
    if isinstance(parsed, dict) and parsed:
        return parsed

    if not isinstance(input_str, str):
        return {}

    # Drop surrounding whitespace and quote characters the LLM may have added.
    bare_value = input_str.strip().strip('"').strip("'")

    # Empty input carries no user ID; JSON-like punctuation means the text was
    # meant to be structured but failed to parse. Both signal failure with {}.
    if not bare_value or any(char in bare_value for char in "{}[]:"):
        return {}

    # A simple value is treated as the user_id
    return {"user_id": bare_value}
def extract_json_value(text: str, key: str, default: Any = None) -> Any:
    """
    Extract a specific value from JSON text.

    The text is first parsed with parse_llm_json. If that yields a non-empty
    dict, the lookup is done on its top level only. Otherwise a regex looks
    for a literal ``"key": value`` pair anywhere in the text.

    Args:
        text: Text containing JSON
        key: Key to extract
        default: Default value if key not found

    Returns:
        Extracted value or default. Values found by the regex fallback are
        converted to int/float when they look numeric, else returned as
        stripped strings.
    """
    try:
        parsed = parse_llm_json(text)
    except (ValueError, RecursionError):
        # Best effort: a parser failure just means we try the regex below.
        parsed = None
    if parsed and isinstance(parsed, dict):
        return parsed.get(key, default)

    # Regex fallback only makes sense on real text.
    if not isinstance(text, str):
        return default

    # Escape the key so regex metacharacters in it (".", "+", "(", ...) are
    # matched literally instead of changing or breaking the pattern.
    pattern = rf'"{re.escape(str(key))}"\s*:\s*"?([^",}}\]]+)"?'
    match = re.search(pattern, text)
    if match is None:
        return default

    value = match.group(1).strip()
    # Try to convert to number
    try:
        return float(value) if "." in value else int(value)
    except ValueError:
        return value