Rohan03 commited on
Commit
12ff1aa
·
verified ·
1 Parent(s): 9557780

Track 3: TOML prompts + PURPOSE_LEARNING.md whitepaper — purpose_agent/robust_parser.py

Browse files
Files changed (1) hide show
  1. purpose_agent/robust_parser.py +101 -16
purpose_agent/robust_parser.py CHANGED
@@ -1,13 +1,13 @@
1
  """
2
- robust_parser.py — Universal LLM output parser that never requires JSON.
3
 
4
- The problem: LLMs are unreliable at producing valid JSON. Different models
5
- format differently. Structured output (json_schema) isn't supported everywhere.
 
 
6
 
7
- The solution: Parse whatever the LLM gives you. Extract fields by multiple
8
- strategies, fall back gracefully, and always return something usable.
9
-
10
- This replaces the fragile generate_structured → json.loads → crash pattern.
11
  """
12
  from __future__ import annotations
13
 
@@ -19,6 +19,90 @@ from typing import Any
19
  logger = logging.getLogger(__name__)
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def extract_json(text: str) -> dict[str, Any] | None:
23
  """
24
  Try to extract a JSON object from arbitrary LLM text.
@@ -64,6 +148,7 @@ def extract_field(text: str, field_name: str, default: str = "") -> str:
64
  Extract a named field value from LLM text, regardless of format.
65
 
66
  Handles:
 
67
  - JSON: {"field": "value"}
68
  - Markdown: **field:** value / field: value
69
  - Labeled: FIELD: value
@@ -72,12 +157,12 @@ def extract_field(text: str, field_name: str, default: str = "") -> str:
72
  text_lower = text.lower()
73
  name_lower = field_name.lower()
74
 
75
- # Try JSON first
76
- obj = extract_json(text)
77
  if obj and field_name in obj:
78
  return str(obj[field_name])
79
 
80
- # Pattern: "field_name": "value" or field_name: value
81
  patterns = [
82
  rf'"{field_name}"\s*:\s*"((?:[^"\\]|\\.)*)"', # JSON string
83
  rf'"{field_name}"\s*:\s*(\d+\.?\d*)', # JSON number
@@ -168,10 +253,10 @@ def extract_code(text: str) -> str:
168
  def parse_actor_response(text: str) -> dict[str, Any]:
169
  """
170
  Parse an actor's response into thought/action/expected_delta.
171
- Works with any format the LLM produces.
172
  """
173
- # Try JSON first (best case)
174
- obj = extract_json(text)
175
  if obj and ("action" in obj or "thought" in obj):
176
  return obj
177
 
@@ -209,10 +294,10 @@ def parse_actor_response(text: str) -> dict[str, Any]:
209
  def parse_critic_response(text: str) -> dict[str, Any]:
210
  """
211
  Parse a critic's response into phi_before/phi_after/reasoning/evidence/confidence.
212
- Works with any format.
213
  """
214
- # Try JSON first
215
- obj = extract_json(text)
216
  if obj and ("phi_before" in obj or "phi_after" in obj):
217
  return {
218
  "phi_before": float(obj.get("phi_before", 0)),
 
1
  """
2
+ robust_parser.py — Universal LLM output parser. Handles JSON, TOML, or free text.
3
 
4
+ TOML is the preferred output format because:
5
+ - ~40% fewer tokens than JSON (no quotes on keys, no braces, no commas)
6
+ - More natural for LLMs to generate (looks like config files)
7
+ - Python 3.11+ has tomllib in stdlib; we include a minimal fallback parser
8
 
9
+ The parser tries in order: TOML → JSON → field extraction → regex fallback.
10
+ Always returns something usable. Never crashes.
 
 
11
  """
12
  from __future__ import annotations
13
 
 
19
  logger = logging.getLogger(__name__)
20
 
21
 
22
+ def _parse_toml_minimal(text: str) -> dict[str, Any] | None:
23
+ """
24
+ Minimal TOML parser for LLM output. Handles the subset LLMs actually produce:
25
+ key = "value"
26
+ key = number
27
+ [section]
28
+ key = "value"
29
+
30
+ Uses stdlib tomllib (Python 3.11+) with fallback regex parser.
31
+ """
32
+ try:
33
+ import tomllib
34
+ return tomllib.loads(text)
35
+ except ImportError:
36
+ pass
37
+ except Exception:
38
+ pass
39
+
40
+ # Fallback: regex-based TOML subset parser
41
+ result = {}
42
+ current_section = result
43
+ section_path = []
44
+
45
+ for line in text.split('\n'):
46
+ line = line.strip()
47
+ if not line or line.startswith('#'):
48
+ continue
49
+
50
+ # Section header: [section] or [section.subsection]
51
+ sec_match = re.match(r'^\[([^\]]+)\]$', line)
52
+ if sec_match:
53
+ parts = sec_match.group(1).split('.')
54
+ current_section = result
55
+ for part in parts:
56
+ part = part.strip()
57
+ if part not in current_section:
58
+ current_section[part] = {}
59
+ current_section = current_section[part]
60
+ section_path = parts
61
+ continue
62
+
63
+ # Key = value
64
+ kv_match = re.match(r'^(\w+)\s*=\s*(.+)$', line)
65
+ if kv_match:
66
+ key = kv_match.group(1).strip()
67
+ val = kv_match.group(2).strip()
68
+ # Parse value type
69
+ if val.startswith('"') and val.endswith('"'):
70
+ val = val[1:-1].replace('\\n', '\n').replace('\\"', '"')
71
+ elif val.startswith("'") and val.endswith("'"):
72
+ val = val[1:-1]
73
+ elif val.lower() in ('true', 'false'):
74
+ val = val.lower() == 'true'
75
+ else:
76
+ try:
77
+ val = float(val) if '.' in val else int(val)
78
+ except ValueError:
79
+ pass # Keep as string
80
+ current_section[key] = val
81
+
82
+ return result if result else None
83
+
84
+
85
def extract_structured(text: str) -> dict[str, Any] | None:
    """
    Extract structured data from LLM text.

    Attempts TOML first (preferred: fewer tokens, more natural for LLMs),
    then falls back to JSON. Returns None when neither yields anything.
    """
    stripped = text.strip()

    # Any line shaped like `key = ...` hints at TOML-style content.
    looks_like_toml = re.search(r'^\w+\s*=\s*', stripped, re.MULTILINE)
    if looks_like_toml:
        # When a code fence is present, parse only its contents.
        fence = re.search(r'```(?:toml)?\s*\n(.*?)```', stripped, re.DOTALL)
        candidate = fence.group(1) if fence else stripped
        parsed = _parse_toml_minimal(candidate)
        if parsed:
            return parsed

    # TOML didn't pan out — try JSON extraction instead.
    return extract_json(stripped)
104
+
105
+
106
  def extract_json(text: str) -> dict[str, Any] | None:
107
  """
108
  Try to extract a JSON object from arbitrary LLM text.
 
148
  Extract a named field value from LLM text, regardless of format.
149
 
150
  Handles:
151
+ - TOML: field = "value"
152
  - JSON: {"field": "value"}
153
  - Markdown: **field:** value / field: value
154
  - Labeled: FIELD: value
 
157
  text_lower = text.lower()
158
  name_lower = field_name.lower()
159
 
160
+ # Try structured parse first (TOML → JSON)
161
+ obj = extract_structured(text)
162
  if obj and field_name in obj:
163
  return str(obj[field_name])
164
 
165
+ # Pattern: "field_name": "value" or field_name = value or field_name: value
166
  patterns = [
167
  rf'"{field_name}"\s*:\s*"((?:[^"\\]|\\.)*)"', # JSON string
168
  rf'"{field_name}"\s*:\s*(\d+\.?\d*)', # JSON number
 
253
  def parse_actor_response(text: str) -> dict[str, Any]:
254
  """
255
  Parse an actor's response into thought/action/expected_delta.
256
+ Works with TOML, JSON, or free text.
257
  """
258
+ # Try structured parse (TOML → JSON)
259
+ obj = extract_structured(text)
260
  if obj and ("action" in obj or "thought" in obj):
261
  return obj
262
 
 
294
  def parse_critic_response(text: str) -> dict[str, Any]:
295
  """
296
  Parse a critic's response into phi_before/phi_after/reasoning/evidence/confidence.
297
+ Works with TOML, JSON, or free text.
298
  """
299
+ # Try structured parse (TOML → JSON)
300
+ obj = extract_structured(text)
301
  if obj and ("phi_before" in obj or "phi_after" in obj):
302
  return {
303
  "phi_before": float(obj.get("phi_before", 0)),