Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import re | |
| from typing import Callable | |
| from ..models.spans import SpanDescriptor | |
| def _strip_fences(text: str) -> str: | |
| """Remove markdown code fences, even if preceded by explanatory text.""" | |
| text = text.strip() | |
| m = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL) | |
| if m: | |
| return m.group(1).strip() | |
| return text | |
| def _parse_json_list(text: str) -> list[dict] | None: | |
| """Parse text as a JSON list; return None on failure.""" | |
| try: | |
| result = json.loads(text) | |
| return result if isinstance(result, list) else None | |
| except json.JSONDecodeError: | |
| return None | |
| def _dicts_to_spans(raw: list[dict]) -> list[SpanDescriptor]: | |
| spans: list[SpanDescriptor] = [] | |
| for item in raw: | |
| if not isinstance(item, dict): | |
| continue | |
| element = item.get("element", "") | |
| text = item.get("text", "") | |
| context = item.get("context", "") | |
| attrs = item.get("attrs", {}) | |
| if not (element and text and context): | |
| continue | |
| spans.append( | |
| SpanDescriptor( | |
| element=element, | |
| text=text, | |
| context=context, | |
| attrs=attrs if isinstance(attrs, dict) else {}, | |
| ) | |
| ) | |
| return spans | |
def parse_response(
    response: str,
    call_fn: Callable[[str], str] | None = None,
    make_correction_prompt: Callable[[str, str], str] | None = None,
) -> list[SpanDescriptor]:
    """
    Turn an LLM response string into a list of SpanDescriptors.

    Markdown code fences are stripped before the JSON is decoded. When the
    first attempt does not yield a JSON list and both *call_fn* and
    *make_correction_prompt* are given, one retry is made: the correction
    prompt is built from the original response plus an error message, sent
    through *call_fn*, and the reply is parsed the same way.

    Raises:
        ValueError: if no JSON list is obtained and no retry is configured,
            or if the retry response does not yield one either.
    """
    parsed = _parse_json_list(_strip_fences(response))

    if parsed is None:
        # A retry needs both callables; with either missing, fail right away.
        if call_fn is None or make_correction_prompt is None:
            raise ValueError(f"Failed to parse JSON from response: {response[:300]!r}")

        second_attempt = call_fn(
            make_correction_prompt(response, "Response is not valid JSON")
        )
        parsed = _parse_json_list(_strip_fences(second_attempt))
        if parsed is None:
            raise ValueError(
                f"Failed to parse JSON after retry: {second_attempt[:300]!r}"
            )

    return _dicts_to_spans(parsed)