| from dataclasses import dataclass, field | |
| from typing import Optional | |
| import re | |
@dataclass
class Citation:
    """One citable source record, used to build prompt context and source lists.

    The ``@dataclass`` decorator generates ``__init__``, ``__repr__`` and
    ``__eq__`` from the annotated fields below; without it the class would
    have no constructor accepting these values.
    """

    # Number used for inline references such as ``[1]`` and for the
    # numbered entries of the rendered source list.
    id: int
    # Not read by the formatting helpers in this file; presumably the
    # identifier of the underlying retrieved chunk (confirm with the caller).
    chunk_id: int
    # Link target shown in the context header and the rendered source list.
    source_url: str
    # Page title shown in the context header and the source-list label.
    page_title: str
    # Section name shown next to the page title.
    section: str
    # Text placed under the citation header in the prompt context block.
    snippet: str
    # Optional; defaults to 0.0. Not read by the helpers in this file;
    # presumably a retrieval relevance score (confirm with the caller).
    score: float = 0.0
def build_context_block(citations: list[Citation]) -> str:
    """Render citations as numbered context entries for the LLM prompt.

    Each entry is a header line of the form
    ``[id] (Page: ... | Section: ... | URL: ...)`` followed by the
    citation's snippet on the next line. Entries are separated by a
    ``---`` rule surrounded by blank lines.

    Args:
        citations: Citations to include, in output order.

    Returns:
        The joined context text; an empty string when ``citations`` is empty.
    """
    separator = "\n\n---\n\n"
    return separator.join(
        f"[{cite.id}] (Page: {cite.page_title} | Section: {cite.section} | URL: {cite.source_url})"
        f"\n{cite.snippet}"
        for cite in citations
    )
def extract_inline_refs(text: str) -> set[int]:
    """Collect the citation IDs that appear inline in ``text``.

    A reference is a run of digits enclosed directly in square brackets,
    e.g. ``[1]`` or ``[12]``. Brackets containing anything other than
    digits are ignored, and repeated references collapse to one entry.

    Args:
        text: Text to scan for inline references.

    Returns:
        The set of referenced IDs as integers; empty when none are found.
    """
    found: set[int] = set()
    for match in re.finditer(r"\[(\d+)\]", text):
        found.add(int(match.group(1)))
    return found
def render_citation_markdown(citations: list[Citation], used_ids: Optional[set[int]] = None) -> str:
    """Render a Markdown "Sources" section listing the given citations.

    The output starts with a ``### Sources`` heading, followed by one line
    per citation in the form ``id. [page_title — section](source_url)``.

    Args:
        citations: Citations to list, in output order.
        used_ids: When given, only citations whose ``id`` is in this set
            are listed. ``None`` lists every citation.

    Returns:
        The Markdown text. The heading is always present, even when no
        citation is listed.
    """
    if used_ids is None:
        selected = citations
    else:
        selected = [cite for cite in citations if cite.id in used_ids]
    entries = [
        f"{cite.id}. [{cite.page_title} — {cite.section}]({cite.source_url})"
        for cite in selected
    ]
    return "\n".join(["### Sources", *entries])