NLP_Lab / src /citations.py
apytel
Redesigns UI for FreeCAD RAG Python script generator
11ba2bd
from dataclasses import dataclass, field
from typing import Optional
import re
@dataclass
class Citation:
    """One retrieved source passage that can be cited in a generated answer.

    ``id`` is the number shown in inline ``[n]`` markers and in the rendered
    source list; the remaining fields describe where the passage came from
    and what text it contains.
    """

    # Number used for the inline "[id]" marker and the source-list entry.
    id: int
    # Identifier of the underlying chunk (presumably a retrieval-index key;
    # not used by the helpers in this module -- confirm against callers).
    chunk_id: int
    # Link target for the rendered source entry.
    source_url: str
    # Title of the page the snippet was taken from.
    page_title: str
    # Section of the page the snippet belongs to.
    section: str
    # Text of the passage handed to the LLM as context.
    snippet: str
    # Optional score, 0.0 when not supplied (presumably retrieval relevance
    # -- confirm against the code that builds citations).
    score: float = field(default=0.0)
def build_context_block(citations: list[Citation]) -> str:
    """Build the numbered context section that is placed in the LLM prompt.

    Each citation becomes a header line carrying its id, page title, section
    and URL, followed by the snippet on the next line. Entries are separated
    by a ``---`` rule surrounded by blank lines. An empty list yields an
    empty string.
    """
    separator = "\n\n---\n\n"
    return separator.join(
        f"[{cite.id}] (Page: {cite.page_title} | Section: {cite.section} | URL: {cite.source_url})"
        f"\n{cite.snippet}"
        for cite in citations
    )
def extract_inline_refs(text: str) -> set[int]:
    """Collect the citation numbers that appear in *text* as ``[n]`` markers.

    Only brackets containing nothing but digits are recognised; duplicates
    collapse because the result is a set. Returns an empty set when no
    marker is present.
    """
    marker_pattern = r"\[(\d+)\]"
    return set(map(int, re.findall(marker_pattern, text)))
def render_citation_markdown(citations: list[Citation], used_ids: Optional[set[int]] = None) -> str:
    """Render citations as a Markdown "Sources" list.

    The output starts with a ``### Sources`` heading followed by one line per
    citation of the form ``<id>. [<label>](<url>)``.

    Args:
        citations: Citations to list, in the order they should appear.
        used_ids: When given, only citations whose ``id`` is in this set are
            listed; ``None`` lists every citation.

    Returns:
        The Markdown text. If no citation is listed, only the heading line
        is returned.
    """
    lines = ["### Sources"]
    for c in citations:
        if used_ids is not None and c.id not in used_ids:
            continue
        # Keep title and section visually distinct; without a separator the
        # two run together (e.g. "PartIntro"). An empty section adds nothing.
        label = f"{c.page_title} - {c.section}" if c.section else c.page_title
        lines.append(f"{c.id}. [{label}]({c.source_url})")
    return "\n".join(lines)