File size: 1,112 Bytes
11ba2bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from dataclasses import dataclass, field
from typing import Optional
import re


@dataclass
class Citation:
    """A single cited source passage together with its display metadata.

    Instances are consumed by the formatting helpers in this module:
    ``build_context_block`` turns them into numbered prompt context and
    ``render_citation_markdown`` turns them into a Markdown source list.
    """

    id: int  # number shown as "[id]" in the context block and as the list number in the source list
    chunk_id: int  # presumably identifies the retrieved chunk; not read by the code visible here (confirm)
    source_url: str  # emitted as "URL: ..." in the context header and used as the Markdown link target
    page_title: str  # emitted as "Page: ..." in the context header and in the Markdown link label
    section: str  # emitted as "Section: ..." in the context header and in the Markdown link label
    snippet: str  # passage text placed on the line below the context header
    score: float = 0.0  # presumably a retrieval relevance score; defaults to 0.0 and is not read here (confirm)


def build_context_block(citations: list[Citation]) -> str:
    """Render citations as a numbered context section for an LLM prompt.

    Every citation contributes a header line of the form
    ``[id] (Page: ... | Section: ... | URL: ...)`` followed, on the next
    line, by its snippet. Entries appear in the order given and are
    separated by a ``---`` rule surrounded by blank lines.

    Args:
        citations: The citations to include.

    Returns:
        The assembled context text, or an empty string when *citations*
        is empty.
    """
    divider = "\n\n---\n\n"
    return divider.join(
        f"[{cite.id}] (Page: {cite.page_title} | Section: {cite.section} | URL: {cite.source_url})"
        f"\n{cite.snippet}"
        for cite in citations
    )


def extract_inline_refs(text: str) -> set[int]:
    """Collect the citation IDs that appear as bracketed numbers in *text*.

    A marker is a run of digits enclosed directly in square brackets,
    such as ``[1]`` or ``[12]``. Brackets containing anything other than
    digits (``[a]``, ``[1,2]``, ``[ 3 ]``) are not counted.

    Args:
        text: The text to scan for inline markers.

    Returns:
        The distinct IDs found, as integers; an empty set when there are
        none.
    """
    found: set[int] = set()
    for marker in re.finditer(r"\[(\d+)\]", text):
        found.add(int(marker.group(1)))
    return found


def render_citation_markdown(citations: list[Citation], used_ids: Optional[set[int]] = None) -> str:
    """Render citations as a Markdown "Sources" list of numbered links.

    Each listed citation becomes ``<id>. [<label>](<source_url>)`` under a
    ``### Sources`` heading. The label is the page title followed by the
    section, separated by " - "; a citation whose section is empty is
    labelled by its title alone.

    Args:
        citations: Citations to list, emitted in the order given.
        used_ids: When not ``None``, only citations whose ``id`` is in this
            set are listed, so an empty set lists nothing. ``None`` lists
            every citation.

    Returns:
        The heading and entries joined by newlines. The heading is always
        present, even when no citation is listed.
    """
    lines = ["### Sources"]
    for c in citations:
        if used_ids is not None and c.id not in used_ids:
            continue
        # Keep title and section visually distinct instead of running them
        # together; omit the separator when there is no section to show.
        label = f"{c.page_title} - {c.section}" if c.section else c.page_title
        lines.append(f"{c.id}. [{label}]({c.source_url})")
    return "\n".join(lines)