Spaces:

Undrick
/

NLP_Lab

Running

NLP_Lab / src /citations.py

apytel

Redesigns UI for FreeCAD RAG Python script generator

11ba2bd 27 days ago

1.11 kB

	from dataclasses import dataclass, field
	from typing import Optional
	import re


	@dataclass
	class Citation:
	    """One retrieved source passage that can be cited in a generated answer."""
	    # Number shown to the model and the reader, rendered as "[id]".
	    id: int
	    # Presumably the identifier of the stored chunk this snippet came from;
	    # not read by the functions in this module -- verify against the retriever.
	    chunk_id: int
	    # Link target used when the citation is rendered as a Markdown source entry.
	    source_url: str
	    # Page title and section name; both appear in the prompt header and the
	    # rendered source label.
	    page_title: str
	    section: str
	    # Passage text placed under the citation header in the prompt context.
	    snippet: str
	    # Defaults to 0.0; presumably a retrieval relevance score -- TODO confirm.
	    score: float = 0.0


	def build_context_block(citations: list[Citation]) -> str:
	    """Format citations as numbered context for the LLM prompt.

	    Each citation becomes a header line of the form
	    ``[id] (Page: <title> | Section: <section> | URL: <url>)`` followed by
	    its snippet on the next line. Entries are separated by a ``---`` rule
	    with a blank line on either side. An empty input yields an empty string.
	    """
	    # Plain "|" separators: the earlier "\|" spelling is an invalid escape
	    # sequence (SyntaxWarning on Python 3.12+) and leaked literal backslashes
	    # into the prompt text.
	    return "\n\n---\n\n".join(
	        f"[{c.id}] (Page: {c.page_title} | Section: {c.section} | URL: {c.source_url})\n{c.snippet}"
	        for c in citations
	    )


	def extract_inline_refs(text: str) -> set[int]:
	    """Collect the citation numbers that appear in *text* as bracketed markers.

	    A marker is one or more digits wrapped directly in square brackets, such
	    as ``[3]``. Repeated markers collapse because the result is a set.
	    """
	    found: set[int] = set()
	    for marker in re.finditer(r"\[(\d+)\]", text):
	        # Group 1 holds only the digits between the brackets.
	        found.add(int(marker.group(1)))
	    return found


	def render_citation_markdown(citations: list[Citation], used_ids: Optional[set[int]] = None) -> str:
	    """Render citations as a Markdown "Sources" section.

	    The output starts with a ``### Sources`` heading, followed by one line
	    per citation of the form ``id. [page_title — section](source_url)``.
	    When *used_ids* is given, citations whose id is not in it are left out
	    (an empty set therefore leaves only the heading); when it is ``None``
	    every citation is listed.
	    """
	    def is_listed(citation) -> bool:
	        # No filter supplied means everything is shown.
	        return used_ids is None or citation.id in used_ids
	    entries = [
	        f"{c.id}. [{c.page_title} — {c.section}]({c.source_url})"
	        for c in citations
	        if is_listed(c)
	    ]
	    return "\n".join(["### Sources", *entries])