# NOTE(review): VCS metadata (author handle, commit message, commit hash)
# leaked into the top of this source file and breaks parsing; preserved
# below as comments — delete once confirmed it carries no needed info.
# Blu3Orange
# feat: Introduce argument direction handling and enhance conviction mechanics for juror interactions
# 373ff24
"""LlamaIndex case index for semantic search over case documents.
Provides RAG capabilities for smolagents tools to query evidence and case details.
Uses Nebius embeddings via the decoupled embedding service.
"""
from typing import TYPE_CHECKING
from llama_index.core import VectorStoreIndex, Document
from services.embeddings import get_embedding_service
if TYPE_CHECKING:
from case_db.models import CriminalCase
class CaseIndex:
    """Semantic search over case documents for agent tool use.

    Creates a vector index from the case summary, charges, evidence,
    witness testimonies, defendant background, and both sides' arguments.
    Agents can query this index to find relevant information during
    deliberation.

    Uses Nebius embeddings (4096-dim) via the centralized embedding service.
    """

    def __init__(self, case: "CriminalCase", top_k: int = 3):
        """Initialize the case index.

        Args:
            case: The criminal case to index
            top_k: Number of documents returned per retrieval. Defaults to 3,
                preserving the previously hard-coded value.
        """
        self.case = case
        # Initialize embedding service (configures LlamaIndex Settings)
        self._embedding_service = get_embedding_service()
        # Build the index
        self.index = self._build_index()
        # Use retriever instead of query_engine to avoid redundant LLM calls
        # The CodeAgent will reason about raw retrieved docs directly
        self.retriever = self.index.as_retriever(similarity_top_k=top_k)

    def _build_index(self) -> "VectorStoreIndex":
        """Build the vector index from every indexable case document."""
        documents: list = []
        documents.extend(self._core_documents())
        documents.extend(self._evidence_documents())
        documents.extend(self._witness_documents())
        documents.extend(self._defendant_documents())
        documents.extend(self._argument_documents())
        return VectorStoreIndex.from_documents(documents)

    def _core_documents(self) -> "list[Document]":
        """Documents for the case summary and (if present) the charges."""
        docs = [Document(
            text=self.case.summary,
            metadata={
                "type": "summary",
                "case_id": self.case.case_id,
            },
        )]
        if self.case.charges:
            docs.append(Document(
                text=f"Charges: {', '.join(self.case.charges)}",
                metadata={
                    "type": "charges",
                    "case_id": self.case.case_id,
                },
            ))
        return docs

    def _evidence_documents(self) -> "list[Document]":
        """One document per piece of evidence, flagging contestable items."""
        docs = []
        for evidence in self.case.evidence:
            doc_text = f"Evidence ({evidence.type}): {evidence.description}"
            if evidence.contestable and evidence.contest_reason:
                doc_text += f" [Contestable: {evidence.contest_reason}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "evidence",
                    "evidence_type": evidence.type,
                    "evidence_id": evidence.evidence_id,
                    "case_id": self.case.case_id,
                    # Strength scores kept in metadata so agents can weigh
                    # retrieved evidence without a second lookup.
                    "strength_prosecution": evidence.strength_prosecution,
                    "strength_defense": evidence.strength_defense,
                },
            ))
        return docs

    def _witness_documents(self) -> "list[Document]":
        """One document per witness, noting any credibility issues."""
        docs = []
        for witness in self.case.witnesses:
            doc_text = (
                f"Witness {witness.name} ({witness.role}, {witness.side}): "
                f"{witness.testimony_summary}"
            )
            if witness.credibility_issues:
                doc_text += f" [Credibility issues: {', '.join(witness.credibility_issues)}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "witness",
                    "witness_id": witness.witness_id,
                    "witness_name": witness.name,
                    "witness_role": witness.role,
                    "witness_side": witness.side,
                    "case_id": self.case.case_id,
                },
            ))
        return docs

    def _defendant_documents(self) -> "list[Document]":
        """Document describing the defendant's background, if available."""
        defendant = self.case.defendant
        if not defendant:
            return []
        text = f"Defendant: {defendant.name}"
        if defendant.age:
            text += f", age {defendant.age}"
        if defendant.occupation:
            text += f", {defendant.occupation}"
        if defendant.background:
            text += f". Background: {defendant.background}"
        if defendant.prior_record:
            text += f". Prior record: {', '.join(defendant.prior_record)}"
        return [Document(
            text=text,
            metadata={
                "type": "defendant",
                "case_id": self.case.case_id,
            },
        )]

    def _argument_documents(self) -> "list[Document]":
        """Documents for prosecution and defense arguments, one per argument."""
        docs = []
        for side, arguments in (
            ("prosecution", self.case.prosecution_arguments),
            ("defense", self.case.defense_arguments),
        ):
            for i, arg in enumerate(arguments):
                docs.append(Document(
                    text=f"{side.capitalize()} argument: {arg}",
                    metadata={
                        "type": f"{side}_argument",
                        "argument_index": i,
                        "case_id": self.case.case_id,
                    },
                ))
        return docs

    def query(self, question: str) -> str:
        """Query the case index for relevant information.

        Args:
            question: Natural language question about the case

        Returns:
            Relevant case documents (raw text, no LLM synthesis)
        """
        nodes = self.retriever.retrieve(question)
        if not nodes:
            return "No relevant information found."
        # Return raw document text for CodeAgent to reason about
        return "\n\n".join([node.text for node in nodes])

    def query_evidence(self, query: str) -> str:
        """Query specifically for evidence-related information.

        Args:
            query: What evidence to search for

        Returns:
            Relevant evidence information
        """
        full_query = f"Evidence related to: {query}"
        return self.query(full_query)

    def query_witnesses(self, query: str) -> str:
        """Query specifically for witness testimony.

        Args:
            query: What witness information to search for

        Returns:
            Relevant witness testimony
        """
        full_query = f"Witness testimony about: {query}"
        return self.query(full_query)

    def get_all_evidence_summaries(self) -> list[str]:
        """Get list of all evidence summaries for quick reference."""
        return [
            f"[{e.evidence_id}] {e.type}: {e.description}"
            for e in self.case.evidence
        ]

    def get_all_witness_summaries(self) -> list[str]:
        """Get list of all witness summaries for quick reference.

        Testimony is capped at 100 characters; an ellipsis is appended only
        when truncation actually occurred.
        """
        summaries = []
        for w in self.case.witnesses:
            testimony = w.testimony_summary
            if len(testimony) > 100:
                testimony = testimony[:100] + "..."
            summaries.append(f"[{w.witness_id}] {w.name} ({w.role}): {testimony}")
        return summaries
class CaseIndexFactory:
    """Factory for creating and caching case indices."""

    # Built indices keyed by case_id; shared across all callers so each
    # case is only ever indexed once per process.
    _cache: "dict[str, CaseIndex]" = {}

    @classmethod
    def get_index(cls, case: "CriminalCase") -> "CaseIndex":
        """Get or create a case index.

        Caches indices by case_id to avoid rebuilding.

        Args:
            case: The criminal case to index

        Returns:
            CaseIndex for the case
        """
        try:
            # Fast path: this case was indexed earlier.
            return cls._cache[case.case_id]
        except KeyError:
            built = CaseIndex(case)
            cls._cache[case.case_id] = built
            return built

    @classmethod
    def clear_cache(cls) -> None:
        """Drop every cached index so subsequent lookups rebuild."""
        cls._cache.clear()