Agentic-RagBot / src /agents /disease_explainer.py
T0X1N's picture
chore: codebase audit and fixes (ruff, mypy, pytest)
9659593
"""
MediGuard AI RAG-Helper
Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
"""
import re
from pathlib import Path

from langchain_core.prompts import ChatPromptTemplate

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState
class DiseaseExplainerAgent:
    """
    Agent that retrieves and explains disease mechanisms using RAG.

    The agent queries a retriever for passages about the predicted disease,
    asks the explainer LLM for a four-section explanation (pathophysiology,
    diagnostic criteria, clinical presentation, summary) and packages the
    result, together with source citations, into an AgentOutput.
    """

    # Name recorded on every AgentOutput produced by this agent.
    _AGENT_NAME = "Disease Explainer"

    # Findings used when citations are required but retrieval returned nothing.
    _INSUFFICIENT_EVIDENCE = {
        "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
        "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
        "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
        "summary": "Insufficient evidence available for a detailed explanation.",
    }

    _SYSTEM_PROMPT = (
        "You are a medical expert explaining diseases for patient self-assessment.\n"
        "Based on the provided medical literature, explain the disease in clear, accessible language.\n"
        "Structure your response with these sections:\n"
        "1. PATHOPHYSIOLOGY: The underlying biological mechanisms\n"
        "2. DIAGNOSTIC_CRITERIA: How the disease is diagnosed\n"
        "3. CLINICAL_PRESENTATION: Common symptoms and signs\n"
        "4. SUMMARY: A 2-3 sentence overview\n"
        "Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers."
    )

    # Template placeholders ({disease}, {confidence}, {context}) are filled by
    # ChatPromptTemplate at invoke time; this is deliberately not an f-string.
    _HUMAN_PROMPT = (
        "Disease: {disease}\n"
        "Prediction Confidence: {confidence:.1%}\n"
        "Medical Literature Context:\n"
        "{context}\n"
        "Please provide a structured explanation."
    )

    # A section header is a line that *starts* (after optional markdown markers
    # and list numbering) with a section keyword and then either ends or
    # continues with ":". Keywords appearing in running text are not headers.
    _SECTION_HEADER = re.compile(
        r"""
        ^[\s\#>*_\-]*                       # leading markdown markers (heading, quote, bullet, emphasis)
        (?:\d+\s*[.)][\s*_]*)?              # optional list number such as "1." or "2)"
        (?:
            (?P<pathophysiology>PATHOPHYSIOLOGY)
          | (?P<diagnostic_criteria>DIAGNOSTIC(?:[\s_]+CRITERIA)?)
          | (?P<clinical_presentation>CLINICAL(?:[\s_]+PRESENTATION)?|PRESENTATION)
          | (?P<summary>SUMMARY)
        )
        [*_\s]*                             # closing emphasis markers
        (?: : [*_\s]* (?P<inline>.*) )?     # optional ": text" sharing the header line
        $
        """,
        re.IGNORECASE | re.VERBOSE,
    )

    def __init__(self, retriever):
        """
        Initialize with a retriever for medical PDFs.

        Args:
            retriever: Vector store retriever for disease documents
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def explain(self, state: "GuildState") -> "GuildState":
        """
        Retrieve and explain disease pathophysiology.

        Args:
            state: Current guild state. Reads ``state["model_prediction"]``
                (keys ``disease`` and ``confidence``) and ``state["sop"]``
                (attributes ``disease_explainer_k`` and ``require_pdf_citations``).

        Returns:
            A dict ``{"agent_outputs": [output]}`` holding this agent's single
            AgentOutput. NOTE(review): presumably merged into the guild state
            by the caller; confirm against the graph definition.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Disease Explainer Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        confidence = model_prediction["confidence"]
        sop = state["sop"]
        retrieval_k = sop.disease_explainer_k

        # Retrieve relevant documents
        print(f"\nRetrieving information about: {disease}")
        print(f"Retrieval k={retrieval_k}")
        query = (
            f"What is {disease}? Explain the pathophysiology, diagnostic criteria,\n"
            "and clinical presentation. Focus on mechanisms relevant to blood biomarkers."
        )
        docs = self._retrieve(query, retrieval_k)
        print(f"Retrieved {len(docs)} relevant document chunks")

        if sop.require_pdf_citations and not docs:
            # Citations are mandatory but nothing was retrieved: report the gap
            # instead of letting the LLM answer without sources.
            output = self._build_output(
                disease,
                self._INSUFFICIENT_EVIDENCE,
                [],
                confidence,
                retrieval_quality=0,
                citations_missing=True,
            )
            print("\nDisease explanation generated")
            print(" - Pathophysiology: insufficient evidence")
            print(" - Citations: 0 sources")
            return {"agent_outputs": [output]}

        explanation = self._generate_explanation(disease, docs, confidence)
        citations = self._extract_citations(docs)
        output = self._build_output(
            disease,
            explanation,
            citations,
            confidence,
            retrieval_quality=len(docs),
            citations_missing=False,
        )

        print("\nDisease explanation generated")
        print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
        print(f" - Citations: {len(citations)} sources")
        return {"agent_outputs": [output]}

    def _retrieve(self, query: str, k: int) -> list:
        """Run the retriever for ``query`` with ``k`` results, then restore its settings."""
        # The retriever is shared, so its search_kwargs are overridden only for
        # the duration of this call and put back even when retrieval raises.
        # NOTE(review): the temporary override is visible to any concurrent
        # user of the same retriever object.
        original_search_kwargs = dict(self.retriever.search_kwargs)
        self.retriever.search_kwargs = {**original_search_kwargs, "k": k}
        try:
            return self.retriever.invoke(query)
        finally:
            self.retriever.search_kwargs = original_search_kwargs

    def _build_output(
        self,
        disease: str,
        explanation: dict,
        citations: list,
        confidence: float,
        retrieval_quality: int,
        citations_missing: bool,
    ) -> "AgentOutput":
        """Assemble the AgentOutput shared by the normal and no-evidence paths."""
        return AgentOutput(
            agent_name=self._AGENT_NAME,
            findings={
                "disease": disease,
                "pathophysiology": explanation["pathophysiology"],
                "diagnostic_criteria": explanation["diagnostic_criteria"],
                "clinical_presentation": explanation["clinical_presentation"],
                "mechanism_summary": explanation["summary"],
                "citations": citations,
                "confidence": confidence,
                "retrieval_quality": retrieval_quality,
                "citations_missing": citations_missing,
            },
        )

    def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
        """
        Generate structured disease explanation using LLM and retrieved docs.

        Args:
            disease: Name of the predicted disease
            docs: Retrieved documents; each must expose ``metadata`` and ``page_content``
            confidence: Prediction confidence, rendered as a percentage in the prompt

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If the LLM call or the
            parsing raises, generic placeholder texts are returned instead.
        """
        # Format retrieved context
        context = "\n\n---\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs
        )
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._SYSTEM_PROMPT),
                ("human", self._HUMAN_PROMPT),
            ]
        )
        chain = prompt | self.llm
        try:
            response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
            return self._parse_explanation(response.content)
        except Exception as e:
            # Deliberate best-effort: an LLM failure must not abort the whole
            # run, so fall back to generic texts and surface the error.
            print(f"Warning: LLM explanation generation failed: {e}")
            return {
                "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
                "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
                "clinical_presentation": "Clinical presentation varies by individual.",
                "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider.",
            }

    def _parse_explanation(self, content: str) -> dict:
        """
        Parse LLM response into structured sections.

        A header is recognised only when a line begins with a section keyword
        (optionally preceded by markdown markers or list numbering) and is
        followed by nothing or by ``:``. Text after the colon is kept as the
        first line of that section. Lines that merely mention a keyword are
        ordinary content. Text before the first header is ignored.

        Args:
            content: Raw text returned by the LLM

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``; each value is the
            section's non-blank lines, every line terminated by a newline.
            When no section receives any text, ``summary`` holds the first
            500 characters of ``content``.
        """
        sections = {"pathophysiology": "", "diagnostic_criteria": "", "clinical_presentation": "", "summary": ""}
        current_section = None
        for line in content.splitlines():
            header = self._SECTION_HEADER.match(line)
            if header:
                # Exactly one of the per-section named groups matched.
                current_section = next(key for key in sections if header.group(key) is not None)
                inline = (header.group("inline") or "").strip()
                if inline:
                    sections[current_section] += inline + "\n"
            elif current_section and line.strip():
                sections[current_section] += line + "\n"

        # If parsing failed, use full content as summary
        if not any(sections.values()):
            sections["summary"] = content[:500]
        return sections

    def _extract_citations(self, docs: list) -> list:
        """
        Extract citations from retrieved documents.

        Args:
            docs: Retrieved documents; each must expose a ``metadata`` mapping

        Returns:
            One citation string per document, in order: the source's file name
            (``Unknown`` when absent), followed by `` (Page N)`` when the
            metadata carries a page.
        """
        citations = []
        for doc in docs:
            # ``or`` also covers a present-but-None source; str() guards
            # against Path objects stored in metadata.
            source = str(doc.metadata.get("source") or "Unknown")
            if "\\" in source or "/" in source:
                # Normalise backslashes first: on POSIX, Path does not treat
                # them as separators, so Windows-style sources would otherwise
                # be returned unchanged.
                source = Path(source.replace("\\", "/")).name or source
            citation = source
            page = doc.metadata.get("page", "N/A")
            if page is not None and page != "N/A":
                citation += f" (Page {page})"
            citations.append(citation)
        return citations
def create_disease_explainer_agent(retriever):
    """
    Build a DiseaseExplainerAgent bound to the given retriever.

    Args:
        retriever: Vector store retriever for disease documents

    Returns:
        A new DiseaseExplainerAgent wrapping ``retriever``
    """
    agent = DiseaseExplainerAgent(retriever)
    return agent