Spaces:
Running
Running
| """ | |
| MediGuard AI RAG-Helper | |
| Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs | |
| """ | |
import re
from pathlib import Path

from langchain_core.prompts import ChatPromptTemplate

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState
class DiseaseExplainerAgent:
    """Agent that retrieves and explains disease mechanisms using RAG.

    The agent queries a retriever for document chunks about the predicted
    disease, asks the configured LLM for a sectioned explanation, parses that
    text into four fields and wraps everything in an ``AgentOutput``.
    """

    # Name reported in every AgentOutput produced by this agent.
    _AGENT_NAME = "Disease Explainer"

    # System message: fixes the four section names the parser looks for.
    _SYSTEM_PROMPT = (
        "You are a medical expert explaining diseases for patient self-assessment.\n"
        "Based on the provided medical literature, explain the disease in clear, accessible language.\n"
        "Structure your response with these sections:\n"
        "1. PATHOPHYSIOLOGY: The underlying biological mechanisms\n"
        "2. DIAGNOSTIC_CRITERIA: How the disease is diagnosed\n"
        "3. CLINICAL_PRESENTATION: Common symptoms and signs\n"
        "4. SUMMARY: A 2-3 sentence overview\n"
        "Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers."
    )

    # Human message template; placeholders are filled by ChatPromptTemplate.
    _HUMAN_PROMPT = (
        "Disease: {disease}\n"
        "Prediction Confidence: {confidence:.1%}\n"
        "Medical Literature Context:\n"
        "{context}\n"
        "Please provide a structured explanation."
    )

    # A header is a line that *starts* with a section keyword (after optional
    # markdown decoration / list numbering) and is followed only by an
    # optional ":" plus inline text. Sentences that merely mention a keyword
    # are therefore treated as content, not as headers.
    _SECTION_HEADER = re.compile(
        r"^[\s#>*_-]*(?:\d+\s*[.)]\s*)?[*_]*"
        r"(?P<name>PATHOPHYSIOLOGY|DIAGNOSTIC(?:[\s_]+CRITERIA)?"
        r"|CLINICAL(?:[\s_]+PRESENTATION)?|PRESENTATION|SUMMARY)"
        r"[*_]*\s*(?::\s*[*_]*\s*(?P<rest>.*))?$",
        re.IGNORECASE,
    )

    # Header keyword prefix -> key in the parsed explanation dict.
    _SECTION_KEYS = (
        ("PATHOPHYSIOLOGY", "pathophysiology"),
        ("DIAGNOSTIC", "diagnostic_criteria"),
        ("CLINICAL", "clinical_presentation"),
        ("PRESENTATION", "clinical_presentation"),
        ("SUMMARY", "summary"),
    )

    def __init__(self, retriever):
        """
        Initialize with a retriever for medical PDFs.

        Args:
            retriever: Vector store retriever for disease documents. It must
                expose ``search_kwargs`` and ``invoke(query)``.
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def explain(self, state: GuildState) -> GuildState:
        """
        Retrieve and explain disease pathophysiology.

        Args:
            state: Current guild state. Reads ``state["model_prediction"]``
                (keys ``disease`` and ``confidence``) and ``state["sop"]``
                (attributes ``disease_explainer_k`` and
                ``require_pdf_citations``).

        Returns:
            A dict ``{"agent_outputs": [AgentOutput]}`` holding this agent's
            findings. When citations are required but nothing was retrieved,
            the findings carry fixed "insufficient evidence" texts and the LLM
            is not called.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Disease Explainer Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        confidence = model_prediction["confidence"]
        sop = state["sop"]
        retrieval_k = sop.disease_explainer_k

        print(f"\nRetrieving information about: {disease}")
        print(f"Retrieval k={retrieval_k}")
        query = (
            f"What is {disease}? Explain the pathophysiology, diagnostic criteria,\n"
            "and clinical presentation. Focus on mechanisms relevant to blood biomarkers."
        )
        docs = self._retrieve(query, retrieval_k)
        print(f"Retrieved {len(docs)} relevant document chunks")

        if sop.require_pdf_citations and not docs:
            # No evidence to cite: report that explicitly instead of letting
            # the LLM answer without sources.
            explanation = {
                "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
                "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
                "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
                "summary": "Insufficient evidence available for a detailed explanation.",
            }
            output = self._build_output(
                disease, explanation, [], confidence, retrieval_quality=0, citations_missing=True
            )
            print("\nDisease explanation generated")
            print(" - Pathophysiology: insufficient evidence")
            print(" - Citations: 0 sources")
            return {"agent_outputs": [output]}

        explanation = self._generate_explanation(disease, docs, confidence)
        citations = self._extract_citations(docs)
        output = self._build_output(
            disease, explanation, citations, confidence, retrieval_quality=len(docs), citations_missing=False
        )

        print("\nDisease explanation generated")
        print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
        print(f" - Citations: {len(citations)} sources")
        return {"agent_outputs": [output]}

    def _retrieve(self, query: str, k) -> list:
        """Run the retriever with ``k`` results, then restore its settings."""
        # The retriever object is shared, so ``search_kwargs`` is overridden
        # only for the duration of this call and put back even if invoke()
        # raises. This is a temporary mutation, not an isolated copy.
        original_search_kwargs = dict(self.retriever.search_kwargs)
        self.retriever.search_kwargs = {**original_search_kwargs, "k": k}
        try:
            return self.retriever.invoke(query)
        finally:
            self.retriever.search_kwargs = original_search_kwargs

    def _build_output(
        self, disease, explanation: dict, citations: list, confidence, *, retrieval_quality: int, citations_missing: bool
    ):
        """Wrap an explanation dict and its citations in an AgentOutput."""
        return AgentOutput(
            agent_name=self._AGENT_NAME,
            findings={
                "disease": disease,
                "pathophysiology": explanation["pathophysiology"],
                "diagnostic_criteria": explanation["diagnostic_criteria"],
                "clinical_presentation": explanation["clinical_presentation"],
                "mechanism_summary": explanation["summary"],
                "citations": citations,
                "confidence": confidence,
                "retrieval_quality": retrieval_quality,
                "citations_missing": citations_missing,
            },
        )

    def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
        """Generate structured disease explanation using LLM and retrieved docs.

        Args:
            disease: Name of the predicted disease.
            docs: Retrieved documents; each must expose ``metadata`` and
                ``page_content``.
            confidence: Prediction confidence, formatted as a percentage.

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If the LLM call or the
            parsing raises, a generic fallback explanation is returned.
        """
        context = "\n\n---\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs
        )
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._SYSTEM_PROMPT),
                ("human", self._HUMAN_PROMPT),
            ]
        )
        chain = prompt | self.llm
        try:
            response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
            return self._parse_explanation(response.content)
        except Exception as e:
            # Deliberate best-effort: any LLM failure degrades to a generic
            # explanation instead of aborting the workflow.
            print(f"Warning: LLM explanation generation failed: {e}")
            return {
                "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
                "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
                "clinical_presentation": "Clinical presentation varies by individual.",
                "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider.",
            }

    def _parse_explanation(self, content: str) -> dict:
        """Parse LLM response into structured sections.

        A line is a section header only when it begins with one of the
        section names (optionally preceded by markdown markers or a list
        number) and has nothing after the name except an optional ``:``
        followed by inline text. Inline text after the colon is kept as the
        first line of that section. All other non-blank lines are appended to
        the current section; lines before the first header are ignored.

        Args:
            content: Raw text returned by the LLM.

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If no section received
            any text, ``summary`` holds the first 500 characters of
            ``content``.
        """
        sections = {"pathophysiology": "", "diagnostic_criteria": "", "clinical_presentation": "", "summary": ""}
        current_section = None
        for line in content.split("\n"):
            header = self._SECTION_HEADER.match(line)
            if header:
                name = header.group("name").upper()
                current_section = next(key for prefix, key in self._SECTION_KEYS if name.startswith(prefix))
                inline_text = (header.group("rest") or "").strip()
                if inline_text:
                    sections[current_section] += inline_text + "\n"
            elif current_section and line.strip():
                sections[current_section] += line + "\n"

        # If parsing failed, use full content as summary
        if not any(sections.values()):
            sections["summary"] = content[:500]
        return sections

    def _extract_citations(self, docs: list) -> list:
        """Extract citations from retrieved documents.

        Each citation is the source file name, followed by ``(Page N)`` when
        the document metadata carries a page. A missing or empty source is
        reported as ``Unknown``.

        Args:
            docs: Retrieved documents exposing a ``metadata`` mapping.

        Returns:
            One citation string per document, in retrieval order.
        """
        citations = []
        for doc in docs:
            source = str(doc.metadata.get("source") or "Unknown")
            page = doc.metadata.get("page", "N/A")
            if "\\" in source or "/" in source:
                # Normalise backslashes first: on POSIX, Path does not treat
                # "\" as a separator, so Windows-style paths would otherwise
                # be returned unshortened.
                source = Path(source.replace("\\", "/")).name
            citation = source
            if page is not None and page != "N/A":
                citation += f" (Page {page})"
            citations.append(citation)
        return citations
def create_disease_explainer_agent(retriever):
    """Build a DiseaseExplainerAgent bound to the given retriever.

    Args:
        retriever: Retriever handed unchanged to ``DiseaseExplainerAgent``.

    Returns:
        The newly constructed ``DiseaseExplainerAgent``.
    """
    agent = DiseaseExplainerAgent(retriever)
    return agent