Risk-Adjustment-Version1 / ComorbidityCheckerAgent.py
sujataprakashdatycs's picture
Update ComorbidityCheckerAgent.py
a7fab2a verified
# comorbidity_checker.py
import json
from typing import List, Dict
from json_repair import repair_json
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool, SerperDevTool
from langchain_openai import ChatOpenAI
from embedding_manager import DirectoryEmbeddingManager
class ComorbidityCheckerAgent:
    """
    Two-step flow:
      1) Identify clinically significant comorbidities for the primary diagnosis (HCC-aware).
      2) Verify each comorbidity against the patient chart embeddings (top-15).
    """

    def __init__(self, pdf_dir_or_file: str, hcc_code: str, model_version: str, model: str = "gpt-4o"):
        """
        Build the embedding manager, the LLM client, the chart-search tool and the agent.

        Args:
            pdf_dir_or_file: Passed unchanged to ``DirectoryEmbeddingManager``.
            hcc_code: HCC code interpolated into the task prompts (whitespace-stripped).
            model_version: Model version label interpolated into the prompts
                (whitespace-stripped and upper-cased).
            model: Chat model name handed to ``ChatOpenAI``.
        """
        self.embed_manager = DirectoryEmbeddingManager(pdf_dir_or_file)
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.hcc_code = hcc_code.strip()
        self.model_version = model_version.strip().upper()
        self.search_tool = SerperDevTool()  # available if you want to expand later

        # The tool is defined as a closure so it can reach self.embed_manager.
        @tool("patient_chart_search")
        def patient_chart_search(query: str) -> str:
            """
            Query persistent patient-chart embeddings.
            Returns the top-15 results concatenated with separators.
            """
            print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
            vectordb = self.embed_manager.get_or_create_embeddings()
            results = vectordb.similarity_search(query, k=15)
            return "\n---\n".join(res.page_content for res in results)

        self.patient_chart_search = patient_chart_search

        self.agent = Agent(
            role="Clinical Coding and Comorbidity Analyst",
            goal=(
                "Identify clinically significant comorbidities for a primary diagnosis relevant to HCC; "
                "verify presence in the patient's chart with embeddings."
            ),
            backstory=(
                "Expert risk-adjustment analyst who cross-references guidelines with chart evidence."
            ),
            tools=[self.patient_chart_search],
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    @staticmethod
    def _parse_json_output(crew_output) -> Dict:
        """
        Best-effort conversion of a crew result into a JSON object.

        ``crew_output`` may be a plain string or an object exposing the text
        on a ``raw`` attribute; either way it is coerced to ``str`` before
        being repaired and decoded. Returns ``{}`` when the text cannot be
        decoded or does not decode to a JSON object.
        """
        raw_text = getattr(crew_output, "raw", crew_output)
        try:
            parsed = json.loads(repair_json(str(raw_text)))
        except Exception as exc:  # deliberate best-effort: never abort the pipeline here
            print(f"[ERROR] Could not parse agent output as JSON: {exc}")
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def _run_task(self, task) -> Dict:
        """Run a single task with this agent and return its parsed JSON object."""
        crew = Crew(agents=[self.agent], tasks=[task], process=Process.sequential)
        return self._parse_json_output(crew.kickoff())

    def _check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
        """
        Run the identify-then-verify flow for one diagnosis entry.

        Args:
            diagnosis_entry: Dict that must contain a ``"diagnosis"`` key.

        Returns:
            ``{"diagnosis": <primary diagnosis>, "comorbidities": <analysis>}``.
            ``comorbidities`` is ``[]`` when nothing was identified or when
            either agent response could not be parsed.

        Raises:
            KeyError: if ``diagnosis_entry`` has no ``"diagnosis"`` key.
        """
        primary_diagnosis = diagnosis_entry["diagnosis"]
        final_result: Dict = {"diagnosis": primary_diagnosis, "comorbidities": []}

        # Task 1 — Identify comorbidities
        identify_task = Task(
            description=(
                f"For primary diagnosis '{primary_diagnosis}', list common and clinically meaningful comorbidities "
                f"that matter for HCC {self.hcc_code} in {self.model_version}. "
                "Return STRICT JSON: {\"potential_comorbidities\": [\"...\"]}"
            ),
            expected_output="Strict JSON with key potential_comorbidities (list of strings).",
            agent=self.agent,
            json_mode=True,
        )
        comorbidities = self._run_task(identify_task).get("potential_comorbidities", [])
        if not comorbidities:
            # Nothing to verify, so skip the second (tool-using) task entirely.
            return final_result

        # Task 2 — Verify each comorbidity via patient_chart_search
        verify_task = Task(
            description=(
                f"Primary diagnosis: '{primary_diagnosis}'. Potential comorbidities: {comorbidities}.\n"
                "For EACH comorbidity, call the patient_chart_search tool (top-15). "
                "Decide presence/absence using ONLY returned snippets.\n\n"
                "Return STRICT JSON:\n"
                "{ \"comorbidity_analysis\": [\n"
                "  {\"condition\":\"...\",\"is_present\":true/false,\"context\":\"<combined snippets>\",\"rationale\":\"...\"},\n"
                "  ... ] }"
            ),
            expected_output="Strict JSON with key comorbidity_analysis (list of objects).",
            agent=self.agent,
            json_mode=True,
        )
        final_result["comorbidities"] = self._run_task(verify_task).get("comorbidity_analysis", [])
        return final_result

    def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
        """
        Accepts entries that already passed MEAT (i.e., meat dict exists and has True somewhere).

        Entries whose ``"meat"`` dict has at least one truthy value are sent
        through the comorbidity check. Every other entry is passed through
        as-is; if its ``answer_explicit`` or ``answer_implicit`` is "yes"
        (case-insensitive) a status note is attached under ``"comorbidities"``.
        Note that pass-through entries are annotated in place, so the caller's
        dict objects are mutated.

        Returns:
            One result dict per input entry, in input order.
        """
        out: List[Dict] = []
        for entry in meat_validated_results:
            meat = entry.get("meat", {})
            if isinstance(meat, dict) and any(meat.values()):
                print(f"[INFO] Checking structured comorbidities for: {entry['diagnosis']}")
                out.append(self._check_comorbidities_for_one(entry))
                continue

            # If earlier stages claim 'yes' but MEAT not met, pass through with a note.
            # Values are normalised via str(... or "") so a key that is present
            # but None (or otherwise non-string) does not raise on .lower().
            claimed_yes = any(
                str(entry.get(key) or "").lower() == "yes"
                for key in ("answer_explicit", "answer_implicit")
            )
            if claimed_yes:
                entry["comorbidities"] = {
                    "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
                }
            out.append(entry)
        return out
# import os
# import json
# import pandas as pd
# from PyPDF2 import PdfReader
# from json_repair import repair_json
# from typing import List, Dict, Any, Optional
# from crewai import Agent, Task, Crew, Process
# from crewai_tools import SerperDevTool,tool
# from langchain_openai import ChatOpenAI
# from langchain_community.vectorstores import Chroma
# from embedding_manager import DirectoryEmbeddingManager
# SEED_SOURCES = [
# "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
# "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
# "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
# "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
# ]
# class ComorbidityCheckerAgent:
# """
# Uses a two-step AI agent process to first identify potential comorbidities for a
# MEAT-validated diagnosis and then verifies each one against the patient chart context.
# """
# def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
# self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
# self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4o"), temperature=0)
# self.hcc_code = hcc_code.strip()
# self.model_version = model_version.strip().upper()
# self.search_tool = SerperDevTool()
# #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)
# @tool("patient_chart_search")
# def patient_chart_search(query: str) -> str:
# """
# Search the patient chart embeddings and return all top 15 results as a single string.
# Each result is preserved individually and then combined at the end.
# """
# print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
# vectordb = self.embed_manager.get_or_create_embeddings()
# results = vectordb.similarity_search(query, k=15)
# # Keep all 15 results separate internally
# all_results = [res.page_content for res in results]
# # Combine into a single string for output (same format as before)
# combined_results = "\n---\n".join(all_results)
# return combined_results
# # Register the agent with the tool
# self.agent = Agent(
# role="Clinical Coding and Comorbidity Analyst",
# goal=(
# "First, identify all clinically significant comorbidities for a given primary diagnosis, "
# "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
# "these comorbidities in a patient's chart and present the findings in a structured JSON format."
# ),
# backstory=(
# "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
# "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
# "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
# ),
# tools=[patient_chart_search],
# verbose=True,
# memory=False,
# llm=self.llm,
# )
# def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
# """
# Orchestrates the two-task process for a single primary diagnosis.
# """
# primary_diagnosis = diagnosis_entry["diagnosis"]
# final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}
# # --- Task 1: Identify Potential Comorbidities ---
# identify_task = Task(
# description=(
# f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
# f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
# f"in the {self.model_version} model. Use your search tool for research if needed."
# ),
# expected_output=(
# "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
# "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
# ),
# agent=self.agent,
# json_mode=True
# )
# print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
# crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
# result = crew.kickoff()
# try:
# comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
# if not comorbidities_to_check:
# print("[INFO] No potential comorbidities were identified by the agent.")
# return final_result
# print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
# except (json.JSONDecodeError, TypeError):
# print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
# return final_result
# # --- Task 2: Verify Each Comorbidity in the Chart ---
# verify_task = Task(
# description=(
# f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
# f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
# "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
# "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
# "determine presence or absence of each comorbidity.\n\n"
# "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
# "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
# "combine all relevant evidence snippets into a single string."
# ),
# expected_output=(
# "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
# "where each item has the structure: \n"
# '{\n'
# ' "condition": "<name of comorbidity>",\n'
# ' "is_present": true/false,\n'
# ' "context": "<Use all 15 retrieved results individually and combined them according to comorbidity>",\n'
# ' "rationale": "<one-line explanation of your finding>"\n'
# '}'
# ),
# agent=self.agent,
# json_mode=True
# )
# print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
# crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
# result = crew.kickoff()
# try:
# analysis = json.loads(repair_json(result))
# final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
# except (json.JSONDecodeError, TypeError):
# print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
# final_result["comorbidities"] = []
# return final_result
# def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
# """
# Main execution loop. It iterates through diagnoses that have met MEAT criteria
# and runs the comorbidity check for each.
# """
# final_results = []
# for entry in meat_validated_results:
# meat_criteria = entry.get("meat", {})
# if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
# print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
# entry_with_comorbidities = self.check_comorbidities_for_one(entry)
# final_results.append(entry_with_comorbidities)
# print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
# else:
# if entry.get("answer", "").lower() == "yes":
# entry["comorbidities"] = {
# "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
# }
# final_results.append(entry)
# return final_results