Spaces:
Sleeping
Sleeping
device token
Browse files- api/routes/patients.py +35 -97
- api/services/fhir_integration.py +148 -6
- api/services/synthea_integration.py +168 -222
- data/new_tool.json +0 -1
api/routes/patients.py
CHANGED
|
@@ -726,6 +726,18 @@ async def get_patients(
|
|
| 726 |
if patient.get("date_of_birth") == "":
|
| 727 |
patient["date_of_birth"] = None
|
| 728 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
processed_patients.append(patient)
|
| 730 |
|
| 731 |
logger.info(f"✅ Returning {len(processed_patients)} processed patients")
|
|
@@ -1170,14 +1182,20 @@ async def update_patient(
|
|
| 1170 |
@router.post("/patients/import-hapi-fhir", status_code=status.HTTP_201_CREATED)
|
| 1171 |
async def import_hapi_patients(
|
| 1172 |
limit: int = Query(20, ge=1, le=100, description="Number of patients to import"),
|
|
|
|
|
|
|
| 1173 |
current_user: dict = Depends(get_current_user)
|
| 1174 |
):
|
| 1175 |
"""
|
| 1176 |
-
Import patients from HAPI FHIR Test Server
|
| 1177 |
"""
|
| 1178 |
try:
|
| 1179 |
service = HAPIFHIRIntegrationService()
|
| 1180 |
-
result = await service.import_patients_from_hapi(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1181 |
|
| 1182 |
# Create detailed message
|
| 1183 |
message_parts = []
|
|
@@ -1185,6 +1203,8 @@ async def import_hapi_patients(
|
|
| 1185 |
message_parts.append(f"Successfully imported {result['imported_count']} patients")
|
| 1186 |
if result["skipped_count"] > 0:
|
| 1187 |
message_parts.append(f"Skipped {result['skipped_count']} duplicate patients")
|
|
|
|
|
|
|
| 1188 |
if result["errors"]:
|
| 1189 |
message_parts.append(f"Encountered {len(result['errors'])} errors")
|
| 1190 |
|
|
@@ -1194,9 +1214,12 @@ async def import_hapi_patients(
|
|
| 1194 |
"message": message,
|
| 1195 |
"imported_count": result["imported_count"],
|
| 1196 |
"skipped_count": result["skipped_count"],
|
|
|
|
| 1197 |
"total_found": result["total_found"],
|
| 1198 |
"imported_patients": result["imported_patients"],
|
| 1199 |
"skipped_patients": result["skipped_patients"],
|
|
|
|
|
|
|
| 1200 |
"errors": result["errors"],
|
| 1201 |
"source": "hapi_fhir"
|
| 1202 |
}
|
|
@@ -1386,17 +1409,12 @@ async def fetch_ehr_data(
|
|
| 1386 |
"field_mapping": ehr_system_config["field_mapping"]
|
| 1387 |
}
|
| 1388 |
else:
|
| 1389 |
-
# For other EHR systems, we
|
| 1390 |
-
#
|
| 1391 |
-
|
| 1392 |
-
|
| 1393 |
-
|
| 1394 |
-
|
| 1395 |
-
"data": sample_data,
|
| 1396 |
-
"total_count": len(sample_data),
|
| 1397 |
-
"field_mapping": ehr_system_config["field_mapping"],
|
| 1398 |
-
"note": "This is sample data. Implement actual FHIR client integration for production use."
|
| 1399 |
-
}
|
| 1400 |
|
| 1401 |
except Exception as e:
|
| 1402 |
logger.error(f"Error fetching EHR data: {str(e)}")
|
|
@@ -1405,89 +1423,7 @@ async def fetch_ehr_data(
|
|
| 1405 |
detail=f"Failed to fetch EHR data: {str(e)}"
|
| 1406 |
)
|
| 1407 |
|
| 1408 |
-
|
| 1409 |
-
"""
|
| 1410 |
-
Generate sample EHR data for testing purposes
|
| 1411 |
-
"""
|
| 1412 |
-
import random
|
| 1413 |
-
from datetime import date, timedelta
|
| 1414 |
-
|
| 1415 |
-
sample_names = [
|
| 1416 |
-
"John Smith", "Jane Doe", "Michael Johnson", "Sarah Wilson", "David Brown",
|
| 1417 |
-
"Emily Davis", "Robert Miller", "Lisa Garcia", "James Rodriguez", "Maria Martinez",
|
| 1418 |
-
"Christopher Anderson", "Jennifer Taylor", "Daniel Thomas", "Amanda Jackson",
|
| 1419 |
-
"Matthew White", "Nicole Harris", "Joshua Martin", "Stephanie Thompson"
|
| 1420 |
-
]
|
| 1421 |
-
|
| 1422 |
-
sample_addresses = [
|
| 1423 |
-
"123 Main St, New York, NY 10001",
|
| 1424 |
-
"456 Oak Ave, Los Angeles, CA 90210",
|
| 1425 |
-
"789 Pine Rd, Chicago, IL 60601",
|
| 1426 |
-
"321 Elm St, Houston, TX 77001",
|
| 1427 |
-
"654 Maple Dr, Phoenix, AZ 85001"
|
| 1428 |
-
]
|
| 1429 |
-
|
| 1430 |
-
sample_allergies = [
|
| 1431 |
-
"Penicillin", "Peanuts", "Latex", "Shellfish", "Dairy", "Eggs", "Soy", "Wheat"
|
| 1432 |
-
]
|
| 1433 |
-
|
| 1434 |
-
sample_conditions = [
|
| 1435 |
-
"Hypertension", "Diabetes Type 2", "Asthma", "Depression", "Anxiety",
|
| 1436 |
-
"Obesity", "Arthritis", "Heart Disease", "Chronic Kidney Disease", "COPD"
|
| 1437 |
-
]
|
| 1438 |
-
|
| 1439 |
-
sample_medications = [
|
| 1440 |
-
"Lisinopril", "Metformin", "Albuterol", "Sertraline", "Atorvastatin",
|
| 1441 |
-
"Omeprazole", "Amlodipine", "Losartan", "Simvastatin", "Hydrochlorothiazide"
|
| 1442 |
-
]
|
| 1443 |
-
|
| 1444 |
-
sample_insurance = [
|
| 1445 |
-
("Blue Cross Blue Shield", "BCBS123456"),
|
| 1446 |
-
("Aetna", "AET789012"),
|
| 1447 |
-
("Cigna", "CIG345678"),
|
| 1448 |
-
("UnitedHealth", "UHC901234"),
|
| 1449 |
-
("Humana", "HUM567890")
|
| 1450 |
-
]
|
| 1451 |
-
|
| 1452 |
-
data = []
|
| 1453 |
-
for i in range(min(limit, 18)):
|
| 1454 |
-
# Generate random date of birth (18-80 years old)
|
| 1455 |
-
years_old = random.randint(18, 80)
|
| 1456 |
-
birth_date = date.today() - timedelta(days=years_old * 365 + random.randint(0, 365))
|
| 1457 |
-
|
| 1458 |
-
# Generate random allergies and conditions
|
| 1459 |
-
patient_allergies = random.sample(sample_allergies, random.randint(0, 3))
|
| 1460 |
-
patient_conditions = random.sample(sample_conditions, random.randint(0, 2))
|
| 1461 |
-
patient_medications = random.sample(sample_medications, random.randint(0, 3))
|
| 1462 |
-
|
| 1463 |
-
# Generate emergency contact
|
| 1464 |
-
emergency_contact = random.choice(sample_names)
|
| 1465 |
-
while emergency_contact == sample_names[i % len(sample_names)]:
|
| 1466 |
-
emergency_contact = random.choice(sample_names)
|
| 1467 |
-
|
| 1468 |
-
# Generate insurance info
|
| 1469 |
-
insurance_provider, insurance_policy = random.choice(sample_insurance)
|
| 1470 |
-
|
| 1471 |
-
patient_data = {
|
| 1472 |
-
"ehr_id": f"{ehr_system.upper()}{str(i+1).zfill(3)}",
|
| 1473 |
-
"full_name": sample_names[i % len(sample_names)],
|
| 1474 |
-
"date_of_birth": birth_date.strftime("%Y-%m-%d"),
|
| 1475 |
-
"gender": random.choice(["male", "female"]),
|
| 1476 |
-
"address": random.choice(sample_addresses),
|
| 1477 |
-
"national_id": f"{random.randint(100000000, 999999999)}",
|
| 1478 |
-
"blood_type": random.choice(["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"]),
|
| 1479 |
-
"allergies": patient_allergies,
|
| 1480 |
-
"chronic_conditions": patient_conditions,
|
| 1481 |
-
"medications": patient_medications,
|
| 1482 |
-
"emergency_contact_name": emergency_contact,
|
| 1483 |
-
"emergency_contact_phone": f"555-{random.randint(100, 999)}-{random.randint(1000, 9999)}",
|
| 1484 |
-
"insurance_provider": insurance_provider,
|
| 1485 |
-
"insurance_policy_number": insurance_policy
|
| 1486 |
-
}
|
| 1487 |
-
|
| 1488 |
-
data.append(patient_data)
|
| 1489 |
-
|
| 1490 |
-
return data
|
| 1491 |
|
| 1492 |
@router.post("/patients/generate-synthea", status_code=status.HTTP_201_CREATED)
|
| 1493 |
async def generate_synthea_patients(
|
|
@@ -1644,6 +1580,7 @@ async def generate_and_import_synthea_patients(
|
|
| 1644 |
age_max: int = Query(80, ge=0, le=120, description="Maximum age for generated patients"),
|
| 1645 |
gender: str = Query("both", description="Gender distribution: male, female, or both"),
|
| 1646 |
location: str = Query("Massachusetts", description="Location for generated patients"),
|
|
|
|
| 1647 |
current_user: dict = Depends(get_current_user)
|
| 1648 |
):
|
| 1649 |
"""
|
|
@@ -1666,7 +1603,8 @@ async def generate_and_import_synthea_patients(
|
|
| 1666 |
age_min=age_min,
|
| 1667 |
age_max=age_max,
|
| 1668 |
gender=gender,
|
| 1669 |
-
location=location
|
|
|
|
| 1670 |
)
|
| 1671 |
|
| 1672 |
if not generation_result['patients']:
|
|
|
|
| 726 |
if patient.get("date_of_birth") == "":
|
| 727 |
patient["date_of_birth"] = None
|
| 728 |
|
| 729 |
+
# Add missing required fields for Synthea patients
|
| 730 |
+
if "status" not in patient:
|
| 731 |
+
patient["status"] = "active"
|
| 732 |
+
if "created_at" not in patient:
|
| 733 |
+
patient["created_at"] = patient.get("import_date", datetime.utcnow())
|
| 734 |
+
if "updated_at" not in patient:
|
| 735 |
+
patient["updated_at"] = patient.get("last_updated", datetime.utcnow())
|
| 736 |
+
|
| 737 |
+
# Ensure source field is present
|
| 738 |
+
if "source" not in patient:
|
| 739 |
+
patient["source"] = "synthea"
|
| 740 |
+
|
| 741 |
processed_patients.append(patient)
|
| 742 |
|
| 743 |
logger.info(f"✅ Returning {len(processed_patients)} processed patients")
|
|
|
|
| 1182 |
@router.post("/patients/import-hapi-fhir", status_code=status.HTTP_201_CREATED)
|
| 1183 |
async def import_hapi_patients(
|
| 1184 |
limit: int = Query(20, ge=1, le=100, description="Number of patients to import"),
|
| 1185 |
+
require_medical_data: bool = Query(False, description="Require patients to have medical data (conditions, medications, encounters, or observations)"),
|
| 1186 |
+
min_completeness_score: float = Query(0.7, ge=0.0, le=1.0, description="Minimum validation score (0-1) for a patient to be considered complete"),
|
| 1187 |
current_user: dict = Depends(get_current_user)
|
| 1188 |
):
|
| 1189 |
"""
|
| 1190 |
+
Import patients from HAPI FHIR Test Server with data completeness validation
|
| 1191 |
"""
|
| 1192 |
try:
|
| 1193 |
service = HAPIFHIRIntegrationService()
|
| 1194 |
+
result = await service.import_patients_from_hapi(
|
| 1195 |
+
limit=limit,
|
| 1196 |
+
require_medical_data=require_medical_data,
|
| 1197 |
+
min_completeness_score=min_completeness_score
|
| 1198 |
+
)
|
| 1199 |
|
| 1200 |
# Create detailed message
|
| 1201 |
message_parts = []
|
|
|
|
| 1203 |
message_parts.append(f"Successfully imported {result['imported_count']} patients")
|
| 1204 |
if result["skipped_count"] > 0:
|
| 1205 |
message_parts.append(f"Skipped {result['skipped_count']} duplicate patients")
|
| 1206 |
+
if result["filtered_count"] > 0:
|
| 1207 |
+
message_parts.append(f"Filtered out {result['filtered_count']} incomplete patients")
|
| 1208 |
if result["errors"]:
|
| 1209 |
message_parts.append(f"Encountered {len(result['errors'])} errors")
|
| 1210 |
|
|
|
|
| 1214 |
"message": message,
|
| 1215 |
"imported_count": result["imported_count"],
|
| 1216 |
"skipped_count": result["skipped_count"],
|
| 1217 |
+
"filtered_count": result["filtered_count"],
|
| 1218 |
"total_found": result["total_found"],
|
| 1219 |
"imported_patients": result["imported_patients"],
|
| 1220 |
"skipped_patients": result["skipped_patients"],
|
| 1221 |
+
"filtered_patients": result["filtered_patients"],
|
| 1222 |
+
"validation_summary": result["validation_summary"],
|
| 1223 |
"errors": result["errors"],
|
| 1224 |
"source": "hapi_fhir"
|
| 1225 |
}
|
|
|
|
| 1409 |
"field_mapping": ehr_system_config["field_mapping"]
|
| 1410 |
}
|
| 1411 |
else:
|
| 1412 |
+
# For other EHR systems, we don't support hardcoded data
|
| 1413 |
+
# Only real Synthea data is supported
|
| 1414 |
+
raise HTTPException(
|
| 1415 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
| 1416 |
+
detail=f"EHR system '{ehr_system}' is not supported. Only HAPI FHIR Test Server and Synthea are supported for real data generation."
|
| 1417 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1418 |
|
| 1419 |
except Exception as e:
|
| 1420 |
logger.error(f"Error fetching EHR data: {str(e)}")
|
|
|
|
| 1423 |
detail=f"Failed to fetch EHR data: {str(e)}"
|
| 1424 |
)
|
| 1425 |
|
| 1426 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1427 |
|
| 1428 |
@router.post("/patients/generate-synthea", status_code=status.HTTP_201_CREATED)
|
| 1429 |
async def generate_synthea_patients(
|
|
|
|
| 1580 |
age_max: int = Query(80, ge=0, le=120, description="Maximum age for generated patients"),
|
| 1581 |
gender: str = Query("both", description="Gender distribution: male, female, or both"),
|
| 1582 |
location: str = Query("Massachusetts", description="Location for generated patients"),
|
| 1583 |
+
require_medical_data: bool = Query(True, description="Require patients to have medical data (conditions, medications, encounters, or observations)"),
|
| 1584 |
current_user: dict = Depends(get_current_user)
|
| 1585 |
):
|
| 1586 |
"""
|
|
|
|
| 1603 |
age_min=age_min,
|
| 1604 |
age_max=age_max,
|
| 1605 |
gender=gender,
|
| 1606 |
+
location=location,
|
| 1607 |
+
require_medical_data=require_medical_data
|
| 1608 |
)
|
| 1609 |
|
| 1610 |
if not generation_result['patients']:
|
api/services/fhir_integration.py
CHANGED
|
@@ -11,9 +11,90 @@ class HAPIFHIRIntegrationService:
|
|
| 11 |
def __init__(self):
|
| 12 |
self.fhir_client = HAPIFHIRClient()
|
| 13 |
|
| 14 |
-
|
| 15 |
"""
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"""
|
| 18 |
try:
|
| 19 |
print(f"Fetching {limit} patients from HAPI FHIR...")
|
|
@@ -24,19 +105,34 @@ class HAPIFHIRIntegrationService:
|
|
| 24 |
return {
|
| 25 |
"imported_count": 0,
|
| 26 |
"skipped_count": 0,
|
|
|
|
| 27 |
"total_found": 0,
|
| 28 |
"imported_patients": [],
|
| 29 |
"skipped_patients": [],
|
|
|
|
|
|
|
| 30 |
"errors": []
|
| 31 |
}
|
| 32 |
|
| 33 |
-
print(f"Found {len(patients)} patients, checking for duplicates...")
|
| 34 |
|
| 35 |
imported_count = 0
|
| 36 |
skipped_count = 0
|
|
|
|
| 37 |
imported_patients = []
|
| 38 |
skipped_patients = []
|
|
|
|
| 39 |
errors = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
for patient in patients:
|
| 42 |
try:
|
|
@@ -58,13 +154,48 @@ class HAPIFHIRIntegrationService:
|
|
| 58 |
# Enhance patient data with additional FHIR data
|
| 59 |
enhanced_patient = await self._enhance_patient_data(patient)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# Insert into database
|
| 62 |
result = await db.patients.insert_one(enhanced_patient)
|
| 63 |
|
| 64 |
if result.inserted_id:
|
| 65 |
imported_count += 1
|
| 66 |
-
imported_patients.append(
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
except Exception as e:
|
| 70 |
error_msg = f"Error importing patient {patient.get('full_name', 'Unknown')}: {e}"
|
|
@@ -72,14 +203,22 @@ class HAPIFHIRIntegrationService:
|
|
| 72 |
print(error_msg)
|
| 73 |
continue
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
return {
|
| 78 |
"imported_count": imported_count,
|
| 79 |
"skipped_count": skipped_count,
|
|
|
|
| 80 |
"total_found": len(patients),
|
| 81 |
"imported_patients": imported_patients,
|
| 82 |
"skipped_patients": skipped_patients,
|
|
|
|
|
|
|
| 83 |
"errors": errors
|
| 84 |
}
|
| 85 |
|
|
@@ -88,9 +227,12 @@ class HAPIFHIRIntegrationService:
|
|
| 88 |
return {
|
| 89 |
"imported_count": 0,
|
| 90 |
"skipped_count": 0,
|
|
|
|
| 91 |
"total_found": 0,
|
| 92 |
"imported_patients": [],
|
| 93 |
"skipped_patients": [],
|
|
|
|
|
|
|
| 94 |
"errors": [str(e)]
|
| 95 |
}
|
| 96 |
|
|
|
|
| 11 |
def __init__(self):
|
| 12 |
self.fhir_client = HAPIFHIRClient()
|
| 13 |
|
| 14 |
+
def _validate_patient_data_completeness(
    self,
    patient: Dict,
    require_medical_data: bool = False,
    min_completeness_score: float = 0.7,
) -> Dict[str, object]:
    """
    Score how complete a patient record is and decide whether it passes.

    Args:
        patient: Patient data dictionary.
        require_medical_data: When True, a record with no non-empty
            observations, medications or conditions under
            ``clinical_data`` is reported as incomplete and
            ``'medical_data'`` is appended to ``missing_fields``.
        min_completeness_score: Lowest ``validation_score`` (0-1) that
            still counts as complete. Defaults to 0.7, the threshold this
            method previously hard-coded.

    Returns:
        Dict with validation results:
        {
            "is_complete": bool,
            "missing_fields": List[str],
            "has_medical_data": bool,
            "validation_score": float (0-1),
            "demographic_completeness": float (0-1)
        }
    """
    required_demographic_fields = (
        'full_name', 'gender', 'date_of_birth', 'address'
    )
    optional_demographic_fields = (
        'phone', 'email', 'marital_status', 'language'
    )
    # NOTE(review): the import endpoint's parameter description also lists
    # "encounters" as medical data, but only these three keys are checked
    # here -- confirm which of the two is intended.
    medical_data_fields = ('observations', 'medications', 'conditions')

    def _is_present(value) -> bool:
        # Falsy values (None, '', 0, empty containers) and whitespace-only
        # strings all count as "not provided".
        if isinstance(value, str):
            return value.strip() != ''
        return bool(value)

    # Required fields: every absent one is reported back to the caller.
    missing_fields = [
        field for field in required_demographic_fields
        if not _is_present(patient.get(field))
    ]

    # Optional fields only raise the score; they are never reported missing.
    present_fields = (
        len(required_demographic_fields) - len(missing_fields)
        + sum(
            1 for field in optional_demographic_fields
            if _is_present(patient.get(field))
        )
    )

    # Medical data: any one non-empty collection is enough. A missing,
    # None or non-dict 'clinical_data' is treated as "no medical data"
    # rather than raising.
    clinical_data = patient.get('clinical_data')
    has_medical_data = isinstance(clinical_data, dict) and any(
        clinical_data.get(field) for field in medical_data_fields
    )

    # Fraction of all demographic fields (required + optional) present.
    total_fields = (
        len(required_demographic_fields) + len(optional_demographic_fields)
    )
    validation_score = present_fields / total_fields

    # Complete = no required field missing AND score at/above the threshold.
    is_complete = (
        not missing_fields and validation_score >= min_completeness_score
    )

    # If medical data is required, its absence overrides everything else.
    if require_medical_data and not has_medical_data:
        is_complete = False
        missing_fields.append('medical_data')

    return {
        "is_complete": is_complete,
        "missing_fields": missing_fields,
        "has_medical_data": has_medical_data,
        "validation_score": validation_score,
        # Same ratio as validation_score; key kept for existing consumers.
        "demographic_completeness": validation_score,
    }
|
| 89 |
+
|
| 90 |
+
async def import_patients_from_hapi(self, limit: int = 20, require_medical_data: bool = False, min_completeness_score: float = 0.7) -> dict:
|
| 91 |
+
"""
|
| 92 |
+
Import patients from HAPI FHIR Test Server with data completeness validation
|
| 93 |
+
|
| 94 |
+
Args:
|
| 95 |
+
limit: Number of patients to fetch from HAPI FHIR
|
| 96 |
+
require_medical_data: Whether to require patients to have medical data
|
| 97 |
+
min_completeness_score: Minimum validation score (0-1) for a patient to be considered complete
|
| 98 |
"""
|
| 99 |
try:
|
| 100 |
print(f"Fetching {limit} patients from HAPI FHIR...")
|
|
|
|
| 105 |
return {
|
| 106 |
"imported_count": 0,
|
| 107 |
"skipped_count": 0,
|
| 108 |
+
"filtered_count": 0,
|
| 109 |
"total_found": 0,
|
| 110 |
"imported_patients": [],
|
| 111 |
"skipped_patients": [],
|
| 112 |
+
"filtered_patients": [],
|
| 113 |
+
"validation_summary": {},
|
| 114 |
"errors": []
|
| 115 |
}
|
| 116 |
|
| 117 |
+
print(f"Found {len(patients)} patients, checking for duplicates and data completeness...")
|
| 118 |
|
| 119 |
imported_count = 0
|
| 120 |
skipped_count = 0
|
| 121 |
+
filtered_count = 0
|
| 122 |
imported_patients = []
|
| 123 |
skipped_patients = []
|
| 124 |
+
filtered_patients = []
|
| 125 |
errors = []
|
| 126 |
+
validation_summary = {
|
| 127 |
+
"total_processed": len(patients),
|
| 128 |
+
"complete_patients": 0,
|
| 129 |
+
"incomplete_patients": 0,
|
| 130 |
+
"with_medical_data": 0,
|
| 131 |
+
"without_medical_data": 0,
|
| 132 |
+
"average_completeness_score": 0.0
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
total_completeness_score = 0.0
|
| 136 |
|
| 137 |
for patient in patients:
|
| 138 |
try:
|
|
|
|
| 154 |
# Enhance patient data with additional FHIR data
|
| 155 |
enhanced_patient = await self._enhance_patient_data(patient)
|
| 156 |
|
| 157 |
+
# Validate data completeness
|
| 158 |
+
validation_result = self._validate_patient_data_completeness(
|
| 159 |
+
enhanced_patient,
|
| 160 |
+
require_medical_data=require_medical_data
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
# Update validation summary
|
| 164 |
+
total_completeness_score += validation_result["validation_score"]
|
| 165 |
+
|
| 166 |
+
if validation_result["has_medical_data"]:
|
| 167 |
+
validation_summary["with_medical_data"] += 1
|
| 168 |
+
else:
|
| 169 |
+
validation_summary["without_medical_data"] += 1
|
| 170 |
+
|
| 171 |
+
# Check if patient meets completeness criteria
|
| 172 |
+
if not validation_result["is_complete"] or validation_result["validation_score"] < min_completeness_score:
|
| 173 |
+
filtered_count += 1
|
| 174 |
+
filtered_patients.append({
|
| 175 |
+
"name": patient['full_name'],
|
| 176 |
+
"fhir_id": patient['fhir_id'],
|
| 177 |
+
"missing_fields": validation_result["missing_fields"],
|
| 178 |
+
"completeness_score": validation_result["validation_score"],
|
| 179 |
+
"has_medical_data": validation_result["has_medical_data"]
|
| 180 |
+
})
|
| 181 |
+
print(f"Patient {patient['full_name']} filtered out - missing: {validation_result['missing_fields']}, score: {validation_result['validation_score']:.2f}")
|
| 182 |
+
validation_summary["incomplete_patients"] += 1
|
| 183 |
+
continue
|
| 184 |
+
|
| 185 |
+
validation_summary["complete_patients"] += 1
|
| 186 |
+
|
| 187 |
# Insert into database
|
| 188 |
result = await db.patients.insert_one(enhanced_patient)
|
| 189 |
|
| 190 |
if result.inserted_id:
|
| 191 |
imported_count += 1
|
| 192 |
+
imported_patients.append({
|
| 193 |
+
"name": patient['full_name'],
|
| 194 |
+
"fhir_id": patient['fhir_id'],
|
| 195 |
+
"completeness_score": validation_result["validation_score"],
|
| 196 |
+
"has_medical_data": validation_result["has_medical_data"]
|
| 197 |
+
})
|
| 198 |
+
print(f"Imported patient: {patient['full_name']} (ID: {result.inserted_id}, Score: {validation_result['validation_score']:.2f})")
|
| 199 |
|
| 200 |
except Exception as e:
|
| 201 |
error_msg = f"Error importing patient {patient.get('full_name', 'Unknown')}: {e}"
|
|
|
|
| 203 |
print(error_msg)
|
| 204 |
continue
|
| 205 |
|
| 206 |
+
# Calculate average completeness score
|
| 207 |
+
if validation_summary["total_processed"] > 0:
|
| 208 |
+
validation_summary["average_completeness_score"] = total_completeness_score / validation_summary["total_processed"]
|
| 209 |
+
|
| 210 |
+
print(f"Import completed: {imported_count} imported, {skipped_count} skipped, {filtered_count} filtered out")
|
| 211 |
+
print(f"Validation summary: {validation_summary}")
|
| 212 |
|
| 213 |
return {
|
| 214 |
"imported_count": imported_count,
|
| 215 |
"skipped_count": skipped_count,
|
| 216 |
+
"filtered_count": filtered_count,
|
| 217 |
"total_found": len(patients),
|
| 218 |
"imported_patients": imported_patients,
|
| 219 |
"skipped_patients": skipped_patients,
|
| 220 |
+
"filtered_patients": filtered_patients,
|
| 221 |
+
"validation_summary": validation_summary,
|
| 222 |
"errors": errors
|
| 223 |
}
|
| 224 |
|
|
|
|
| 227 |
return {
|
| 228 |
"imported_count": 0,
|
| 229 |
"skipped_count": 0,
|
| 230 |
+
"filtered_count": 0,
|
| 231 |
"total_found": 0,
|
| 232 |
"imported_patients": [],
|
| 233 |
"skipped_patients": [],
|
| 234 |
+
"filtered_patients": [],
|
| 235 |
+
"validation_summary": {},
|
| 236 |
"errors": [str(e)]
|
| 237 |
}
|
| 238 |
|
api/services/synthea_integration.py
CHANGED
|
@@ -28,7 +28,10 @@ class SyntheaIntegrationService:
|
|
| 28 |
# Check if we're in a containerized environment (like Hugging Face Spaces)
|
| 29 |
self.is_containerized = os.path.exists('/.dockerenv') or os.environ.get('HF_SPACE_ID') is not None
|
| 30 |
|
| 31 |
-
#
|
|
|
|
|
|
|
|
|
|
| 32 |
self.use_mock_data = False
|
| 33 |
|
| 34 |
# Try multiple directory locations for better compatibility
|
|
@@ -55,7 +58,8 @@ class SyntheaIntegrationService:
|
|
| 55 |
logger.warning("⚠️ No writable temp directory found, using current directory")
|
| 56 |
|
| 57 |
self.synthea_dir = base_temp_dir / "cps_synthea"
|
| 58 |
-
|
|
|
|
| 59 |
self.synthea_jar_path = self.synthea_dir / "synthea-with-dependencies.jar"
|
| 60 |
|
| 61 |
# Try to create directories
|
|
@@ -65,7 +69,9 @@ class SyntheaIntegrationService:
|
|
| 65 |
logger.info(f"✅ Using directories: synthea={self.synthea_dir}, output={self.output_dir}")
|
| 66 |
except Exception as e:
|
| 67 |
logger.warning(f"⚠️ Could not create directories: {e}, will try to use existing paths")
|
| 68 |
-
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# Synthea configuration
|
| 71 |
self.default_config = {
|
|
@@ -90,6 +96,20 @@ class SyntheaIntegrationService:
|
|
| 90 |
"exporter.fhir.include_practitioners": "false"
|
| 91 |
}
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
async def download_synthea(self) -> bool:
|
| 94 |
"""
|
| 95 |
Download Synthea JAR file if not present
|
|
@@ -160,10 +180,12 @@ class SyntheaIntegrationService:
|
|
| 160 |
file.unlink()
|
| 161 |
|
| 162 |
# Run Synthea with command line arguments (more reliable)
|
|
|
|
|
|
|
| 163 |
cmd = [
|
| 164 |
"java", "-jar", str(self.synthea_jar_path),
|
| 165 |
"-p", str(population),
|
| 166 |
-
"-o", str(
|
| 167 |
"--seed", str(int(datetime.now().timestamp())),
|
| 168 |
"--exporter.fhir.transaction_bundle=true",
|
| 169 |
"--exporter.fhir.include_patient_summary=true",
|
|
@@ -181,7 +203,7 @@ class SyntheaIntegrationService:
|
|
| 181 |
logger.info(f"Output directory exists before generation: {self.output_dir.exists()}")
|
| 182 |
|
| 183 |
# Try multiple working directories for better compatibility
|
| 184 |
-
working_dirs = [str(
|
| 185 |
|
| 186 |
process = None
|
| 187 |
for working_dir in working_dirs:
|
|
@@ -206,7 +228,13 @@ class SyntheaIntegrationService:
|
|
| 206 |
|
| 207 |
if process.returncode == 0:
|
| 208 |
logger.info("✅ Synthea generation completed successfully")
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
# Debug: Check what files were actually created
|
| 212 |
logger.info(f"🔍 Checking output directory immediately after generation: {self.output_dir}")
|
|
@@ -222,6 +250,14 @@ class SyntheaIntegrationService:
|
|
| 222 |
for subdir in subdirs:
|
| 223 |
json_files = list(subdir.glob("*.json"))
|
| 224 |
logger.info(f"📁 JSON files in {subdir.name}: {[f.name for f in json_files]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
# Also check if files were created in the working directory
|
| 227 |
working_dir_files = list(Path.cwd().glob("*.json"))
|
|
@@ -231,12 +267,29 @@ class SyntheaIntegrationService:
|
|
| 231 |
synthea_dir_files = list(self.synthea_dir.glob("*.json"))
|
| 232 |
logger.info(f"📁 JSON files in synthea directory: {[f.name for f in synthea_dir_files]}")
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
else:
|
| 235 |
logger.warning(f"⚠️ Output directory does not exist: {self.output_dir}")
|
| 236 |
|
| 237 |
return True
|
| 238 |
else:
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
logger.error(f"❌ Synthea generation failed with return code {process.returncode}")
|
| 241 |
logger.error(f"Error output: {error_output}")
|
| 242 |
return False
|
|
@@ -245,7 +298,7 @@ class SyntheaIntegrationService:
|
|
| 245 |
logger.error(f"❌ Error running Synthea: {str(e)}")
|
| 246 |
return False
|
| 247 |
|
| 248 |
-
async def process_synthea_output(self) -> List[Dict[str, Any]]:
|
| 249 |
"""
|
| 250 |
Process Synthea output files and convert to application format
|
| 251 |
"""
|
|
@@ -257,9 +310,10 @@ class SyntheaIntegrationService:
|
|
| 257 |
|
| 258 |
# List of directories to search for Synthea output
|
| 259 |
search_dirs = [
|
| 260 |
-
self.output_dir,
|
| 261 |
-
|
| 262 |
-
Path.cwd(),
|
|
|
|
| 263 |
Path('/tmp'),
|
| 264 |
Path('/app'),
|
| 265 |
Path('/app/tmp')
|
|
@@ -315,6 +369,9 @@ class SyntheaIntegrationService:
|
|
| 315 |
return []
|
| 316 |
|
| 317 |
# Process each patient file
|
|
|
|
|
|
|
|
|
|
| 318 |
for file_path in patient_files:
|
| 319 |
try:
|
| 320 |
logger.info(f"📄 Processing file: {file_path}")
|
|
@@ -324,8 +381,14 @@ class SyntheaIntegrationService:
|
|
| 324 |
|
| 325 |
patient_data = await self._extract_patient_data(bundle, file_path.name)
|
| 326 |
if patient_data:
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
else:
|
| 330 |
logger.warning(f"⚠️ No patient data extracted from {file_path}")
|
| 331 |
|
|
@@ -333,6 +396,8 @@ class SyntheaIntegrationService:
|
|
| 333 |
logger.error(f"❌ Error processing {file_path}: {str(e)}")
|
| 334 |
continue
|
| 335 |
|
|
|
|
|
|
|
| 336 |
logger.info(f"✅ Successfully processed {len(patients)} patients from Synthea output")
|
| 337 |
return patients
|
| 338 |
|
|
@@ -527,6 +592,72 @@ class SyntheaIntegrationService:
|
|
| 527 |
except:
|
| 528 |
return []
|
| 529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
async def save_patients_to_database(self, patients: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 531 |
"""
|
| 532 |
Save generated patients directly to the database
|
|
@@ -553,6 +684,9 @@ class SyntheaIntegrationService:
|
|
| 553 |
'marital_status': patient.get('marital_status', ''),
|
| 554 |
'language': patient.get('language', 'English'),
|
| 555 |
'source': patient.get('source', 'synthea'),
|
|
|
|
|
|
|
|
|
|
| 556 |
'import_date': datetime.utcnow(),
|
| 557 |
'last_updated': datetime.utcnow(),
|
| 558 |
'conditions': patient.get('conditions', []),
|
|
@@ -599,7 +733,8 @@ class SyntheaIntegrationService:
|
|
| 599 |
age_min: int = 18,
|
| 600 |
age_max: int = 80,
|
| 601 |
gender: str = "both",
|
| 602 |
-
location: str = "Massachusetts"
|
|
|
|
| 603 |
) -> Dict[str, Any]:
|
| 604 |
"""
|
| 605 |
Complete workflow: generate Synthea data and prepare for import
|
|
@@ -607,7 +742,7 @@ class SyntheaIntegrationService:
|
|
| 607 |
try:
|
| 608 |
logger.info(f"🎯 Starting Synthea generation for {population} patients")
|
| 609 |
|
| 610 |
-
# Always
|
| 611 |
try:
|
| 612 |
# Download Synthea if needed
|
| 613 |
if not await self.download_synthea():
|
|
@@ -630,12 +765,19 @@ class SyntheaIntegrationService:
|
|
| 630 |
logger.error("❌ Synthea generation failed")
|
| 631 |
raise Exception("Synthea generation failed")
|
| 632 |
|
| 633 |
-
# Process output
|
| 634 |
-
patients = await self.process_synthea_output()
|
| 635 |
|
| 636 |
if not patients:
|
| 637 |
-
logger.error("❌ No patients generated from Synthea")
|
| 638 |
-
raise Exception("No patients generated from Synthea")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
# Save patients to database
|
| 641 |
db_result = await self.save_patients_to_database(patients)
|
|
@@ -650,15 +792,14 @@ class SyntheaIntegrationService:
|
|
| 650 |
"patients": patients,
|
| 651 |
"config": config_overrides,
|
| 652 |
"output_directory": str(self.output_dir),
|
| 653 |
-
"source": "synthea_real"
|
|
|
|
| 654 |
}
|
| 655 |
|
| 656 |
except Exception as e:
|
| 657 |
logger.error(f"❌ Synthea integration failed: {str(e)}")
|
| 658 |
-
raise
|
| 659 |
-
|
| 660 |
-
detail=f"Synthea generation failed: {str(e)}"
|
| 661 |
-
)
|
| 662 |
|
| 663 |
except Exception as e:
|
| 664 |
logger.error(f"❌ Error in generate_and_import_patients: {str(e)}")
|
|
@@ -667,198 +808,6 @@ class SyntheaIntegrationService:
|
|
| 667 |
detail=f"Patient generation failed: {str(e)}"
|
| 668 |
)
|
| 669 |
|
| 670 |
-
async def _generate_mock_patients(
|
| 671 |
-
self,
|
| 672 |
-
population: int = 10,
|
| 673 |
-
age_min: int = 18,
|
| 674 |
-
age_max: int = 80,
|
| 675 |
-
gender: str = "both",
|
| 676 |
-
location: str = "Massachusetts"
|
| 677 |
-
) -> Dict[str, Any]:
|
| 678 |
-
"""
|
| 679 |
-
Generate realistic mock patient data when Synthea is not available
|
| 680 |
-
"""
|
| 681 |
-
import random
|
| 682 |
-
from datetime import datetime, timedelta
|
| 683 |
-
|
| 684 |
-
logger.info(f"🎭 Generating {population} realistic mock patients")
|
| 685 |
-
|
| 686 |
-
# More comprehensive name lists
|
| 687 |
-
first_names = [
|
| 688 |
-
"John", "Jane", "Michael", "Sarah", "David", "Emily", "Robert", "Lisa", "James", "Maria",
|
| 689 |
-
"William", "Jennifer", "Christopher", "Jessica", "Daniel", "Amanda", "Matthew", "Nicole", "Anthony", "Stephanie",
|
| 690 |
-
"Mark", "Melissa", "Donald", "Michelle", "Steven", "Laura", "Paul", "Kimberly", "Andrew", "Deborah",
|
| 691 |
-
"Joshua", "Dorothy", "Kenneth", "Helen", "Kevin", "Sharon", "Brian", "Carol", "George", "Ruth",
|
| 692 |
-
"Edward", "Julie", "Ronald", "Joyce", "Timothy", "Virginia", "Jason", "Victoria", "Jeffrey", "Kelly"
|
| 693 |
-
]
|
| 694 |
-
last_names = [
|
| 695 |
-
"Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
|
| 696 |
-
"Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
|
| 697 |
-
"Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
|
| 698 |
-
"Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
|
| 699 |
-
"Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell", "Carter", "Roberts"
|
| 700 |
-
]
|
| 701 |
-
cities = ["Boston", "Cambridge", "Worcester", "Springfield", "Lowell", "New Bedford", "Brockton", "Quincy", "Lynn", "Fall River"]
|
| 702 |
-
|
| 703 |
-
# Medical conditions for more realistic data
|
| 704 |
-
conditions = [
|
| 705 |
-
"Hypertension", "Diabetes Type 2", "Asthma", "Depression", "Anxiety", "Obesity", "Arthritis", "Heart Disease",
|
| 706 |
-
"High Cholesterol", "Migraine", "Insomnia", "GERD", "Allergies", "Back Pain", "Carpal Tunnel Syndrome"
|
| 707 |
-
]
|
| 708 |
-
|
| 709 |
-
# Medications for more realistic data
|
| 710 |
-
medications = [
|
| 711 |
-
"Lisinopril", "Metformin", "Albuterol", "Sertraline", "Atorvastatin", "Omeprazole", "Ibuprofen", "Acetaminophen",
|
| 712 |
-
"Loratadine", "Melatonin", "Vitamin D", "Fish Oil", "Calcium", "Iron", "Folic Acid"
|
| 713 |
-
]
|
| 714 |
-
|
| 715 |
-
patients = []
|
| 716 |
-
|
| 717 |
-
for i in range(population):
|
| 718 |
-
# Generate random name
|
| 719 |
-
first_name = random.choice(first_names)
|
| 720 |
-
last_name = random.choice(last_names)
|
| 721 |
-
full_name = f"{first_name} {last_name}"
|
| 722 |
-
|
| 723 |
-
# Generate random age
|
| 724 |
-
age = random.randint(age_min, age_max)
|
| 725 |
-
birth_date = datetime.now() - timedelta(days=age*365 + random.randint(0, 365))
|
| 726 |
-
|
| 727 |
-
# Generate random gender
|
| 728 |
-
if gender == "both":
|
| 729 |
-
patient_gender = random.choice(["male", "female"])
|
| 730 |
-
else:
|
| 731 |
-
patient_gender = gender
|
| 732 |
-
|
| 733 |
-
# Generate random address
|
| 734 |
-
street_number = random.randint(100, 9999)
|
| 735 |
-
street_name = random.choice(["Main St", "Oak Ave", "Elm St", "Maple Dr", "Cedar Ln", "Pine Rd", "Birch Way", "Willow Ct"])
|
| 736 |
-
city = random.choice(cities)
|
| 737 |
-
state = "MA"
|
| 738 |
-
postal_code = f"{random.randint(10000, 99999)}"
|
| 739 |
-
|
| 740 |
-
# Generate realistic medical data
|
| 741 |
-
patient_conditions = []
|
| 742 |
-
patient_medications = []
|
| 743 |
-
patient_encounters = []
|
| 744 |
-
patient_observations = []
|
| 745 |
-
patient_procedures = []
|
| 746 |
-
patient_immunizations = []
|
| 747 |
-
patient_allergies = []
|
| 748 |
-
|
| 749 |
-
# Add 0-3 random conditions
|
| 750 |
-
num_conditions = random.randint(0, 3)
|
| 751 |
-
for _ in range(num_conditions):
|
| 752 |
-
condition = random.choice(conditions)
|
| 753 |
-
if condition not in [c['code'] for c in patient_conditions]:
|
| 754 |
-
patient_conditions.append({
|
| 755 |
-
'id': f"condition-{random.randint(1000, 9999)}",
|
| 756 |
-
'code': condition,
|
| 757 |
-
'status': random.choice(['active', 'inactive', 'resolved']),
|
| 758 |
-
'onset_date': (datetime.now() - timedelta(days=random.randint(30, 3650))).strftime('%Y-%m-%d'),
|
| 759 |
-
'recorded_date': datetime.now().strftime('%Y-%m-%d'),
|
| 760 |
-
'verification_status': 'confirmed',
|
| 761 |
-
'category': 'diagnosis'
|
| 762 |
-
})
|
| 763 |
-
|
| 764 |
-
# Add 0-4 random medications
|
| 765 |
-
num_medications = random.randint(0, 4)
|
| 766 |
-
for _ in range(num_medications):
|
| 767 |
-
medication = random.choice(medications)
|
| 768 |
-
if medication not in [m['name'] for m in patient_medications]:
|
| 769 |
-
patient_medications.append({
|
| 770 |
-
'id': f"med-{random.randint(1000, 9999)}",
|
| 771 |
-
'name': medication,
|
| 772 |
-
'status': random.choice(['active', 'discontinued', 'completed']),
|
| 773 |
-
'prescribed_date': (datetime.now() - timedelta(days=random.randint(7, 365))).strftime('%Y-%m-%d'),
|
| 774 |
-
'requester': f"Dr. {random.choice(['Smith', 'Johnson', 'Williams', 'Brown', 'Davis'])}",
|
| 775 |
-
'dosage': f"{random.randint(1, 3)} tablet(s) daily",
|
| 776 |
-
'intent': 'order',
|
| 777 |
-
'priority': 'routine'
|
| 778 |
-
})
|
| 779 |
-
|
| 780 |
-
# Add 1-5 encounters
|
| 781 |
-
num_encounters = random.randint(1, 5)
|
| 782 |
-
for j in range(num_encounters):
|
| 783 |
-
encounter_date = datetime.now() - timedelta(days=random.randint(1, 365))
|
| 784 |
-
patient_encounters.append({
|
| 785 |
-
'id': f"encounter-{random.randint(1000, 9999)}",
|
| 786 |
-
'type': random.choice(['Office Visit', 'Emergency Room', 'Hospital Admission', 'Telemedicine', 'Lab Visit']),
|
| 787 |
-
'status': 'finished',
|
| 788 |
-
'period': {
|
| 789 |
-
'start': encounter_date.strftime('%Y-%m-%dT%H:%M:%S'),
|
| 790 |
-
'end': (encounter_date + timedelta(hours=random.randint(1, 4))).strftime('%Y-%m-%dT%H:%M:%S')
|
| 791 |
-
},
|
| 792 |
-
'service_provider': f"{random.choice(['General Hospital', 'Medical Center', 'Clinic'])}",
|
| 793 |
-
'class': 'ambulatory',
|
| 794 |
-
'reason': random.choice(['Routine Checkup', 'Follow-up', 'Emergency', 'Lab Work', 'Consultation'])
|
| 795 |
-
})
|
| 796 |
-
|
| 797 |
-
# Add 2-8 observations
|
| 798 |
-
num_observations = random.randint(2, 8)
|
| 799 |
-
for _ in range(num_observations):
|
| 800 |
-
observation_date = datetime.now() - timedelta(days=random.randint(1, 365))
|
| 801 |
-
patient_observations.append({
|
| 802 |
-
'id': f"obs-{random.randint(1000, 9999)}",
|
| 803 |
-
'code': random.choice(['Blood Pressure', 'Heart Rate', 'Temperature', 'Weight', 'Height', 'Blood Glucose', 'Cholesterol']),
|
| 804 |
-
'value': f"{random.randint(70, 200)}",
|
| 805 |
-
'unit': random.choice(['mmHg', 'bpm', '°F', 'lbs', 'inches', 'mg/dL']),
|
| 806 |
-
'status': 'final',
|
| 807 |
-
'effective_date': observation_date.strftime('%Y-%m-%dT%H:%M:%S'),
|
| 808 |
-
'category': 'vital-signs'
|
| 809 |
-
})
|
| 810 |
-
|
| 811 |
-
patient_data = {
|
| 812 |
-
'fhir_id': f"mock-patient-{i+1}",
|
| 813 |
-
'full_name': full_name,
|
| 814 |
-
'gender': patient_gender,
|
| 815 |
-
'date_of_birth': birth_date.strftime('%Y-%m-%d'),
|
| 816 |
-
'address': f"{street_number} {street_name}",
|
| 817 |
-
'city': city,
|
| 818 |
-
'state': state,
|
| 819 |
-
'postal_code': postal_code,
|
| 820 |
-
'country': 'US',
|
| 821 |
-
'marital_status': random.choice(['single', 'married', 'divorced', 'widowed']),
|
| 822 |
-
'language': 'English',
|
| 823 |
-
'source': 'synthea_mock',
|
| 824 |
-
'import_date': datetime.utcnow().isoformat(),
|
| 825 |
-
'last_updated': datetime.utcnow().isoformat(),
|
| 826 |
-
'conditions': patient_conditions,
|
| 827 |
-
'medications': patient_medications,
|
| 828 |
-
'encounters': patient_encounters,
|
| 829 |
-
'observations': patient_observations,
|
| 830 |
-
'procedures': patient_procedures,
|
| 831 |
-
'immunizations': patient_immunizations,
|
| 832 |
-
'allergies': patient_allergies
|
| 833 |
-
}
|
| 834 |
-
|
| 835 |
-
patients.append(patient_data)
|
| 836 |
-
|
| 837 |
-
# Save mock patients to database
|
| 838 |
-
if patients:
|
| 839 |
-
db_result = await self.save_patients_to_database(patients)
|
| 840 |
-
logger.info(f"💾 Mock patients database save result: {db_result}")
|
| 841 |
-
else:
|
| 842 |
-
db_result = {"saved_count": 0, "failed_count": 0, "errors": ["No mock patients to save"], "success": False}
|
| 843 |
-
|
| 844 |
-
return {
|
| 845 |
-
"status": "success",
|
| 846 |
-
"generated_patients": len(patients),
|
| 847 |
-
"saved_to_database": db_result["saved_count"],
|
| 848 |
-
"failed_to_save": db_result["failed_count"],
|
| 849 |
-
"database_errors": db_result["errors"],
|
| 850 |
-
"patients": patients,
|
| 851 |
-
"config": {
|
| 852 |
-
"population": population,
|
| 853 |
-
"age_min": age_min,
|
| 854 |
-
"age_max": age_max,
|
| 855 |
-
"gender": gender,
|
| 856 |
-
"location": location
|
| 857 |
-
},
|
| 858 |
-
"output_directory": "mock_data",
|
| 859 |
-
"source": "synthea_mock"
|
| 860 |
-
}
|
| 861 |
-
|
| 862 |
async def get_synthea_statistics(self) -> Dict[str, Any]:
|
| 863 |
"""
|
| 864 |
Get statistics about Synthea capabilities and generated data
|
|
@@ -873,8 +822,7 @@ class SyntheaIntegrationService:
|
|
| 873 |
"synthea_available": False,
|
| 874 |
"java_available": False,
|
| 875 |
"directories_accessible": False,
|
| 876 |
-
"environment": "local"
|
| 877 |
-
"using_mock_data": False
|
| 878 |
}
|
| 879 |
|
| 880 |
# Set environment info
|
|
@@ -883,8 +831,7 @@ class SyntheaIntegrationService:
|
|
| 883 |
else:
|
| 884 |
stats["environment"] = "local"
|
| 885 |
|
| 886 |
-
# Always
|
| 887 |
-
stats["using_mock_data"] = False
|
| 888 |
|
| 889 |
# Check if directories are accessible
|
| 890 |
try:
|
|
@@ -957,6 +904,5 @@ class SyntheaIntegrationService:
|
|
| 957 |
"synthea_available": False,
|
| 958 |
"java_available": False,
|
| 959 |
"directories_accessible": False,
|
| 960 |
-
"environment": "unknown"
|
| 961 |
-
"using_mock_data": True
|
| 962 |
}
|
|
|
|
| 28 |
# Check if we're in a containerized environment (like Hugging Face Spaces)
|
| 29 |
self.is_containerized = os.path.exists('/.dockerenv') or os.environ.get('HF_SPACE_ID') is not None
|
| 30 |
|
| 31 |
+
# Check if Java is available locally
|
| 32 |
+
self.java_available = self._check_java_availability()
|
| 33 |
+
|
| 34 |
+
# Always use real Synthea data - no fallback to mock data
|
| 35 |
self.use_mock_data = False
|
| 36 |
|
| 37 |
# Try multiple directory locations for better compatibility
|
|
|
|
| 58 |
logger.warning("⚠️ No writable temp directory found, using current directory")
|
| 59 |
|
| 60 |
self.synthea_dir = base_temp_dir / "cps_synthea"
|
| 61 |
+
# Use the actual output directory where Synthea creates files
|
| 62 |
+
self.output_dir = Path.cwd() / "output" / "fhir"
|
| 63 |
self.synthea_jar_path = self.synthea_dir / "synthea-with-dependencies.jar"
|
| 64 |
|
| 65 |
# Try to create directories
|
|
|
|
| 69 |
logger.info(f"✅ Using directories: synthea={self.synthea_dir}, output={self.output_dir}")
|
| 70 |
except Exception as e:
|
| 71 |
logger.warning(f"⚠️ Could not create directories: {e}, will try to use existing paths")
|
| 72 |
+
|
| 73 |
+
# Log the configuration
|
| 74 |
+
logger.info("🚀 Using real Synthea generation (no fallback to mock data)")
|
| 75 |
|
| 76 |
# Synthea configuration
|
| 77 |
self.default_config = {
|
|
|
|
| 96 |
"exporter.fhir.include_practitioners": "false"
|
| 97 |
}
|
| 98 |
|
| 99 |
+
def _check_java_availability(self) -> bool:
|
| 100 |
+
"""
|
| 101 |
+
Check if Java is available in the system
|
| 102 |
+
"""
|
| 103 |
+
try:
|
| 104 |
+
import subprocess
|
| 105 |
+
result = subprocess.run(['java', '-version'],
|
| 106 |
+
capture_output=True,
|
| 107 |
+
text=True,
|
| 108 |
+
timeout=10)
|
| 109 |
+
return result.returncode == 0
|
| 110 |
+
except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
|
| 111 |
+
return False
|
| 112 |
+
|
| 113 |
async def download_synthea(self) -> bool:
|
| 114 |
"""
|
| 115 |
Download Synthea JAR file if not present
|
|
|
|
| 180 |
file.unlink()
|
| 181 |
|
| 182 |
# Run Synthea with command line arguments (more reliable)
|
| 183 |
+
# Use the parent directory of output/fhir as the output directory
|
| 184 |
+
output_parent = Path.cwd() / "output"
|
| 185 |
cmd = [
|
| 186 |
"java", "-jar", str(self.synthea_jar_path),
|
| 187 |
"-p", str(population),
|
| 188 |
+
"-o", str(output_parent.absolute()),
|
| 189 |
"--seed", str(int(datetime.now().timestamp())),
|
| 190 |
"--exporter.fhir.transaction_bundle=true",
|
| 191 |
"--exporter.fhir.include_patient_summary=true",
|
|
|
|
| 203 |
logger.info(f"Output directory exists before generation: {self.output_dir.exists()}")
|
| 204 |
|
| 205 |
# Try multiple working directories for better compatibility
|
| 206 |
+
working_dirs = [str(Path.cwd()), str(self.synthea_dir), str(self.output_dir)]
|
| 207 |
|
| 208 |
process = None
|
| 209 |
for working_dir in working_dirs:
|
|
|
|
| 228 |
|
| 229 |
if process.returncode == 0:
|
| 230 |
logger.info("✅ Synthea generation completed successfully")
|
| 231 |
+
# Handle potential encoding issues with subprocess output
|
| 232 |
+
try:
|
| 233 |
+
output_text = stdout.decode('utf-8', errors='ignore')
|
| 234 |
+
logger.info(f"Output: {output_text}")
|
| 235 |
+
except Exception as decode_error:
|
| 236 |
+
logger.warning(f"⚠️ Could not decode stdout: {decode_error}")
|
| 237 |
+
logger.info("✅ Synthea generation completed successfully (output not displayed due to encoding)")
|
| 238 |
|
| 239 |
# Debug: Check what files were actually created
|
| 240 |
logger.info(f"🔍 Checking output directory immediately after generation: {self.output_dir}")
|
|
|
|
| 250 |
for subdir in subdirs:
|
| 251 |
json_files = list(subdir.glob("*.json"))
|
| 252 |
logger.info(f"📁 JSON files in {subdir.name}: {[f.name for f in json_files]}")
|
| 253 |
+
|
| 254 |
+
# Specifically check for fhir subdirectory
|
| 255 |
+
fhir_dir = self.output_dir / "fhir"
|
| 256 |
+
if fhir_dir.exists():
|
| 257 |
+
fhir_files = list(fhir_dir.glob("*.json"))
|
| 258 |
+
logger.info(f"📁 JSON files in fhir subdirectory: {[f.name for f in fhir_files]}")
|
| 259 |
+
else:
|
| 260 |
+
logger.warning(f"⚠️ FHIR subdirectory does not exist: {fhir_dir}")
|
| 261 |
|
| 262 |
# Also check if files were created in the working directory
|
| 263 |
working_dir_files = list(Path.cwd().glob("*.json"))
|
|
|
|
| 267 |
synthea_dir_files = list(self.synthea_dir.glob("*.json"))
|
| 268 |
logger.info(f"📁 JSON files in synthea directory: {[f.name for f in synthea_dir_files]}")
|
| 269 |
|
| 270 |
+
# Check what files were created in the working directory where Synthea actually ran
|
| 271 |
+
for working_dir in working_dirs:
|
| 272 |
+
if Path(working_dir).exists():
|
| 273 |
+
working_dir_files = list(Path(working_dir).glob("*.json"))
|
| 274 |
+
logger.info(f"📁 JSON files in working directory {working_dir}: {[f.name for f in working_dir_files]}")
|
| 275 |
+
|
| 276 |
+
# Also check subdirectories in the working directory
|
| 277 |
+
for subdir in Path(working_dir).iterdir():
|
| 278 |
+
if subdir.is_dir():
|
| 279 |
+
subdir_files = list(subdir.glob("*.json"))
|
| 280 |
+
if subdir_files:
|
| 281 |
+
logger.info(f"📁 JSON files in subdirectory {subdir}: {[f.name for f in subdir_files]}")
|
| 282 |
+
|
| 283 |
else:
|
| 284 |
logger.warning(f"⚠️ Output directory does not exist: {self.output_dir}")
|
| 285 |
|
| 286 |
return True
|
| 287 |
else:
|
| 288 |
+
# Handle potential encoding issues with stderr
|
| 289 |
+
try:
|
| 290 |
+
error_output = stderr.decode('utf-8', errors='ignore')
|
| 291 |
+
except Exception as decode_error:
|
| 292 |
+
error_output = f"Could not decode error output: {decode_error}"
|
| 293 |
logger.error(f"❌ Synthea generation failed with return code {process.returncode}")
|
| 294 |
logger.error(f"Error output: {error_output}")
|
| 295 |
return False
|
|
|
|
| 298 |
logger.error(f"❌ Error running Synthea: {str(e)}")
|
| 299 |
return False
|
| 300 |
|
| 301 |
+
async def process_synthea_output(self, require_medical_data: bool = False) -> List[Dict[str, Any]]:
|
| 302 |
"""
|
| 303 |
Process Synthea output files and convert to application format
|
| 304 |
"""
|
|
|
|
| 310 |
|
| 311 |
# List of directories to search for Synthea output
|
| 312 |
search_dirs = [
|
| 313 |
+
self.output_dir, # The actual fhir directory where files are created
|
| 314 |
+
Path.cwd() / "output" / "fhir", # Explicit path to fhir directory
|
| 315 |
+
Path.cwd() / "output", # Parent output directory
|
| 316 |
+
Path.cwd(), # Current working directory
|
| 317 |
Path('/tmp'),
|
| 318 |
Path('/app'),
|
| 319 |
Path('/app/tmp')
|
|
|
|
| 369 |
return []
|
| 370 |
|
| 371 |
# Process each patient file
|
| 372 |
+
valid_patients = 0
|
| 373 |
+
invalid_patients = 0
|
| 374 |
+
|
| 375 |
for file_path in patient_files:
|
| 376 |
try:
|
| 377 |
logger.info(f"📄 Processing file: {file_path}")
|
|
|
|
| 381 |
|
| 382 |
patient_data = await self._extract_patient_data(bundle, file_path.name)
|
| 383 |
if patient_data:
|
| 384 |
+
# Validate patient data completeness
|
| 385 |
+
if self._validate_patient_data_completeness(patient_data, require_medical_data):
|
| 386 |
+
patients.append(patient_data)
|
| 387 |
+
valid_patients += 1
|
| 388 |
+
logger.info(f"✅ Validated and extracted patient: {patient_data.get('full_name', 'Unknown')}")
|
| 389 |
+
else:
|
| 390 |
+
invalid_patients += 1
|
| 391 |
+
logger.warning(f"❌ Patient validation failed: {patient_data.get('full_name', 'Unknown')}")
|
| 392 |
else:
|
| 393 |
logger.warning(f"⚠️ No patient data extracted from {file_path}")
|
| 394 |
|
|
|
|
| 396 |
logger.error(f"❌ Error processing {file_path}: {str(e)}")
|
| 397 |
continue
|
| 398 |
|
| 399 |
+
logger.info(f"📊 Patient validation summary: {valid_patients} valid, {invalid_patients} invalid")
|
| 400 |
+
|
| 401 |
logger.info(f"✅ Successfully processed {len(patients)} patients from Synthea output")
|
| 402 |
return patients
|
| 403 |
|
|
|
|
| 592 |
except:
|
| 593 |
return []
|
| 594 |
|
| 595 |
+
def _validate_patient_data_completeness(self, patient_data: Dict[str, Any], require_medical_data: bool = True) -> bool:
|
| 596 |
+
"""
|
| 597 |
+
Validate that a patient has all required data fields
|
| 598 |
+
|
| 599 |
+
Args:
|
| 600 |
+
patient_data: The patient data to validate
|
| 601 |
+
require_medical_data: If True, patient must have at least some medical data (default: True for complete data)
|
| 602 |
+
"""
|
| 603 |
+
try:
|
| 604 |
+
# Required basic fields - all must be present and not empty
|
| 605 |
+
required_fields = [
|
| 606 |
+
'full_name', 'gender', 'date_of_birth', 'address',
|
| 607 |
+
'city', 'state', 'postal_code', 'country'
|
| 608 |
+
]
|
| 609 |
+
|
| 610 |
+
# Check if all required fields are present and not empty
|
| 611 |
+
for field in required_fields:
|
| 612 |
+
value = patient_data.get(field)
|
| 613 |
+
if not value or (isinstance(value, str) and not value.strip()):
|
| 614 |
+
logger.warning(f"⚠️ Missing or empty required field '{field}' for patient {patient_data.get('full_name', 'Unknown')}")
|
| 615 |
+
return False
|
| 616 |
+
|
| 617 |
+
# Validate name is not just whitespace
|
| 618 |
+
if not patient_data.get('full_name', '').strip():
|
| 619 |
+
logger.warning(f"⚠️ Empty or invalid name for patient")
|
| 620 |
+
return False
|
| 621 |
+
|
| 622 |
+
# Validate gender is valid
|
| 623 |
+
if patient_data.get('gender') not in ['male', 'female', 'other', 'unknown']:
|
| 624 |
+
logger.warning(f"⚠️ Invalid gender '{patient_data.get('gender')}' for patient {patient_data.get('full_name', 'Unknown')}")
|
| 625 |
+
return False
|
| 626 |
+
|
| 627 |
+
# Validate date of birth format
|
| 628 |
+
try:
|
| 629 |
+
if patient_data.get('date_of_birth'):
|
| 630 |
+
datetime.strptime(patient_data['date_of_birth'], '%Y-%m-%d')
|
| 631 |
+
else:
|
| 632 |
+
logger.warning(f"⚠️ Missing date of birth for patient {patient_data.get('full_name', 'Unknown')}")
|
| 633 |
+
return False
|
| 634 |
+
except ValueError:
|
| 635 |
+
logger.warning(f"⚠️ Invalid date of birth format '{patient_data.get('date_of_birth')}' for patient {patient_data.get('full_name', 'Unknown')}")
|
| 636 |
+
return False
|
| 637 |
+
|
| 638 |
+
# For complete data, we require medical data
|
| 639 |
+
if require_medical_data:
|
| 640 |
+
# Check if patient has at least some medical data
|
| 641 |
+
medical_data_present = (
|
| 642 |
+
len(patient_data.get('conditions', [])) > 0 or
|
| 643 |
+
len(patient_data.get('medications', [])) > 0 or
|
| 644 |
+
len(patient_data.get('encounters', [])) > 0 or
|
| 645 |
+
len(patient_data.get('observations', [])) > 0
|
| 646 |
+
)
|
| 647 |
+
|
| 648 |
+
if not medical_data_present:
|
| 649 |
+
logger.warning(f"❌ Patient {patient_data.get('full_name', 'Unknown')} rejected: no medical data (conditions, medications, encounters, or observations)")
|
| 650 |
+
return False
|
| 651 |
+
else:
|
| 652 |
+
logger.info(f"✅ Patient {patient_data.get('full_name', 'Unknown')} has medical data")
|
| 653 |
+
|
| 654 |
+
logger.info(f"✅ Patient {patient_data.get('full_name', 'Unknown')} passed complete validation")
|
| 655 |
+
return True
|
| 656 |
+
|
| 657 |
+
except Exception as e:
|
| 658 |
+
logger.error(f"❌ Error validating patient data: {str(e)}")
|
| 659 |
+
return False
|
| 660 |
+
|
| 661 |
async def save_patients_to_database(self, patients: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 662 |
"""
|
| 663 |
Save generated patients directly to the database
|
|
|
|
| 684 |
'marital_status': patient.get('marital_status', ''),
|
| 685 |
'language': patient.get('language', 'English'),
|
| 686 |
'source': patient.get('source', 'synthea'),
|
| 687 |
+
'status': 'active',
|
| 688 |
+
'created_at': datetime.utcnow(),
|
| 689 |
+
'updated_at': datetime.utcnow(),
|
| 690 |
'import_date': datetime.utcnow(),
|
| 691 |
'last_updated': datetime.utcnow(),
|
| 692 |
'conditions': patient.get('conditions', []),
|
|
|
|
| 733 |
age_min: int = 18,
|
| 734 |
age_max: int = 80,
|
| 735 |
gender: str = "both",
|
| 736 |
+
location: str = "Massachusetts",
|
| 737 |
+
require_medical_data: bool = True
|
| 738 |
) -> Dict[str, Any]:
|
| 739 |
"""
|
| 740 |
Complete workflow: generate Synthea data and prepare for import
|
|
|
|
| 742 |
try:
|
| 743 |
logger.info(f"🎯 Starting Synthea generation for {population} patients")
|
| 744 |
|
| 745 |
+
# Always use real Synthea - no fallback to mock data
|
| 746 |
try:
|
| 747 |
# Download Synthea if needed
|
| 748 |
if not await self.download_synthea():
|
|
|
|
| 765 |
logger.error("❌ Synthea generation failed")
|
| 766 |
raise Exception("Synthea generation failed")
|
| 767 |
|
| 768 |
+
# Process output - only get patients with complete data
|
| 769 |
+
patients = await self.process_synthea_output(require_medical_data=True)
|
| 770 |
|
| 771 |
if not patients:
|
| 772 |
+
logger.error("❌ No patients with complete data generated from Synthea")
|
| 773 |
+
raise Exception("No patients with complete data generated from Synthea")
|
| 774 |
+
|
| 775 |
+
# Limit to exactly 10 patients with complete data
|
| 776 |
+
if len(patients) > 10:
|
| 777 |
+
logger.info(f"📊 Limiting from {len(patients)} to 10 patients with complete data")
|
| 778 |
+
patients = patients[:10]
|
| 779 |
+
|
| 780 |
+
logger.info(f"📊 Final patient count for database storage: {len(patients)}")
|
| 781 |
|
| 782 |
# Save patients to database
|
| 783 |
db_result = await self.save_patients_to_database(patients)
|
|
|
|
| 792 |
"patients": patients,
|
| 793 |
"config": config_overrides,
|
| 794 |
"output_directory": str(self.output_dir),
|
| 795 |
+
"source": "synthea_real",
|
| 796 |
+
"message": f"Successfully stored {db_result['saved_count']} patients with complete data to database"
|
| 797 |
}
|
| 798 |
|
| 799 |
except Exception as e:
|
| 800 |
logger.error(f"❌ Synthea integration failed: {str(e)}")
|
| 801 |
+
# No fallback to mock data - raise the error
|
| 802 |
+
raise Exception(f"Synthea generation failed: {str(e)}")
|
|
|
|
|
|
|
| 803 |
|
| 804 |
except Exception as e:
|
| 805 |
logger.error(f"❌ Error in generate_and_import_patients: {str(e)}")
|
|
|
|
| 808 |
detail=f"Patient generation failed: {str(e)}"
|
| 809 |
)
|
| 810 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
async def get_synthea_statistics(self) -> Dict[str, Any]:
|
| 812 |
"""
|
| 813 |
Get statistics about Synthea capabilities and generated data
|
|
|
|
| 822 |
"synthea_available": False,
|
| 823 |
"java_available": False,
|
| 824 |
"directories_accessible": False,
|
| 825 |
+
"environment": "local"
|
|
|
|
| 826 |
}
|
| 827 |
|
| 828 |
# Set environment info
|
|
|
|
| 831 |
else:
|
| 832 |
stats["environment"] = "local"
|
| 833 |
|
| 834 |
+
# Always use real Synthea - no mock data fallback
|
|
|
|
| 835 |
|
| 836 |
# Check if directories are accessible
|
| 837 |
try:
|
|
|
|
| 904 |
"synthea_available": False,
|
| 905 |
"java_available": False,
|
| 906 |
"directories_accessible": False,
|
| 907 |
+
"environment": "unknown"
|
|
|
|
| 908 |
}
|
data/new_tool.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[]
|
|
|
|
|
|