Medica_DecisionSupportAI / auto_metrics.py
Rajan Sharma
Update auto_metrics.py
044dc7d verified
raw
history blame
4.03 kB
# auto_metrics.py
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Tuple
def _facility_findings(df) -> List[str]:
    """Return markdown lines summarising facility counts by type and by city."""
    lines = [
        "### Facility Distribution Findings",
        f"- Total healthcare facilities: {len(df)}",
    ]

    # Facility type breakdown (only when the column is available).
    if 'facility_type' in df.columns:
        lines.append("- Facility type distribution:")
        lines.extend(
            f" - {ftype}: {count}"
            for ftype, count in df['facility_type'].value_counts().items()
        )

    # Geographic distribution: the five most frequent cities.
    if 'city' in df.columns:
        lines.append("- Top 5 cities by facility count:")
        lines.extend(
            f" - {city}: {count}"
            for city, count in df['city'].value_counts().head(5).items()
        )

    return lines


def _bed_capacity_findings(df) -> List[str]:
    """Return markdown lines summarising current vs. previous bed totals."""
    lines = ["### Bed Capacity Findings"]
    has_current = 'beds_current' in df.columns
    has_prev = 'beds_prev' in df.columns

    # Overall totals. The comparison lines need both columns; reading
    # 'beds_prev' without checking for it would raise KeyError.
    if has_current:
        total_current = df['beds_current'].sum()
        lines.append(f"- Total staffed beds (current): {total_current}")
        if has_prev:
            total_prev = df['beds_prev'].sum()
            total_change = total_current - total_prev
            total_pct = (total_change / total_prev) * 100 if total_prev > 0 else 0
            lines.append(f"- Total staffed beds (previous): {total_prev}")
            lines.append(f"- Overall change: {total_change} ({total_pct:.1f}%)")

    # Zone-level analysis aggregates both bed columns, so all three
    # columns must be present.
    if has_current and has_prev and 'zone' in df.columns:
        zone_summary = df.groupby('zone').agg({
            'beds_current': 'sum',
            'beds_prev': 'sum',
        }).reset_index()
        zone_summary['change'] = zone_summary['beds_current'] - zone_summary['beds_prev']

        # A zone with zero previous beds has no defined percentage change.
        # Mask the baseline to NaN so the division yields NaN instead of
        # inf, and such zones cannot win the "largest decrease" pick.
        baseline = zone_summary['beds_prev'].where(zone_summary['beds_prev'] != 0)
        zone_summary['percent_change'] = (zone_summary['change'] / baseline) * 100

        lines.append("- Zone-level bed capacity:")
        for zone, beds, pct in zip(
            zone_summary['zone'],
            zone_summary['beds_current'],
            zone_summary['percent_change'],
        ):
            pct_text = f"{pct:.1f}%" if pd.notna(pct) else "n/a"
            lines.append(f" - {zone}: {beds} beds ({pct_text} change)")

        # idxmin() raises on an empty / all-NaN series, so only report the
        # worst zone when at least one zone has a defined percentage.
        defined_pct = zone_summary['percent_change'].dropna()
        if not defined_pct.empty:
            worst_zone = zone_summary.loc[defined_pct.idxmin()]
            lines.append(
                f"- Largest percentage decrease: {worst_zone['zone']} "
                f"({worst_zone['percent_change']:.1f}%)"
            )

    return lines


def build_data_findings_markdown(data_registry, mapping) -> Tuple[str, List[str]]:
    """Build a markdown summary of data findings with healthcare-specific metrics.

    Args:
        data_registry: Object exposing ``get(name)``. A ``None`` result is
            treated as "no data available" and that section is skipped;
            any other result is used as a pandas DataFrame.
        mapping: Object whose ``resolved`` attribute supports ``in`` and
            ``[]`` lookups from the keys ``"facility_distribution"``,
            ``"bed_capacity"`` and ``"long_term_care"`` to the names
            passed to ``data_registry.get``.

    Returns:
        A ``(markdown, missing_keys)`` tuple. ``markdown`` is the findings
        joined with newlines, or a fixed "no analyzable patterns" message
        when no finding was produced. ``missing_keys`` lists the keys that
        are absent from ``mapping.resolved``.
    """
    findings: List[str] = []
    missing_keys: List[str] = []
    resolved = mapping.resolved

    # Facility distribution findings
    if "facility_distribution" in resolved:
        df = data_registry.get(resolved["facility_distribution"])
        if df is not None:
            findings.extend(_facility_findings(df))
    else:
        missing_keys.append("facility_distribution")

    # Bed capacity findings
    if "bed_capacity" in resolved:
        df = data_registry.get(resolved["bed_capacity"])
        if df is not None:
            findings.extend(_bed_capacity_findings(df))
    else:
        missing_keys.append("bed_capacity")

    # Long-term care findings (placeholder text only; no data is read here)
    if "long_term_care" in resolved:
        findings.append("### Long-Term Care Findings")
        findings.append("- Long-term care capacity analysis requires facility distribution data")
    else:
        missing_keys.append("long_term_care")

    # Fallback when nothing could be reported. This replaces a dangling
    # `else:` that followed the return and referenced an undefined name.
    if not findings:
        md = "### Healthcare Data Analysis Results\n\nNo analyzable healthcare patterns found in the provided data. Consider uploading data with healthcare facility, service, or outcome metrics."
        return md, missing_keys

    return "\n".join(findings), missing_keys