""" Run this to test the explainability module independently """ import sys import os import numpy as np import pandas as pd from sklearn.ensemble import RandomForestClassifier from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split import joblib from pathlib import Path # Add current directory to path for imports sys.path.append('.') def create_test_model_and_data(): """Create a simple test model and data for SHAP testing""" print("Creating test model and data...") # Generate synthetic health data np.random.seed(42) n_samples = 1000 # Feature columns (simplified version of Alertra features) feature_columns = [ 'heart_rate', 'systolic_bp', 'diastolic_bp', 'sleep_hours', 'steps', 'spo2', 'weight_lbs', 'medication_taken', 'pain_level', 'pulse_pressure', 'activity_sleep_ratio', 'cardiovascular_risk', 'respiratory_risk' ] # Generate random data data = { 'heart_rate': np.random.normal(75, 10, n_samples), 'systolic_bp': np.random.normal(130, 15, n_samples), 'diastolic_bp': np.random.normal(80, 10, n_samples), 'sleep_hours': np.random.normal(7, 1.5, n_samples), 'steps': np.random.normal(6000, 2000, n_samples), 'spo2': np.random.normal(97, 2, n_samples), 'weight_lbs': np.random.normal(175, 25, n_samples), 'medication_taken': np.random.choice([0, 1], n_samples, p=[0.2, 0.8]), 'pain_level': np.random.randint(0, 6, n_samples) } # Add derived features data['pulse_pressure'] = data['systolic_bp'] - data['diastolic_bp'] data['activity_sleep_ratio'] = data['steps'] / (data['sleep_hours'] + 0.1) data['cardiovascular_risk'] = ( (data['heart_rate'] > 90).astype(int) + (data['systolic_bp'] > 140).astype(int) + (data['diastolic_bp'] > 90).astype(int) ) data['respiratory_risk'] = ( (data['spo2'] < 95).astype(int) * 2 + (data['spo2'] < 90).astype(int) * 3 ) # Create DataFrame df = pd.DataFrame(data) # Create target variable (alert conditions) df['target'] = ( (df['spo2'] < 88) | # Critical oxygen (df['heart_rate'] > 120) | # High heart rate (df['systolic_bp'] > 180) | # High BP (df['pain_level'] > 7) | # Severe pain ((df['cardiovascular_risk'] >= 2) & (df['medication_taken'] == 0)) # Multiple risks + no meds ).astype(int) # Prepare features and target X = df[feature_columns] y = df['target'] # Split data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Train model print("Training Random Forest model...") model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced') model.fit(X_train, y_train) # Create scaler scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train) X_test_scaled = scaler.transform(X_test) print(f"Model accuracy: {model.score(X_test, y_test):.3f}") print(f"Alert rate: {y.mean():.3f}") return model, scaler, feature_columns, X_test.iloc[0:5] def test_shap_explainer(): """Test the fixed SHAP explainer""" print("\n" + "="*60) print("TESTING SHAP EXPLAINER") print("="*60) try: # Import our fixed explainer from explainability import AlertraExplainer print("✓ Successfully imported AlertraExplainer") # Create test model and data model, scaler, feature_columns, test_samples = create_test_model_and_data() print("✓ Created test model and data") # Initialize explainer explainer = AlertraExplainer(model, feature_columns, model_type="random_forest") print("✓ Initialized SHAP explainer") # Test single prediction explanation print("\nTesting single prediction explanation...") test_health_data = test_samples.iloc[0].to_dict() explanation = explainer.explain_prediction(test_health_data, return_plot_data=True) print("✓ Generated SHAP explanation") # Print explanation details print(f"\nExplanation: {explanation['prediction_explanation']}") print(f"Expected value: {explanation['expected_value']:.3f}") print(f"Top factors:") for factor in explanation['top_factors'][:3]: print(f" {factor['rank']}. {factor['feature']}: {factor['impact']} (magnitude: {factor['magnitude']:.3f})") # Test waterfall plot (the main bug we're fixing) print("\nTesting waterfall plot generation...") try: plot_path = explainer.create_waterfall_plot(explanation, "test_waterfall.png") if plot_path: print("✓ Waterfall plot created successfully") else: print("✓ Waterfall plot displayed (no save path)") except Exception as e: print(f"⚠ Waterfall plot failed, but manual fallback should work: {e}") # Test feature importance plot print("\nTesting feature importance plot...") importance_path = explainer.create_feature_importance_plot(explanation, "test_importance.png") if importance_path: print("✓ Feature importance plot created successfully") # Test global feature importance print("\nTesting global feature importance...") global_importance = explainer.get_global_feature_importance() print("✓ Global feature importance calculated") print("Top 3 globally important features:") for item in global_importance['feature_importance'][:3]: print(f" {item['rank']}. {item['feature']}: {item['importance']:.3f}") # Test batch explanation print("\nTesting batch explanations...") batch_data = [test_samples.iloc[i].to_dict() for i in range(3)] batch_explanations = explainer.batch_explain(batch_data) print(f"✓ Generated {len(batch_explanations)} batch explanations") print("\n" + "="*60) print("ALL TESTS PASSED! SHAP explainer is working correctly.") print("="*60) return True except Exception as e: print(f"\n TEST FAILED: {e}") import traceback traceback.print_exc() return False def test_api_integration(): """Test if the explainer works with the main API""" print("\n" + "="*60) print("TESTING API INTEGRATION") print("="*60) try: # Create models directory and save test files models_dir = Path("models") models_dir.mkdir(exist_ok=True) model, scaler, feature_columns, _ = create_test_model_and_data() # Save model files joblib.dump(model, models_dir / "alertra_enhanced_model.pkl") joblib.dump(scaler, models_dir / "alertra_enhanced_scaler.pkl") joblib.dump(feature_columns, models_dir / "alertra_feature_columns.pkl") print("✓ Saved test model files") # Test loading in main.py style loaded_model = joblib.load(models_dir / "alertra_enhanced_model.pkl") loaded_scaler = joblib.load(models_dir / "alertra_enhanced_scaler.pkl") loaded_features = joblib.load(models_dir / "alertra_feature_columns.pkl") from explainability import AlertraExplainer api_explainer = AlertraExplainer(loaded_model, loaded_features) print("✓ Successfully loaded model files and created explainer (API style)") # Test explanation test_data = { 'heart_rate': 85, 'systolic_bp': 140, 'diastolic_bp': 90, 'sleep_hours': 6, 'steps': 5000, 'spo2': 95, 'weight_lbs': 180, 'medication_taken': 1, 'pain_level': 3, 'pulse_pressure': 50, 'activity_sleep_ratio': 833.3, 'cardiovascular_risk': 1, 'respiratory_risk': 2 } explanation = api_explainer.explain_prediction(test_data) print("✓ API-style explanation generated successfully") print("\n" + "="*60) print("API INTEGRATION TEST PASSED!") print("The explainer is ready for use in main.py") print("="*60) return True except Exception as e: print(f"\n API INTEGRATION TEST FAILED: {e}") import traceback traceback.print_exc() return False if __name__ == "__main__": print("ALERTRA SHAP EXPLAINABILITY TEST SUITE") print("This script tests the fixed SHAP implementation") print("Make sure you have installed: pip install shap matplotlib") # Run tests test1_passed = test_shap_explainer() test2_passed = test_api_integration() print(f"\n" + "="*60) print("FINAL TEST RESULTS:") print(f"SHAP Explainer Test: {'✓ PASSED' if test1_passed else 'FAILED'}") print(f"API Integration Test: {'✓ PASSED' if test2_passed else 'FAILED'}") if test1_passed and test2_passed: print("\n ALL TESTS PASSED! The SHAP bug has been fixed.") print("You can now run the main API with working explainability.") print("\nNext steps:") print("1. Copy your actual model files to the models/ directory") print("2. Run: python main.py") print("3. Test explainability endpoints at /explain-prediction") else: print("\n Some tests failed. Check the error messages above.") print("You may need to install missing dependencies:") print("pip install shap matplotlib scikit-learn") print("="*60)