Akshay4506's picture
Fix deployment entry point and merge requirements
c4ff02d
"""
Evaluation Metrics
==================
Comprehensive metrics for classification and regression tasks.
Author: UW MSIM Team
Date: November 2025
"""
import numpy as np
from sklearn.metrics import (
roc_auc_score, accuracy_score, f1_score, precision_score, recall_score,
r2_score, mean_squared_error, mean_absolute_error, log_loss
)
from typing import Dict, Optional
import logging
logger = logging.getLogger(__name__)
def calculate_classification_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    y_proba: Optional[np.ndarray] = None
) -> Dict[str, float]:
    """
    Calculate all classification metrics.

    Parameters
    ----------
    y_true : np.ndarray
        True labels
    y_pred : np.ndarray
        Predicted labels
    y_proba : np.ndarray, optional
        Predicted probabilities (n_samples, n_classes). A 1-D array is
        treated as the positive-class score of a binary problem.

    Returns
    -------
    metrics : dict
        Dictionary of metric names and values. Always contains 'accuracy',
        'f1_macro', 'f1_weighted', 'precision_macro' and 'recall_macro'.
        When ``y_proba`` is given it also contains 'roc_auc' and 'log_loss';
        each of those is NaN if its own computation fails.
    """
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1_macro': f1_score(y_true, y_pred, average='macro', zero_division=0),
        'f1_weighted': f1_score(y_true, y_pred, average='weighted', zero_division=0),
        'precision_macro': precision_score(y_true, y_pred, average='macro', zero_division=0),
        'recall_macro': recall_score(y_true, y_pred, average='macro', zero_division=0)
    }

    # Probability-based metrics are only defined when probabilities are given.
    if y_proba is None:
        return metrics

    # ROC-AUC. The binary/multi-class decision is taken from the shape of
    # y_proba, not from the labels present in y_true: an evaluation split
    # that happens to contain only two of the model's classes must not have
    # column 1 of a wider probability matrix scored as a "positive class".
    # Best-effort by design: any failure is logged and reported as NaN.
    try:
        proba = np.asarray(y_proba)
        if proba.ndim == 1:
            # Already the positive-class score
            metrics['roc_auc'] = roc_auc_score(y_true, proba)
        elif proba.ndim == 2 and proba.shape[1] == 2:
            # Binary classification: score of the second column
            metrics['roc_auc'] = roc_auc_score(y_true, proba[:, 1])
        else:
            # Multi-class classification
            metrics['roc_auc'] = roc_auc_score(
                y_true, proba,
                multi_class='ovr',
                average='macro'
            )
    except Exception as e:
        logger.warning("ROC-AUC calculation failed: %s", e)
        metrics['roc_auc'] = np.nan

    # Log loss is computed in its own try block so that a ROC-AUC failure
    # does not discard a valid log loss (and vice versa), and so that the
    # warning names the metric that actually failed.
    try:
        metrics['log_loss'] = log_loss(y_true, y_proba)
    except Exception as e:
        logger.warning("Log loss calculation failed: %s", e)
        metrics['log_loss'] = np.nan

    return metrics
def calculate_regression_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray
) -> Dict[str, float]:
    """
    Calculate all regression metrics.

    Parameters
    ----------
    y_true : np.ndarray
        True values
    y_pred : np.ndarray
        Predicted values

    Returns
    -------
    metrics : dict
        Dictionary with keys 'r2', 'rmse', 'mae', 'mse' and 'mape'.
        'mape' is a percentage computed only over samples whose true value
        is non-zero; it is NaN when every true value is zero or when the
        computation fails.
    """
    # Compute MSE once and derive RMSE from it.
    mse = mean_squared_error(y_true, y_pred)
    metrics = {
        'r2': r2_score(y_true, y_pred),
        'rmse': np.sqrt(mse),
        'mae': mean_absolute_error(y_true, y_pred),
        'mse': mse
    }

    # MAPE (avoid division by zero). Best-effort: a failure here must not
    # discard the metrics computed above.
    try:
        # Convert to arrays so that list inputs work with boolean-mask
        # indexing and the arithmetic is purely positional.
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_pred, dtype=float)
        if predicted.shape != actual.shape:
            # e.g. (n,) vs (n, 1): align shapes so the subtraction below
            # cannot silently broadcast into a matrix. Raises ValueError
            # (handled below) when the sizes genuinely differ.
            predicted = predicted.reshape(actual.shape)

        non_zero_mask = actual != 0
        if np.any(non_zero_mask):
            relative_error = np.abs(
                (actual[non_zero_mask] - predicted[non_zero_mask])
                / actual[non_zero_mask]
            )
            metrics['mape'] = np.mean(relative_error) * 100
        else:
            metrics['mape'] = np.nan
    except (TypeError, ValueError, IndexError) as e:
        logger.warning("MAPE calculation failed: %s", e)
        metrics['mape'] = np.nan

    return metrics