| """ |
| 评估指标模块 |
| Metrics for PAD Predictor Evaluation |
| |
| 该模块包含了PAD预测器的各种评估指标,包括: |
| - 回归指标:MAE、RMSE、R² |
| - 置信度评估指标:ECE(Expected Calibration Error) |
| - 可靠性图表功能 |
| """ |
|
|
| import torch |
| import torch.nn.functional as F |
| import numpy as np |
| from typing import Dict, List, Tuple, Optional, Any |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score |
| import logging |
|
|
|
|
class RegressionMetrics:
    """Regression evaluation metrics for the PAD predictor.

    Provides MAE, RMSE, per-dimension and robust R², and MAPE. All metrics
    accept tensors shaped ``(batch_size,)`` or ``(batch_size, output_dim)``
    and aggregate over the batch dimension (dim=0) first.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def mae(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """Mean Absolute Error.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            reduction: Aggregation over output dimensions
                ('mean', 'sum', anything else keeps one value per dimension).

        Returns:
            MAE as a scalar tensor ('mean'/'sum') or a per-dimension tensor.
        """
        per_dim = torch.mean(torch.abs(y_pred - y_true), dim=0)

        if reduction == 'mean':
            return torch.mean(per_dim)
        elif reduction == 'sum':
            return torch.sum(per_dim)
        else:
            return per_dim

    @staticmethod
    def rmse(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """Root Mean Square Error.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            reduction: Aggregation over output dimensions
                ('mean', 'sum', anything else keeps one value per dimension).

        Returns:
            RMSE as a scalar tensor ('mean'/'sum') or a per-dimension tensor.
        """
        per_dim_mse = torch.mean((y_pred - y_true) ** 2, dim=0)
        per_dim_rmse = torch.sqrt(per_dim_mse)

        if reduction == 'mean':
            return torch.mean(per_dim_rmse)
        elif reduction == 'sum':
            return torch.sum(per_dim_rmse)
        else:
            return per_dim_rmse

    @staticmethod
    def r2_score(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """Coefficient of determination (R²), computed per output dimension.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            reduction: Aggregation over output dimensions
                ('mean', 'sum', anything else keeps one value per dimension).

        Returns:
            R² as a scalar tensor ('mean'/'sum') or a per-dimension tensor.
        """
        # Total sum of squares around the per-dimension mean.
        ss_tot = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2, dim=0)

        # Residual sum of squares.
        ss_res = torch.sum((y_true - y_pred) ** 2, dim=0)

        # Epsilon guards against a constant (zero-variance) target; note that
        # for ss_tot == 0 the result is then dominated by ss_res / 1e-8.
        r2 = 1 - (ss_res / (ss_tot + 1e-8))

        if reduction == 'mean':
            return torch.mean(r2)
        elif reduction == 'sum':
            return torch.sum(r2)
        else:
            return r2

    @staticmethod
    def robust_r2(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """Robust R² for multi-output regression.

        Sums SS_res and SS_tot over ALL dimensions first, then computes a
        single R². Better suited to multi-target regression because it
        weights each target by its share of the total variance:

            R²_robust = 1 - Σ(SS_res_all) / Σ(SS_tot_all)

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).

        Returns:
            Robust R² as a scalar tensor.
        """
        # Residual sum of squares, pooled across every dimension.
        ss_res_total = torch.sum((y_true - y_pred) ** 2)

        # Total sum of squares around the per-dimension mean, pooled.
        ss_tot_total = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2)

        # Epsilon guards against an all-constant target.
        r2_robust = 1 - (ss_res_total / (ss_tot_total + 1e-8))

        return r2_robust

    @staticmethod
    def mape(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """Mean Absolute Percentage Error.

        BUG FIX: the denominator is now ``abs(y_true) + 1e-8``. The previous
        form, ``abs(... / (y_true + 1e-8))``, did not protect against
        division by zero for NEGATIVE targets (e.g. y_true == -1e-8 made the
        denominator exactly zero), and PAD deltas can be negative. For
        non-negative targets the result is unchanged.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            reduction: Aggregation over output dimensions
                ('mean', 'sum', anything else keeps one value per dimension).

        Returns:
            MAPE as a scalar tensor ('mean'/'sum') or a per-dimension tensor.
        """
        per_dim = torch.mean(torch.abs(y_pred - y_true) / (torch.abs(y_true) + 1e-8), dim=0)

        if reduction == 'mean':
            return torch.mean(per_dim)
        elif reduction == 'sum':
            return torch.sum(per_dim)
        else:
            return per_dim

    def compute_all_metrics(self,
                            y_true: torch.Tensor,
                            y_pred: torch.Tensor,
                            component_names: Optional[List[str]] = None) -> Dict[str, Dict[str, float]]:
        """Compute every regression metric, overall and per component.

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).
            component_names: Names for the output dimensions; defaults to the
                three PAD delta components. Extra names beyond the tensor
                width are silently ignored.

        Returns:
            Nested dict: {'overall': {...}, 'components': {name: {...}}}.
        """
        if component_names is None:
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        metrics = {}

        # Metrics aggregated over all output dimensions.
        metrics['overall'] = {
            'mae': self.mae(y_true, y_pred).item(),
            'rmse': self.rmse(y_true, y_pred).item(),
            'r2': self.r2_score(y_true, y_pred).item(),
            'r2_robust': self.robust_r2(y_true, y_pred).item(),
            'mape': self.mape(y_true, y_pred).item()
        }

        # Per-component metrics (one entry per named output dimension).
        component_metrics = {}
        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                component_metrics[name] = {
                    'mae': self.mae(y_true[:, i], y_pred[:, i]).item(),
                    'rmse': self.rmse(y_true[:, i], y_pred[:, i]).item(),
                    'r2': self.r2_score(y_true[:, i], y_pred[:, i]).item(),
                    'mape': self.mape(y_true[:, i], y_pred[:, i]).item()
                }

        metrics['components'] = component_metrics

        return metrics

    def print_diagnostic_metrics(self,
                                 y_true: torch.Tensor,
                                 y_pred: torch.Tensor,
                                 component_names: Optional[List[str]] = None) -> None:
        """Print a detailed per-dimension diagnostic report to stdout.

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).
            component_names: Display names for the output dimensions.
        """
        if component_names is None:
            component_names = ['ΔPAD_P', 'ΔPAD_A', 'ΔPAD_D']

        print("\n" + "="*80)
        print("🔍 诊断模式:各维度独立指标")
        print("="*80)

        # Both flavors of overall R² so divergence between them is visible.
        r2_robust = self.robust_r2(y_true, y_pred).item()
        r2_mean = self.r2_score(y_true, y_pred).item()

        print(f"\n📊 整体指标:")
        print(f" 稳健 R² (Robust R²): {r2_robust:.6f} ← 所有维度总方差比")
        print(f" 平均 R² (Mean R²) : {r2_mean:.6f} ← 各维度R²的算术平均")
        print(f" 差异 : {r2_robust - r2_mean:+.6f}")

        print(f"\n📐 各维度详细指标:")
        print(f"{'维度':<15} {'R²':<12} {'MAE':<12} {'RMSE':<12} {'MAPE':<12}")
        print("-" * 80)

        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                mae = self.mae(y_true[:, i], y_pred[:, i]).item()
                rmse = self.rmse(y_true[:, i], y_pred[:, i]).item()
                r2 = self.r2_score(y_true[:, i], y_pred[:, i]).item()
                mape = self.mape(y_true[:, i], y_pred[:, i]).item()

                # Traffic-light marker for quick visual triage of R².
                r2_str = f"{r2:.6f}"
                if r2 >= 0.8:
                    r2_str = f"✅ {r2_str}"
                elif r2 >= 0.5:
                    r2_str = f"⚠️ {r2_str}"
                else:
                    r2_str = f"❌ {r2_str}"

                print(f"{name:<15} {r2_str:<12} {mae:<12.6f} {rmse:<12.6f} {mape:<12.6f}")

        print("="*80 + "\n")
|
|
|
|
class CalibrationMetrics:
    """Confidence-calibration metrics: ECE, reliability diagram, sharpness."""

    def __init__(self, n_bins: int = 10):
        """
        Args:
            n_bins: Number of equal-width confidence bins over [0, 1].
        """
        self.n_bins = n_bins
        self.logger = logging.getLogger(__name__)

    def expected_calibration_error(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   confidences: torch.Tensor) -> Tuple[float, List[Tuple]]:
        """Expected Calibration Error (ECE).

        Bins samples by sigmoid-normalized confidence, then accumulates the
        sample-weighted gap between each bin's mean confidence and its mean
        accuracy (1 - mean MSE error).

        BUG FIX: ECE previously compared confidence against the RAW error,
        while ``bin_info['accuracy']`` and the reliability diagram use
        ``1 - error`` — so a perfectly confident, perfectly accurate model
        scored ECE ≈ 1 instead of 0. ECE now uses the same accuracy proxy as
        the diagram: |avg_confidence - (1 - avg_error)| per bin.

        Args:
            predictions: Predicted values, shape (batch_size, D).
            targets: Ground-truth values, shape (batch_size, D).
            confidences: Raw (pre-sigmoid) confidence scores, shape (batch_size, 1).

        Returns:
            Tuple of (ECE value, list of per-bin statistics dicts).
        """
        # Per-sample error proxy: MSE across the output dimensions.
        errors = torch.mean((predictions - targets) ** 2, dim=1, keepdim=True)

        # Map raw confidence logits into [0, 1].
        confidences_norm = torch.sigmoid(confidences)

        bin_boundaries = torch.linspace(0, 1, self.n_bins + 1)
        bin_lowers = bin_boundaries[:-1]
        bin_uppers = bin_boundaries[1:]

        ece = torch.tensor(0.0, device=confidences_norm.device)
        bin_info = []

        for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
            # Half-open bins (lower, upper]; samples at exactly 0 fall in none,
            # but sigmoid output is strictly positive so nothing is lost.
            in_bin = (confidences_norm > bin_lower) & (confidences_norm <= bin_upper)
            prop_in_bin = in_bin.float().mean()

            if prop_in_bin > 0:
                avg_confidence_in_bin = confidences_norm[in_bin].mean()
                avg_error_in_bin = errors[in_bin].mean()
                # Accuracy proxy. NOTE(review): MSE is unbounded, so this can
                # go negative when the error exceeds 1 — confirm that upstream
                # targets are scaled so errors stay in a sane range.
                accuracy_in_bin = 1 - avg_error_in_bin

                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

                bin_info.append({
                    'bin_lower': bin_lower.item(),
                    'bin_upper': bin_upper.item(),
                    'count': in_bin.sum().item(),
                    'avg_confidence': avg_confidence_in_bin.item(),
                    'avg_error': avg_error_in_bin.item(),
                    'accuracy': accuracy_in_bin.item()
                })

        return ece.item(), bin_info

    def reliability_diagram(self,
                            predictions: torch.Tensor,
                            targets: torch.Tensor,
                            confidences: torch.Tensor,
                            save_path: Optional[str] = None) -> None:
        """Plot a reliability diagram (accuracy vs. confidence, with counts).

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            confidences: Raw confidence scores.
            save_path: Optional path; when given, the figure is saved there.
        """
        ece, bin_info = self.expected_calibration_error(predictions, targets, confidences)

        # Unpack per-bin statistics for plotting.
        bin_lowers = [info['bin_lower'] for info in bin_info]
        bin_uppers = [info['bin_upper'] for info in bin_info]
        avg_confidences = [info['avg_confidence'] for info in bin_info]
        accuracies = [info['accuracy'] for info in bin_info]
        counts = [info['count'] for info in bin_info]

        bin_centers = [(lower + upper) / 2 for lower, upper in zip(bin_lowers, bin_uppers)]

        plt.figure(figsize=(10, 6))

        # Diagonal = perfect calibration; model curve plotted against it.
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.plot(bin_centers, accuracies, 'bo-', label='Model', linewidth=2, markersize=8)

        # Secondary axis: how many samples landed in each bin.
        ax2 = plt.gca().twinx()
        ax2.bar(bin_centers, counts, width=0.1, alpha=0.3, color='gray', label='Sample Count')
        ax2.set_ylabel('Sample Count', fontsize=12)
        ax2.set_ylim(0, max(counts) * 1.2 if counts else 1)

        plt.xlabel('Confidence', fontsize=12)
        plt.ylabel('Accuracy', fontsize=12)
        plt.title(f'Reliability Diagram (ECE = {ece:.4f})', fontsize=14)
        plt.legend(loc='upper left')
        plt.grid(True, alpha=0.3)
        plt.xlim(0, 1)
        plt.ylim(0, 1)

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            self.logger.info(f"可靠性图表已保存到: {save_path}")

        plt.show()

    def sharpness(self, confidences: torch.Tensor) -> float:
        """Sharpness of the confidence distribution.

        Args:
            confidences: Raw (pre-sigmoid) confidence scores.

        Returns:
            Standard deviation of the sigmoid-normalized confidences.
        """
        confidences_norm = torch.sigmoid(confidences)
        return torch.std(confidences_norm).item()
|
|
|
|
class PADMetrics:
    """PAD-specific evaluation: regression metrics plus direction metrics.

    Wraps RegressionMetrics and CalibrationMetrics and adds cosine-similarity
    / angle-error statistics over the first three prediction columns
    (assumed to be the ΔP, ΔA, ΔD components — confirm against the caller).
    """

    def __init__(self):
        self.regression_metrics = RegressionMetrics()
        self.calibration_metrics = CalibrationMetrics()
        self.logger = logging.getLogger(__name__)

    def evaluate_predictions(self,
                             predictions: torch.Tensor,
                             targets: torch.Tensor,
                             component_names: Optional[List[str]] = None) -> Dict[str, Any]:
        """Run the full evaluation suite on a batch of predictions.

        Args:
            predictions: Predicted values, shape (batch_size, D) or (D,),
                with D >= 3; only the first 3 columns feed the direction metrics.
            targets: Ground-truth values, same shape as predictions.
            component_names: Names for the output dimensions.

        Returns:
            Dict with keys 'regression', 'r2_robust', 'r2_mean', 'pad_specific'.
        """
        if component_names is None:
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        # Promote single samples to a batch of one.
        if predictions.dim() == 1:
            predictions = predictions.unsqueeze(0)
        if targets.dim() == 1:
            targets = targets.unsqueeze(0)

        results = {}

        # Standard regression metrics, overall and per component.
        regression_results = self.regression_metrics.compute_all_metrics(
            predictions, targets, component_names
        )
        results['regression'] = regression_results

        # Surface the two overall R² flavors at the top level for convenience.
        results['r2_robust'] = regression_results['overall']['r2_robust']
        results['r2_mean'] = regression_results['overall']['r2']

        # Direction metrics over the ΔPAD vector (first three columns).
        delta_pad_pred = predictions[:, :3]
        delta_pad_true = targets[:, :3]

        cos_sim = F.cosine_similarity(delta_pad_pred, delta_pad_true, dim=1)
        # Clamp keeps acos inside its domain despite float rounding.
        angle_error = torch.acos(torch.clamp(cos_sim, -1 + 1e-8, 1 - 1e-8)) * 180 / np.pi

        # BUG FIX: torch.std uses Bessel's correction and returns NaN for a
        # single sample; since 1-D input is explicitly supported above,
        # report 0.0 spread in that case instead of NaN.
        n_samples = cos_sim.numel()
        results['pad_specific'] = {
            'cosine_similarity_mean': cos_sim.mean().item(),
            'cosine_similarity_std': cos_sim.std().item() if n_samples > 1 else 0.0,
            'angle_error_mean': angle_error.mean().item(),
            'angle_error_std': angle_error.std().item() if n_samples > 1 else 0.0
        }

        return results

    def evaluate_predictions_diagnostic(self,
                                        predictions: torch.Tensor,
                                        targets: torch.Tensor,
                                        component_names: Optional[List[str]] = None) -> Dict[str, Any]:
        """Diagnostic evaluation: print per-dimension details, then return results.

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            component_names: Names for the output dimensions.

        Returns:
            Same dict as :meth:`evaluate_predictions`.
        """
        # Side effect: detailed per-dimension table printed to stdout.
        self.regression_metrics.print_diagnostic_metrics(predictions, targets, component_names)

        return self.evaluate_predictions(predictions, targets, component_names)

    def generate_evaluation_report(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   save_path: Optional[str] = None) -> str:
        """Build a human-readable evaluation report.

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            save_path: Optional path; when given, the report is written there.

        Returns:
            The report as one newline-joined string.
        """
        results = self.evaluate_predictions(predictions, targets)

        report = []
        report.append("=" * 60)
        report.append("PAD预测器评估报告")
        report.append("=" * 60)

        report.append("\n1. 整体回归指标:")
        overall = results['regression']['overall']
        report.append(f" MAE: {overall['mae']:.6f}")
        report.append(f" RMSE: {overall['rmse']:.6f}")
        report.append(f" R² (平均): {overall['r2']:.6f}")
        report.append(f" R² (稳健): {overall['r2_robust']:.6f} ← 所有维度总方差比")
        report.append(f" MAPE: {overall['mape']:.6f}")

        report.append("\n2. 各组件回归指标:")
        components = results['regression']['components']
        for name, metrics in components.items():
            report.append(f" {name}:")
            report.append(f" MAE: {metrics['mae']:.6f}")
            report.append(f" RMSE: {metrics['rmse']:.6f}")
            report.append(f" R²: {metrics['r2']:.6f}")

        report.append("\n3. PAD特定指标:")
        pad_specific = results['pad_specific']
        report.append(f" 余弦相似度 (均值±标准差): {pad_specific['cosine_similarity_mean']:.4f} ± {pad_specific['cosine_similarity_std']:.4f}")
        report.append(f" 角度误差 (均值±标准差): {pad_specific['angle_error_mean']:.2f}° ± {pad_specific['angle_error_std']:.2f}°")

        report.append("\n" + "=" * 60)

        report_text = "\n".join(report)

        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.logger.info(f"评估报告已保存到: {save_path}")

        return report_text
|
|
|
|
def create_metrics(metric_type: str = 'pad', **kwargs) -> Any:
    """
    Factory for evaluation-metric objects.

    Args:
        metric_type: One of 'regression', 'calibration', 'pad'.
        **kwargs: Forwarded to the metric constructor (used by 'calibration').

    Returns:
        A metric instance of the requested type.

    Raises:
        ValueError: If metric_type is not recognized.
    """
    if metric_type == 'pad':
        return PADMetrics()
    if metric_type == 'calibration':
        return CalibrationMetrics(**kwargs)
    if metric_type == 'regression':
        return RegressionMetrics()
    raise ValueError(f"不支持的指标类型: {metric_type}")
|
|
|
|
if __name__ == "__main__":
    # Smoke test of the metrics module on random data.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 5 columns: 4 prediction components + 1 raw confidence score.
    batch_size = 100
    predictions = torch.randn(batch_size, 5).to(device)
    targets = torch.randn(batch_size, 5).to(device)

    print("测试评估指标:")
    print(f"输入形状: {predictions.shape}")

    # Regression metrics over all 5 columns.
    regression_metrics = RegressionMetrics()
    regression_results = regression_metrics.compute_all_metrics(predictions, targets)

    print(f"\n整体回归指标:")
    for key, value in regression_results['overall'].items():
        print(f" {key}: {value:.6f}")

    # Calibration metrics: components vs. the confidence column.
    calibration_metrics = CalibrationMetrics(n_bins=10)
    pred_components = predictions[:, :4]
    target_components = targets[:, :4]
    pred_confidence = predictions[:, 4:5]

    ece, bin_info = calibration_metrics.expected_calibration_error(
        pred_components, target_components, pred_confidence
    )
    print(f"\nECE: {ece:.6f}")

    # Full PAD evaluation.
    pad_metrics = PADMetrics()
    full_results = pad_metrics.evaluate_predictions(predictions, targets)

    # BUG FIX: evaluate_predictions() returns no 'calibration' key, so the
    # old full_results['calibration'] lookup raised KeyError. Report the
    # calibration numbers computed above instead.
    print(f"\n校准指标:")
    print(f" ECE: {ece:.6f}")
    print(f" Sharpness: {calibration_metrics.sharpness(pred_confidence):.6f}")

    print(f"\nPAD特定指标:")
    pad_specific = full_results['pad_specific']
    for key, value in pad_specific.items():
        print(f" {key}: {value:.6f}")

    # Text report.
    report = pad_metrics.generate_evaluation_report(predictions, targets)
    print(f"\n评估报告:")
    print(report)

    print("\n评估指标测试完成!")