# File size: 6,684 Bytes
# fdc1efd
import numpy as np
from sklearn.metrics import (
accuracy_score,
average_precision_score,
f1_score,
hamming_loss,
precision_recall_curve,
zero_one_loss,
)
def hamming_score(preds, targets, thresholds: np.ndarray = None):
    """Compute the Hamming score of thresholded multi-label predictions.

    The scores in ``preds`` are binarized and compared with ``targets``; the
    result is ``1 - hamming_loss(targets, binarized_preds)``, so a higher value
    means fewer individual label assignments are wrong.

    :param preds: The predicted scores (one column per label when the
        thresholds have to be derived).
    :type preds: numpy array
    :param targets: The ground truth labels.
    :type targets: numpy array
    :param thresholds: Decision thresholds to binarize ``preds`` with. When
        omitted (``None``) they are derived from ``preds`` and ``targets`` with
        ``optimize_accuracy``.
    :type thresholds: numpy array, optional
    :return: The computed Hamming Score.
    :rtype: float
    """
    if thresholds is None:
        # optimize_accuracy evaluates every candidate as ``preds >= th``; apply
        # the derived thresholds with the same inclusive comparison so a score
        # equal to the chosen threshold lands in the class it was tuned for.
        binary_preds = preds >= optimize_accuracy(preds, targets)
    else:
        # Caller-supplied thresholds keep the established strict comparison.
        binary_preds = preds > thresholds
    return 1 - hamming_loss(targets, binary_preds.astype(int))
def zero_one_score(preds, targets, thresholds: np.ndarray = None):
    """Compute the Zero-One score of thresholded multi-label predictions.

    The scores in ``preds`` are binarized and compared with ``targets``; the
    result is ``1 - zero_one_loss(targets, binarized_preds, normalize=True)``,
    which lies between 0 and 1 with 1 being a perfect match.

    :param preds: The predicted scores (one column per label when the
        thresholds have to be derived).
    :type preds: numpy array
    :param targets: The ground truth labels.
    :type targets: numpy array
    :param thresholds: Decision thresholds to binarize ``preds`` with. When
        omitted (``None``) they are derived from ``preds`` and ``targets`` with
        ``optimize_accuracy``.
    :type thresholds: numpy array, optional
    :return: The computed Zero-One Score.
    :rtype: float
    """
    if thresholds is None:
        # optimize_accuracy evaluates every candidate as ``preds >= th``; apply
        # the derived thresholds with the same inclusive comparison so a score
        # equal to the chosen threshold lands in the class it was tuned for.
        binary_preds = preds >= optimize_accuracy(preds, targets)
    else:
        # Caller-supplied thresholds keep the established strict comparison.
        binary_preds = preds > thresholds
    return 1 - zero_one_loss(targets, binary_preds.astype(int), normalize=True)
def mean_f1_score(preds, targets, thresholds: np.ndarray = None):
    """Compute the sample-averaged F1 score of thresholded predictions.

    The scores in ``preds`` are binarized and passed to
    ``f1_score(..., average="samples", zero_division=0)``. The result lies
    between 0 and 1, with 1 being a perfect match.

    :param preds: The predicted scores (one column per label when the
        thresholds have to be derived).
    :type preds: numpy array
    :param targets: The ground truth labels.
    :type targets: numpy array
    :param thresholds: Decision thresholds to binarize ``preds`` with. When
        omitted (``None``) they are derived from ``preds`` and ``targets`` with
        ``optimize_f1_score``.
    :type thresholds: numpy array, optional
    :return: The computed Mean F1 Score.
    :rtype: float
    """
    if thresholds is None:
        # optimize_f1_score picks thresholds from precision_recall_curve, whose
        # thresholds are observed scores evaluated as ``score >= threshold``.
        # A strict ``>`` would always discard the samples sitting exactly on
        # the optimum, so the derived thresholds are applied inclusively.
        binary_preds = preds >= optimize_f1_score(preds, targets)
    else:
        # Caller-supplied thresholds keep the established strict comparison.
        binary_preds = preds > thresholds
    return f1_score(targets, binary_preds.astype(int), average="samples", zero_division=0)
def per_instr_f1_score(preds, targets, thresholds: np.ndarray = None):
    """Compute the F1 score of each instrument (label) separately.

    The scores in ``preds`` are binarized and passed to
    ``f1_score(..., average=None, zero_division=0)``, which yields one F1
    value per label instead of an aggregate. Each value lies between 0 and 1,
    with 1 being a perfect match.

    :param preds: The predicted scores (one column per label when the
        thresholds have to be derived).
    :type preds: numpy array
    :param targets: The ground truth labels.
    :type targets: numpy array
    :param thresholds: Decision thresholds to binarize ``preds`` with. When
        omitted (``None``) they are derived from ``preds`` and ``targets`` with
        ``optimize_f1_score``.
    :type thresholds: numpy array, optional
    :return: The computed Per-Instrument F1 Score.
    :rtype: numpy array
    """
    if thresholds is None:
        # optimize_f1_score picks thresholds from precision_recall_curve, whose
        # thresholds are observed scores evaluated as ``score >= threshold``.
        # A strict ``>`` would always discard the samples sitting exactly on
        # the optimum, so the derived thresholds are applied inclusively.
        binary_preds = preds >= optimize_f1_score(preds, targets)
    else:
        # Caller-supplied thresholds keep the established strict comparison.
        binary_preds = preds > thresholds
    return f1_score(targets, binary_preds.astype(int), average=None, zero_division=0)
def mean_average_precision(preds, targets):
    """Compute the mean Average Precision (mAP) of the predictions.

    Delegates to ``average_precision_score`` with ``average="samples"``. No
    thresholding is applied: the raw scores in ``preds`` are used as-is.
    A higher value indicates better predictions.

    :param preds: The predicted probabilities or scores.
    :type preds: numpy array
    :param targets: The ground truth labels.
    :type targets: numpy array
    :return: The computed mAP score.
    :rtype: float
    """
    map_score = average_precision_score(targets, preds, average="samples")
    return map_score
def optimize_f1_score(preds, targets):
    """Find, for every label, the decision threshold that maximizes its F1 score.

    For each column the precision/recall curve is computed with
    ``precision_recall_curve`` and the threshold with the highest F1 score is
    kept. Thresholds returned this way are meant to be applied inclusively
    (``score >= threshold``), matching how the curve is computed.

    :param preds: The predicted probabilities, one column per label.
    :type preds: numpy array
    :param targets: The ground truth labels, one column per label.
    :type targets: numpy array
    :return: The optimized threshold for each label.
    :rtype: numpy array
    """
    n_labels = preds.shape[1]
    label_thresholds = np.empty(n_labels)
    for i in range(n_labels):
        precision, recall, thresholds = precision_recall_curve(targets[:, i], preds[:, i])
        # The curve ends with an extra (precision=1, recall=0) point that has
        # no matching threshold; drop it so indices line up with `thresholds`.
        precision, recall = precision[:-1], recall[:-1]
        denominator = precision + recall
        # Where precision + recall is 0 (no true positives at that threshold)
        # the F1 score is 0. A plain division would yield NaN there, and
        # np.argmax returns the first NaN, which would select that threshold.
        fscore = np.divide(
            2 * precision * recall,
            denominator,
            out=np.zeros_like(denominator, dtype=float),
            where=denominator > 0,
        )
        label_thresholds[i] = thresholds[np.argmax(fscore)]
    return label_thresholds
def _count_at_or_above(scores, cutoffs):
    """Return, for each cutoff, how many entries of ``scores`` are >= that cutoff."""
    # NaN never satisfies ``>=``, so it is excluded before sorting.
    ordered = np.sort(scores[~np.isnan(scores)])
    # side="left" gives the number of entries strictly below each cutoff.
    return ordered.size - np.searchsorted(ordered, cutoffs, side="left")


def optimize_accuracy(preds, targets):
    """
    Determine the optimal threshold for each label, based on the predicted probabilities and the true targets,
    in order to maximize the accuracy of the predictions.

    Candidate thresholds are ``0.0001, 0.0002, ..., 0.9999``; a sample is predicted positive when its
    score is ``>=`` the threshold. On ties the lowest candidate wins.

    :param preds: A 2D NumPy array containing the predicted probabilities for each label.
    :type preds: numpy.ndarray
    :param targets: A 2D NumPy array containing the true binary targets for each label.
    :type targets: numpy.ndarray
    :raises ValueError: If the input arrays are not 2D arrays or have incompatible shapes.
    :return: A 1D NumPy array containing the optimal threshold for each label.
    :rtype: numpy.ndarray
    """
    preds = np.asarray(preds, dtype=float)
    targets = np.asarray(targets)
    if preds.ndim != 2 or targets.ndim != 2:
        raise ValueError("preds and targets must both be 2D arrays")
    if preds.shape != targets.shape:
        raise ValueError(
            f"preds and targets must have the same shape, got {preds.shape} and {targets.shape}"
        )

    thresholds = np.arange(0.0001, 1, 0.0001)
    n_labels = preds.shape[1]
    best_thresholds = np.empty(n_labels)
    for i in range(n_labels):
        scores, labels = preds[:, i], targets[:, i]
        is_positive = labels == 1
        is_negative = labels == 0
        # Count correct predictions for every candidate at once instead of
        # re-scoring the whole column once per threshold.
        true_positives = _count_at_or_above(scores[is_positive], thresholds)
        false_positives = _count_at_or_above(scores[is_negative], thresholds)
        true_negatives = np.count_nonzero(is_negative) - false_positives
        # Maximizing the number of correct predictions maximizes accuracy;
        # argmax keeps the first (lowest) threshold among equally good ones.
        best_thresholds[i] = thresholds[np.argmax(true_positives + true_negatives)]
    return best_thresholds
|