File size: 6,236 Bytes
b7d9967 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# This code is part of a Qiskit project.
#
# (C) Copyright IBM 2021, 2023.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.
""" Loss utilities """
from abc import ABC, abstractmethod
import numpy as np
from ...exceptions import QiskitMachineLearningError
class Loss(ABC):
    """
    Common interface of the per-sample loss functions defined in this module.

    Subclasses provide :meth:`evaluate` and :meth:`gradient`. Instances are callable; calling
    an instance is the same as calling :meth:`evaluate`.
    """

    def __call__(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Evaluate the loss by delegating to :meth:`evaluate`.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values.

        Returns:
            Whatever :meth:`evaluate` returns for the same arguments.
        """
        loss_values = self.evaluate(predict, target)
        return loss_values

    @staticmethod
    def _validate_shapes(predict: np.ndarray, target: np.ndarray) -> None:
        """
        Check that both arrays have exactly the same shape.

        Args:
            predict: an array of predicted values using the model
            target: an array of the true values

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        if predict.shape == target.shape:
            return

        message = f"Shapes don't match, predict: {predict.shape}, target: {target.shape}!"
        raise QiskitMachineLearningError(message)

    @abstractmethod
    def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the value of the loss function, one value per sample.

        Both inputs are expected to have the shape ``(N, *)``, where ``N`` is the number of
        samples.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values.

        Returns:
            An array with values of the loss function of the shape ``(N, 1)``.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match
        """
        raise NotImplementedError

    @abstractmethod
    def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the gradient of the loss function, sample by sample.

        Both inputs are expected to have the shape ``(N, *)``, where ``N`` is the number of
        samples.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values.

        Returns:
            An array with gradient values of the shape ``(N, *)``. The output shape depends on
            the loss function.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        raise NotImplementedError
class L1Loss(Loss):
    r"""
    This class computes the L1 loss (i.e. absolute error) for each sample as:

    .. math::

        \text{L1Loss}(predict, target) = \sum_{i=0}^{N_{\text{elements}}} \left| predict_i -
        target_i \right|.
    """

    def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the absolute error of every sample.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            For inputs with at most one dimension, the element-wise absolute error with the
            shape of the inputs. Otherwise an array of shape ``(N,)`` holding, for each sample
            along the first axis, the sum of absolute errors over all remaining axes.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        self._validate_shapes(predict, target)

        abs_error = np.abs(predict - target)
        if abs_error.ndim <= 1:
            return abs_error

        # Sum explicitly instead of calling ``np.linalg.norm(..., ord=1, axis=...)``: with two
        # axes ``norm`` computes the matrix 1-norm (the maximum absolute column sum) rather
        # than the sum over all elements, and with more than two axes it raises ``ValueError``.
        element_axes = tuple(range(1, abs_error.ndim))
        return np.sum(abs_error, axis=element_axes)

    def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the element-wise gradient of the absolute error with respect to ``predict``.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            An array of the same shape as the inputs containing ``sign(predict - target)``.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        self._validate_shapes(predict, target)

        return np.sign(predict - target)
class L2Loss(Loss):
    r"""
    This class computes the L2 loss (i.e. squared error) for each sample as:

    .. math::

        \text{L2Loss}(predict, target) = \sum_{i=0}^{N_{\text{elements}}} (predict_i - target_i)^2.
    """

    def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the squared error of every sample.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            For inputs with at most one dimension, the element-wise squared error with the
            shape of the inputs. Otherwise an array of shape ``(N,)`` holding, for each sample
            along the first axis, the sum of squared errors over all remaining axes.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        self._validate_shapes(predict, target)

        diff = predict - target
        if diff.ndim <= 1:
            return diff**2

        # Sum the squares directly instead of squaring ``np.linalg.norm``: the square root
        # followed by a square adds a rounding error (e.g. 2.0000000000000004 instead of 2.0),
        # and ``norm`` raises ``ValueError`` when asked to reduce over more than two axes.
        # ``np.abs`` keeps the squared-magnitude semantics of the norm for complex inputs.
        element_axes = tuple(range(1, diff.ndim))
        return np.sum(np.abs(diff) ** 2, axis=element_axes)

    def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the element-wise gradient of the squared error with respect to ``predict``.

        Args:
            predict: an array of predicted values using the model.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            An array of the same shape as the inputs containing ``2 * (predict - target)``.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        self._validate_shapes(predict, target)

        return 2 * (predict - target)
class CrossEntropyLoss(Loss):
    r"""
    This class computes the cross entropy loss for each sample as:

    .. math::

        \text{CrossEntropyLoss}(predict, target) = -\sum_{i=0}^{N_{\text{classes}}}
        target_i * log(predict_i).

    The implementation takes the logarithm in base 2 (``np.log2``).
    """

    def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the cross entropy of every sample.

        One-dimensional inputs are treated as a single sample.

        Args:
            predict: an array of predicted probabilities, one row per sample.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            An array of shape ``(N, 1)`` with one loss value per sample.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        self._validate_shapes(predict, target)
        if predict.ndim == 1:
            predict, target = predict.reshape(1, -1), target.reshape(1, -1)

        # Bound the probabilities from below by a tiny positive number before taking the
        # logarithm, so that a class predicted with probability 0 does not produce `nan`.
        bounded_probs = np.clip(predict, a_min=1e-10, a_max=None)
        log_probs = np.log2(bounded_probs)

        # Row-wise dot product of target and log-probabilities: one number per sample,
        # i.e. shape (N,), which is then turned into a column of shape (N, 1).
        per_sample = np.einsum("ij,ij->i", target, log_probs)
        return -per_sample.reshape(-1, 1)

    def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        """
        Compute the gradient assuming softmax is used; the target vector may or may not be a
        one-hot encoding.

        One-dimensional inputs are treated as a single sample.

        Args:
            predict: an array of predicted probabilities, one row per sample.
            target: an array of the true values, of the same shape as ``predict``.

        Returns:
            A two-dimensional array with one row per sample, computed as
            ``predict * sum(target) - target`` where the sum runs over each row of ``target``.

        Raises:
            QiskitMachineLearningError: shapes of predict and target do not match.
        """
        # NOTE(review): this expression matches the softmax cross-entropy gradient for a
        # natural logarithm, while ``evaluate`` uses ``np.log2``; the two would then differ
        # by a constant factor -- confirm this is intended.
        self._validate_shapes(predict, target)
        if predict.ndim == 1:
            predict, target = predict.reshape(1, -1), target.reshape(1, -1)

        # Scale every row of predict by the total of the matching target row, then
        # subtract the target itself.
        row_totals = np.sum(target, axis=1)
        scaled_predict = np.einsum("ij,i->ij", predict, row_totals)
        return scaled_predict - target
|