from abc import ABC, abstractmethod

import numpy as np

from .activation import SoftMax


class Loss(ABC):
    """Base class for losses: forward computes the loss, backward its gradient."""

    @staticmethod
    @abstractmethod
    def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        pass

    @staticmethod
    @abstractmethod
    def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        pass


class LogitsLoss(Loss):
    """Marker base class for losses that operate on raw logits rather than probabilities."""


class MSE(Loss):
    """Mean squared error, averaged over the batch dimension."""

    @staticmethod
    def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # Sum of squared errors divided by the batch size.
        return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]

    @staticmethod
    def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # d(loss)/d(y_hat) = 2 * (y_hat - y_true) / batch_size
        return (y_hat - y_true) * (2 / y_true.shape[0])


class CrossEntropy(Loss):
    """Cross-entropy loss that applies SoftMax to the incoming scores internally."""

    @staticmethod
    def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        y_hat = np.asarray(y_hat)
        y_true = np.asarray(y_true)
        m = y_true.shape[0]
        p = SoftMax().forward(y_hat)
        eps = 1e-15  # to prevent log(0)
        # Negative log probability assigned to the true class of each sample.
        log_likelihood = -np.log(
            np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
        )
        loss = np.sum(log_likelihood) / m
        return loss

    @staticmethod
    def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        y_hat = np.asarray(y_hat)
        y_true = np.asarray(y_true)
        # forward applies SoftMax to y_hat, so the gradient with respect to
        # y_hat is (softmax(y_hat) - y_true) / batch_size.
        probs = SoftMax().forward(y_hat)
        grad = probs - y_true
        return grad / y_true.shape[0]


class CrossEntropyWithLogits(LogitsLoss):
    """Cross-entropy computed directly from raw logits."""

    @staticmethod
    def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # Apply the log-sum-exp trick for numerical stability.
        max_logits = np.max(y_hat, axis=1, keepdims=True)
        log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
        log_probs = y_hat - max_logits - log_sum_exp
        # Select the log probability of the true class via the one-hot labels.
        loss = -np.sum(log_probs * y_true) / y_true.shape[0]
        return loss

    @staticmethod
    def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        # Compute softmax probabilities from the logits.
        exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
        probs = exps / np.sum(exps, axis=1, keepdims=True)
        # Gradient w.r.t. logits: (softmax - one-hot labels) / batch_size.
        grad = (probs - y_true) / y_true.shape[0]
        return grad
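

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes this file
# lives in a package alongside activation.py, so it must be run as
# `python -m <package>.losses` for the relative import above to resolve; the
# shapes, seed, and printed quantities below are illustrative assumptions only.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Fake batch: 4 samples, 3 classes, with one-hot targets.
    logits = rng.normal(size=(4, 3))
    labels = np.eye(3)[rng.integers(0, 3, size=4)]

    # MSE on regression-style targets of the same shape.
    targets = rng.normal(size=(4, 3))
    print("MSE loss:", MSE.forward(logits, targets))
    print("MSE grad shape:", MSE.backward(logits, targets).shape)

    # CrossEntropy (softmax applied internally) and CrossEntropyWithLogits
    # should agree on the same raw scores, up to the clipping epsilon.
    print("CrossEntropy loss:", CrossEntropy.forward(logits, labels))
    print("CrossEntropyWithLogits loss:", CrossEntropyWithLogits.forward(logits, labels))
    print("CrossEntropyWithLogits grad:\n", CrossEntropyWithLogits.backward(logits, labels))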