import torch
import torch.nn.functional as F
import math
from torch.optim import Optimizer  # base class for custom optimizers


class DCLR(Optimizer):
    def __init__(self, params, lr=0.01, lambda_=1.0, epsilon=1e-8, delta=1e-12, verbose=True):
        defaults = dict(lr=lr, lambda_=lambda_, epsilon=epsilon, delta=delta, verbose=verbose)
        super(DCLR, self).__init__(params, defaults)

    def step(self, closure=None, output_activations=None):
        if output_activations is None:
            raise ValueError("Output activations must be provided to compute entropy.")
        loss = None
        if closure is not None:
            loss = closure()
        # Mean predictive entropy of the current batch, used to modulate the learning rate.
        probs = F.softmax(output_activations, dim=1)
        log_probs = torch.log(probs + self.defaults['delta'])
        entropy = -torch.sum(probs * log_probs, dim=1).mean()
        for group in self.param_groups:
            lr_0 = group['lr']
            lambda_ = group['lambda_']
            epsilon = group['epsilon']
            verbose = group['verbose']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                grad_norm_sq = grad.norm() ** 2
                # Adaptive step size: shrinks with the squared gradient norm, relaxes as entropy grows.
                eta_t = lr_0 * math.exp(-lambda_ * grad_norm_sq.item() / (entropy.item() + epsilon))
                if verbose:
                    print(f"[DCLR] Entropy: {entropy.item():.6f} | GradNorm²: {grad_norm_sq.item():.6f} | η(t): {eta_t:.6e}")
                # Use the non-deprecated add_(Tensor other, *, Number alpha) signature.
                p.data.add_(grad, alpha=-eta_t)
        return loss
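
For reference, the update rule implemented above is eta(t) = lr * exp(-lambda_ * ||grad||^2 / (H + epsilon)), where H is the mean softmax entropy of the batch. Below is a minimal usage sketch showing how step() might be driven in a training loop; the model, loader, and batch shapes are hypothetical stand-ins (not part of the original code), included only because DCLR needs the output logits passed in explicitly to compute the entropy term.

# Minimal usage sketch (model/loader names are assumptions, not from the original code).
import torch
import torch.nn as nn

model = nn.Linear(20, 5)  # hypothetical stand-in classifier producing logits
loader = [(torch.randn(8, 20), torch.randint(0, 5, (8,))) for _ in range(3)]  # dummy batches
criterion = nn.CrossEntropyLoss()
optimizer = DCLR(model.parameters(), lr=0.01, lambda_=1.0, verbose=False)

for inputs, targets in loader:
    optimizer.zero_grad()
    logits = model(inputs)             # output activations needed for the entropy term
    loss = criterion(logits, targets)
    loss.backward()
    # Pass the logits so step() can compute the batch entropy that scales eta(t).
    optimizer.step(output_activations=logits.detach())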