ssa-perin / utility /autoclip.py
larkkin's picture
Add code and readme
c45d283
raw
history blame
No virus
1.43 kB
#!/usr/bin/env python3
# coding=utf-8
import torch
import torch.nn as nn
class AutoClip:
    """Adaptive, per-parameter gradient clipping based on past gradient norms.

    For every tracked parameter a fixed-size circular buffer of its recent
    gradient L2 norms is kept.  On each call the current norm is recorded and
    the parameter's gradient is clipped (in place) to the requested percentile
    of that buffer.

    Args:
        parameters: Iterable of parameters whose ``.grad`` should be clipped.
            It is materialized into a list, so a generator such as
            ``model.parameters()`` is fine.
        initial_clipping: Value every history slot is pre-filled with, i.e.
            the threshold that dominates until enough real norms are recorded.
        percentile: Percentile (0-100) of the history used as the clip value.
        history_len: Number of gradient norms remembered per parameter.
    """

    def __init__(self, parameters, initial_clipping=0.1, percentile=50, history_len=1000):
        self.parameters = list(parameters)
        # ``float(...)`` forces a floating-point buffer: ``torch.full`` infers
        # its dtype from the fill value, so an integer ``initial_clipping``
        # would otherwise create an integer history and truncate every norm.
        self.grad_history = [
            torch.full([history_len], float(initial_clipping)) for _ in self.parameters
        ]
        # Slot of the circular buffers that the next call will overwrite.
        self.index = 0
        self.history_len = history_len
        self.percentile = percentile

    @torch.no_grad()
    def __call__(self):
        """Record the current gradient norms and clip every gradient in place.

        Parameters without a gradient, or whose gradient is entirely zero,
        are skipped.

        Returns:
            float: Mean of the norms reported by ``clip_grad_norm_`` over the
            parameters that were clipped, or ``0.0`` when none was clipped.
        """
        self._add_to_history(self.parameters)

        grad_norms = []
        for parameter, history in zip(self.parameters, self.grad_history):
            if not self._has_gradient(parameter):
                continue
            clip_value = self._get_percentile(history, self.percentile)
            grad_norms.append(nn.utils.clip_grad_norm_(parameter, clip_value).item())

        # Nothing was clipped (no parameters, or no non-zero gradients):
        # avoid dividing by zero and report a neutral norm instead.
        if not grad_norms:
            return 0.0
        return sum(grad_norms) / len(grad_norms)

    @staticmethod
    def _has_gradient(parameter):
        """Return True when ``parameter`` has a gradient with a non-zero entry."""
        grad = parameter.grad
        return grad is not None and grad.abs().sum().is_nonzero()

    def _add_to_history(self, parameters):
        """Write each parameter's current gradient L2 norm into its history.

        Skipped parameters keep whatever value their slot already holds.  The
        write position advances once per call (not once per parameter) and
        wraps around after ``history_len`` calls.
        """
        for i, param in enumerate(parameters):
            if not self._has_gradient(param):
                continue
            self.grad_history[i][self.index] = param.grad.detach().norm(2)

        self.index = (self.index + 1) % self.history_len

    def _get_percentile(self, tensor, percentile):
        """Return the ``percentile``-th percentile of ``tensor`` as a float.

        The percentile is mapped to a 1-based rank ``k`` and the k-th smallest
        element is returned (no interpolation between neighbours).
        """
        # Python's ``round`` rounds halves to the nearest even integer.
        k = 1 + round(0.01 * percentile * (tensor.numel() - 1))
        return tensor.kthvalue(k).values.item()