from torch.optim.lr_scheduler import _LRScheduler # This is rather suboptimal, because we need to import a protected class. Unfortunately, I don't see another way. class ToucanWarmupScheduler(_LRScheduler): """ A warmup scheduler that should be called after every batch. """ def __init__(self, optimizer, peak_lr=0.0002, warmup_steps=20000, max_steps=200000, last_epoch=-1): self.warmup_steps = warmup_steps self.peak_lr = peak_lr self.max_steps = max_steps self.plateau = self.warmup_steps * 4 self.last_lr = 0.0 # __init__() must be invoked before setting field # because step() is also invoked in __init__() super().__init__(optimizer, last_epoch) def __repr__(self): return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" def get_lr(self): step_num = self.last_epoch + 1 if step_num <= self.warmup_steps: lr = self.peak_lr * min(step_num / self.warmup_steps, 1.0) self.last_lr = lr return [lr for _ in self.base_lrs] elif step_num < self.warmup_steps + self.plateau: self.last_lr = self.peak_lr return [self.peak_lr for _ in self.base_lrs] else: scale = 1 - (((step_num - (self.warmup_steps + self.plateau)) / self.max_steps) / (self.max_steps / 10)) self.last_lr = max(self.last_lr * scale, 1e-7) return [self.last_lr for _ in self.base_lrs] class WarmupScheduler(_LRScheduler): """ The WarmupLR scheduler This scheduler is almost same as NoamLR Scheduler except for following difference: NoamLR: lr = optimizer.lr * model_size ** -0.5 * min(step ** -0.5, step * warmup_step ** -1.5) WarmupLR: lr = optimizer.lr * warmup_step ** 0.5 * min(step ** -0.5, step * warmup_step ** -1.5) Note that the maximum lr equals to optimizer.lr in this scheduler. Taken from ESPnet """ def __init__(self, optimizer, warmup_steps=25000, last_epoch=-1): self.warmup_steps = warmup_steps # __init__() must be invoked before setting field # because step() is also invoked in __init__() super().__init__(optimizer, last_epoch) def __repr__(self): return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" def get_lr(self): step_num = self.last_epoch + 1 return [lr * self.warmup_steps ** 0.5 * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) for lr in self.base_lrs] if __name__ == '__main__': lrs = list() warmup_steps = 30000 peak_lr = 0.0005 max_steps = 800000 plateau_size = warmup_steps * 5 for step_num in range(max_steps): if step_num <= warmup_steps: lr = peak_lr * min(step_num / warmup_steps, 1.0) lrs.append(lr) elif step_num < warmup_steps + plateau_size: lrs.append(peak_lr) else: scale = 1 - (((step_num - (warmup_steps + plateau_size)) / max_steps) / (max_steps / 10)) lrs.append(max(lrs[-1] * scale, 1e-7)) import matplotlib.pyplot as plt plt.plot(lrs) plt.show()