# Ashaar/poetry_diacritizer/util/learning_rates.py
# Page header: aaaaaabbbbbbbdddddddduuuuulllll's picture
# Duplicate from arbml/Ashaar (77a12fd)
import numpy as np
import math
class LearningRateDecay:
    """Linear warmup followed by inverse-square-root decay.

    For ``step = global_step + 1`` the rate is
    ``lr * warmup_steps**0.5 * min(step * warmup_steps**-1.5, step**-0.5)``.
    While ``step < warmup_steps`` the first term of the ``min`` is the
    smaller one and the rate grows linearly as ``lr * step / warmup_steps``;
    afterwards it decays as ``lr * sqrt(warmup_steps / step)``.  The two
    branches meet at ``step == warmup_steps``, where the rate equals ``lr``.
    """

    def __init__(self, lr=0.002, warmup_steps=4000.0) -> None:
        """Store the peak rate ``lr`` and the warmup length ``warmup_steps``."""
        self.warmup_steps = warmup_steps
        self.lr = lr

    def __call__(self, global_step) -> float:
        """Return the learning rate for the zero-based ``global_step``."""
        # Shift by one so that step 0 does not yield a zero rate / 0 ** -0.5.
        shifted = global_step + 1.0

        warmup_branch = shifted * self.warmup_steps ** -1.5
        decay_branch = shifted ** -0.5

        # Scale factor that makes the schedule peak exactly at ``self.lr``.
        peak_scale = self.lr * self.warmup_steps ** 0.5
        return peak_scale * np.minimum(warmup_branch, decay_branch)
class SquareRootScheduler:
    """Step-wise inverse-square-root decay of a base learning rate.

    The step counter is floor-divided by 1000, so the rate stays constant
    within each block of 1000 steps and equals ``lr / sqrt(block + 1)``,
    where ``block = global_step // 1000``.
    """

    def __init__(self, lr=0.1):
        """Store the base rate returned for the first 1000 steps."""
        self.lr = lr

    def __call__(self, global_step):
        """Return the learning rate for ``global_step``."""
        block = global_step // 1000
        decay = (block + 1.0) ** -0.5
        return self.lr * decay
class CosineScheduler:
    """Cosine-annealed learning rate with an optional linear warmup.

    * ``global_step < warmup_steps``: linear ramp from ``warmup_begin_lr``
      towards ``base_lr`` (see :meth:`get_warmup_lr`).
    * ``warmup_steps <= global_step <= max_update``: half-cosine decay from
      ``base_lr`` down to ``final_lr``.
    * ``global_step > max_update``: the rate stays at the value reached at
      ``max_update`` (i.e. ``final_lr``).

    Attributes set by the constructor: ``base_lr_orig``, ``max_update``,
    ``final_lr``, ``warmup_steps``, ``warmup_begin_lr``, ``max_steps``
    (length of the decay phase) and ``base_lr`` (the most recently computed
    post-warmup rate).
    """

    def __init__(
        self, max_update, base_lr=0.02, final_lr=0, warmup_steps=0, warmup_begin_lr=0
    ):
        """Store the schedule parameters.

        Args:
            max_update: Step at which the cosine decay reaches ``final_lr``.
            base_lr: Rate at the start of the decay phase.
            final_lr: Rate at and after ``max_update``.
            warmup_steps: Number of initial steps spent in linear warmup.
            warmup_begin_lr: Rate at step 0 of the warmup.
        """
        self.base_lr_orig = base_lr
        self.max_update = max_update
        self.final_lr = final_lr
        self.warmup_steps = warmup_steps
        self.warmup_begin_lr = warmup_begin_lr
        self.max_steps = self.max_update - self.warmup_steps
        # Seed the attribute so it exists before the first post-warmup call;
        # previously it was created lazily inside ``__call__``.
        self.base_lr = base_lr

    def get_warmup_lr(self, global_step):
        """Return the linearly interpolated warmup rate for ``global_step``."""
        increase = (
            (self.base_lr_orig - self.warmup_begin_lr)
            * float(global_step)
            / float(self.warmup_steps)
        )
        return self.warmup_begin_lr + increase

    def __call__(self, global_step):
        """Return the learning rate for ``global_step``.

        The result depends only on ``global_step`` and the constructor
        arguments, so the scheduler can be queried at arbitrary steps (for
        example when resuming training beyond ``max_update``).
        """
        if global_step < self.warmup_steps:
            return self.get_warmup_lr(global_step)

        if self.max_steps > 0:
            # Clamp so every step past ``max_update`` maps to the end of the
            # cosine curve instead of relying on a value cached by an
            # earlier call (which raised AttributeError if none happened).
            step = min(global_step, self.max_update)
            cosine = math.cos(
                math.pi * (step - self.warmup_steps) / self.max_steps
            )
            self.base_lr = (
                self.final_lr
                + (self.base_lr_orig - self.final_lr) * (1 + cosine) / 2
            )
        else:
            # No decay phase (warmup_steps >= max_update): avoid dividing by
            # zero and go straight to the final rate.
            self.base_lr = self.final_lr
        return self.base_lr
def adjust_learning_rate(optimizer, global_step):
    """Set the learning rate of every parameter group for ``global_step``.

    The rate comes from a ``LearningRateDecay`` built with its default
    arguments.  It is written to the ``"lr"`` entry of each item in
    ``optimizer.param_groups`` and also returned.
    """
    schedule = LearningRateDecay()
    new_lr = schedule(global_step=global_step)
    for group in optimizer.param_groups:
        group["lr"] = new_lr
    return new_lr