import math

import numpy as np

class LearningRateDecay:
    """Inverse square-root decay with linear warmup (Transformer-style).

    The rate rises linearly for `warmup_steps` updates, peaks at `lr`, and then
    decays proportionally to the inverse square root of the step count.
    """

    def __init__(self, lr=0.002, warmup_steps=4000.0) -> None:
        self.lr = lr
        self.warmup_steps = warmup_steps

    def __call__(self, global_step) -> float:
        # Shift by one so the schedule is well defined at step 0.
        step = global_step + 1.0
        lr = (
            self.lr
            * self.warmup_steps ** 0.5
            * np.minimum(step * self.warmup_steps ** -1.5, step ** -0.5)
        )
        return lr

class SquareRootScheduler:
    """Square-root decay of the base rate, stepped once every 1000 updates."""

    def __init__(self, lr=0.1):
        self.lr = lr

    def __call__(self, global_step):
        # Coarsen the step count so the rate only changes every 1000 updates.
        global_step = global_step // 1000
        return self.lr * pow(global_step + 1.0, -0.5)

class CosineScheduler:
    """Cosine decay from base_lr down to final_lr, with optional linear warmup."""

    def __init__(
        self, max_update, base_lr=0.02, final_lr=0, warmup_steps=0, warmup_begin_lr=0
    ):
        self.base_lr_orig = base_lr
        self.max_update = max_update
        self.final_lr = final_lr
        self.warmup_steps = warmup_steps
        self.warmup_begin_lr = warmup_begin_lr
        self.max_steps = self.max_update - self.warmup_steps

    def get_warmup_lr(self, global_step):
        # Linear ramp from warmup_begin_lr to base_lr over the warmup period.
        increase = (
            (self.base_lr_orig - self.warmup_begin_lr)
            * float(global_step)
            / float(self.warmup_steps)
        )
        return self.warmup_begin_lr + increase

    def __call__(self, global_step):
        if global_step < self.warmup_steps:
            return self.get_warmup_lr(global_step)
        if global_step <= self.max_update:
            # Anneal from base_lr to final_lr along half a cosine wave.
            return (
                self.final_lr
                + (self.base_lr_orig - self.final_lr)
                * (
                    1
                    + math.cos(
                        math.pi * (global_step - self.warmup_steps) / self.max_steps
                    )
                )
                / 2
            )
        # Beyond max_update the rate stays at final_lr.
        return self.final_lr

def adjust_learning_rate(optimizer, global_step):
    # Compute the scheduled rate and apply it to every parameter group of the optimizer.
    lr = LearningRateDecay()(global_step=global_step)
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
    return lr
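

# A minimal usage sketch (illustrative only): the step values and constructor
# arguments below are arbitrary and simply show the shape of each schedule;
# they are not tuned settings.
if __name__ == "__main__":
    noam = LearningRateDecay(lr=0.002, warmup_steps=4000.0)
    sqrt_sched = SquareRootScheduler(lr=0.1)
    cosine = CosineScheduler(
        max_update=20000, base_lr=0.02, final_lr=0.001, warmup_steps=1000
    )
    for step in (0, 1000, 4000, 10000, 20000, 30000):
        print(
            f"step={step:>6}  "
            f"noam={noam(step):.6f}  "
            f"sqrt={sqrt_sched(step):.6f}  "
            f"cosine={cosine(step):.6f}"
        )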
|