# HPSv2/src/training/scheduler.py
import numpy as np


def assign_learning_rate(optimizer, new_lr):
    # Set the same learning rate on every parameter group of the optimizer.
    for param_group in optimizer.param_groups:
        param_group["lr"] = new_lr


def _warmup_lr(base_lr, warmup_length, step):
    # Linear warmup: ramps from base_lr / warmup_length at step 0 up to base_lr.
    return base_lr * (step + 1) / warmup_length


def const_lr(optimizer, base_lr, warmup_length, steps):
    # Constant schedule: linear warmup for warmup_length steps, then hold base_lr.
    # `steps` is unused here but kept so all scheduler factories share one signature.
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            lr = base_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster
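
# Worked example for const_lr (hypothetical numbers): base_lr=1e-3, warmup_length=4
# gives lr = 2.5e-4, 5e-4, 7.5e-4, 1e-3 over steps 0..3, then a constant 1e-3.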


def const_lr_cooldown(optimizer, base_lr, warmup_length, steps, cooldown_steps, cooldown_power=1.0, cooldown_end_lr=0.):
    # Constant schedule with a final cooldown: linear warmup, hold at base_lr,
    # then decay toward cooldown_end_lr over the last cooldown_steps steps.
    def _lr_adjuster(step):
        start_cooldown_step = steps - cooldown_steps
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            if step < start_cooldown_step:
                lr = base_lr
            else:
                e = step - start_cooldown_step
                es = steps - start_cooldown_step
                # Linear decay if cooldown_power == 1; polynomial decay otherwise.
                decay = (1 - (e / es)) ** cooldown_power
                lr = decay * (base_lr - cooldown_end_lr) + cooldown_end_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster
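
# Worked example for const_lr_cooldown (hypothetical numbers): steps=10,
# cooldown_steps=4, cooldown_power=1.0, cooldown_end_lr=0.0 gives
# start_cooldown_step=6, so steps 6..9 use decay = 1.0, 0.75, 0.5, 0.25.
# Note the lr never reaches cooldown_end_lr exactly, since e only runs up to es - 1.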


def cosine_lr(optimizer, base_lr, warmup_length, steps):
    # Cosine schedule: linear warmup, then cosine decay from base_lr toward 0
    # over the remaining steps - warmup_length steps.
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            e = step - warmup_length
            es = steps - warmup_length
            lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster
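

# Usage sketch, not part of the original file: shows how a schedule factory is
# wired to an optimizer and called once per training step. It assumes PyTorch
# is available; the dummy parameter and the hyperparameter values below are
# illustrative only. Any optimizer exposing param_groups would work the same way.
if __name__ == "__main__":
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.SGD(params, lr=0.0)  # lr is overwritten each step
    scheduler = cosine_lr(optimizer, base_lr=1e-3, warmup_length=5, steps=20)

    for step in range(20):
        lr = scheduler(step)  # assigns the new lr to the optimizer and returns it
        # ... forward / backward / optimizer.step() would go here ...
        print(f"step {step:2d}  lr {lr:.6f}")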