import numpy as np


def assign_learning_rate(optimizer, new_lr):
    """Set the learning rate of every parameter group in the optimizer."""
    for param_group in optimizer.param_groups:
        param_group["lr"] = new_lr


def _warmup_lr(base_lr, warmup_length, step):
    """Linear warmup: ramp the LR from base_lr / warmup_length up to base_lr."""
    return base_lr * (step + 1) / warmup_length


def const_lr(optimizer, base_lr, warmup_length):
    """Constant LR schedule with linear warmup.

    Returns an adjuster function: call it with the current step to set the
    optimizer's LR (and get the value back).
    """
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            lr = base_lr
        assign_learning_rate(optimizer, lr)
        return lr

    return _lr_adjuster


def const_lr_cooldown(
    optimizer,
    base_lr,
    warmup_length,
    steps,
    cooldown_steps,
    cooldown_power=1.0,
    cooldown_end_lr=0.0,
):
    """Constant LR with linear warmup and a polynomial cooldown.

    The LR holds at base_lr until the final cooldown_steps steps, then decays
    to cooldown_end_lr following (1 - t) ** cooldown_power, where t runs from
    0 to 1 across the cooldown window (cooldown_power=1.0 is a linear decay).
    """
    def _lr_adjuster(step):
        start_cooldown_step = steps - cooldown_steps
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        elif step < start_cooldown_step:
            lr = base_lr
        else:
            # e / es is the fraction of the cooldown window completed so far.
            e = step - start_cooldown_step
            es = steps - start_cooldown_step
            decay = (1 - (e / es)) ** cooldown_power
            lr = decay * (base_lr - cooldown_end_lr) + cooldown_end_lr
        assign_learning_rate(optimizer, lr)
        return lr

    return _lr_adjuster


def cosine_lr(optimizer, base_lr, warmup_length, steps, min_lr, force_min_lr):
    """Cosine LR schedule with linear warmup.

    After warmup, the LR follows a half-cosine from base_lr down to min_lr
    over the remaining steps. force_min_lr is a hard floor applied to the
    result (including during warmup).
    """
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            e = step - warmup_length
            es = steps - warmup_length
            lr = min_lr + 0.5 * (1 + np.cos(np.pi * e / es)) * (base_lr - min_lr)
        lr = max(lr, force_min_lr)
        assign_learning_rate(optimizer, lr)
        return lr

    return _lr_adjuster
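

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original schedulers): a minimal
# example assuming a PyTorch optimizer. Any object exposing `param_groups` as
# a list of dicts with an "lr" key works with assign_learning_rate; the
# hyperparameter values below are placeholders, not recommendations.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.0)

    total_steps = 1000
    scheduler = cosine_lr(
        optimizer,
        base_lr=1e-3,
        warmup_length=100,   # linear warmup over the first 100 steps
        steps=total_steps,
        min_lr=1e-5,         # cosine decays down to this value
        force_min_lr=0.0,    # hard clamp, disabled here
    )

    for step in range(total_steps):
        lr = scheduler(step)  # sets every param group's "lr" and returns it
        # ... forward pass, loss.backward(), optimizer.step() would go here ...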