import numpy as np


def assign_learning_rate(optimizer, new_lr):
    # Set the learning rate on every parameter group of the optimizer.
    for param_group in optimizer.param_groups:
        param_group["lr"] = new_lr


def _warmup_lr(base_lr, warmup_length, step):
    # Linear warmup from ~0 up to base_lr over warmup_length steps.
    return base_lr * (step + 1) / warmup_length


def const_lr(optimizer, base_lr, warmup_length, steps):
    # Constant schedule: linear warmup, then a fixed learning rate.
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            lr = base_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster


def const_lr_cooldown(optimizer, base_lr, warmup_length, steps, cooldown_steps,
                      cooldown_power=1.0, cooldown_end_lr=0.):
    # Constant schedule with cooldown: linear warmup, a constant plateau, then
    # a decay to cooldown_end_lr over the final cooldown_steps steps.
    def _lr_adjuster(step):
        start_cooldown_step = steps - cooldown_steps
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            if step < start_cooldown_step:
                lr = base_lr
            else:
                e = step - start_cooldown_step
                es = steps - start_cooldown_step
                # linear decay if cooldown_power == 1; polynomial decay otherwise
                decay = (1 - (e / es)) ** cooldown_power
                lr = decay * (base_lr - cooldown_end_lr) + cooldown_end_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster


def cosine_lr(optimizer, base_lr, warmup_length, steps):
    # Cosine schedule: linear warmup, then cosine annealing from base_lr to 0.
    def _lr_adjuster(step):
        if step < warmup_length:
            lr = _warmup_lr(base_lr, warmup_length, step)
        else:
            e = step - warmup_length
            es = steps - warmup_length
            lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr
        assign_learning_rate(optimizer, lr)
        return lr
    return _lr_adjuster
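
# Usage sketch: the block below is illustrative and not part of the original
# module. It assumes PyTorch is installed; the model, optimizer, and step
# counts are hypothetical placeholders chosen only to show how the returned
# _lr_adjuster closure is called once per training step.
if __name__ == "__main__":
    import torch

    model = torch.nn.Linear(10, 2)  # hypothetical toy model
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    total_steps, warmup = 1000, 100  # assumed training budget

    scheduler = cosine_lr(optimizer, base_lr=1e-3,
                          warmup_length=warmup, steps=total_steps)
    for step in range(total_steps):
        lr = scheduler(step)  # updates every param_group and returns the new lr
        # ... forward / backward / optimizer.step() would go here ...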