🔥 [Remove] & fix typo of momentum schedule
Files changed:
- yolo/config/config.py (+1 / -0)
- yolo/utils/model_utils.py (+7 / -8)
yolo/config/config.py

@@ -66,6 +66,7 @@ class DataConfig:
 class OptimizerArgs:
     lr: float
     weight_decay: float
+    momentum: float
 
 
 @dataclass
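The new momentum field lets the momentum value come from the optimizer config instead of being hard-coded. Below is a minimal sketch of how such a field is typically forwarded to a torch optimizer; the build_sgd helper, its wiring, and the concrete values are illustrative assumptions, not the repository's create_optimizer.

from dataclasses import dataclass

import torch
from torch import nn


@dataclass
class OptimizerArgs:
    lr: float
    weight_decay: float
    momentum: float  # field added by this commit


def build_sgd(model: nn.Module, args: OptimizerArgs) -> torch.optim.SGD:
    # Hypothetical wiring: forward the config values straight to SGD.
    return torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay,
        momentum=args.momentum,
    )


model = nn.Linear(8, 2)
optim = build_sgd(model, OptimizerArgs(lr=0.01, weight_decay=5e-4, momentum=0.937))
print(optim.defaults["momentum"])  # 0.937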
yolo/utils/model_utils.py

@@ -8,7 +8,6 @@ import torch
 import torch.distributed as dist
 from lightning import LightningModule, Trainer
 from lightning.pytorch.callbacks import Callback
-from lightning.pytorch.utilities import rank_zero_only
 from omegaconf import ListConfig
 from torch import Tensor, no_grad
 from torch.optim import Optimizer
@@ -77,9 +76,9 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]
 
     model_parameters = [
-        {"params": bias_params, "momentum": 0.
-        {"params": conv_params, "momentum": 0.
-        {"params": norm_params, "momentum": 0.
+        {"params": bias_params, "momentum": 0.937, "weight_decay": 0},
+        {"params": conv_params, "momentum": 0.937},
+        {"params": norm_params, "momentum": 0.937, "weight_decay": 0},
     ]
 
     def next_epoch(self, batch_num, epoch_idx):
@@ -89,8 +88,8 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
         # 0.937: Start Momentum
         # 0.8 : Normal Momemtum
         # 3 : The warm up epoch num
-        self.min_mom = lerp(0.937, 0.8,
-        self.max_mom = lerp(0.937, 0.8,
+        self.min_mom = lerp(0.937, 0.8, min(epoch_idx, 3), 3)
+        self.max_mom = lerp(0.937, 0.8, min(epoch_idx + 1, 3), 3)
         self.batch_num = batch_num
         self.batch_idx = 0
 
@@ -100,7 +99,7 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
         for lr_idx, param_group in enumerate(self.param_groups):
             min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
             param_group["lr"] = lerp(min_lr, max_lr, self.batch_idx, self.batch_num)
-            param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
+            # param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
             lr_dict[f"LR/{lr_idx}"] = param_group["lr"]
         return lr_dict
 
@@ -125,7 +124,7 @@ def create_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LR
     lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
     lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1
     warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
-    schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[
+    schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[wepoch - 1])
     return schedule
 
 
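The next_epoch hunk clamps the warm-up interpolation so momentum settles at 0.8 after three epochs, while the update hunk comments out the per-batch momentum write. The standalone sketch below reproduces only that arithmetic; the lerp(start, end, step, total) signature is inferred from its call sites, and the momentum_at helper and its parameter names are assumptions for illustration, not the repository's class.

def lerp(start: float, end: float, step: int, total: int) -> float:
    # Assumed semantics, matching the call sites above:
    # linear interpolation from start to end as step goes from 0 to total.
    return start + (end - start) * (step / total)


def momentum_at(epoch_idx: int, batch_idx: int, batch_num: int,
                start_mom: float = 0.937, normal_mom: float = 0.8,
                warmup_epochs: int = 3) -> float:
    # Epoch-level bounds, clamped so momentum stays at normal_mom after warm-up.
    min_mom = lerp(start_mom, normal_mom, min(epoch_idx, warmup_epochs), warmup_epochs)
    max_mom = lerp(start_mom, normal_mom, min(epoch_idx + 1, warmup_epochs), warmup_epochs)
    # Per-batch interpolation between the two epoch bounds
    # (this is the step the commit comments out in the per-batch update).
    return lerp(min_mom, max_mom, batch_idx, batch_num)


for epoch in range(4):
    print(epoch, round(momentum_at(epoch, batch_idx=0, batch_num=100), 4))
# epoch 0 -> 0.937, 1 -> ~0.8913, 2 -> ~0.8457, 3 -> 0.8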
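The last hunk supplies the missing milestone so SequentialLR hands control from the warm-up schedule to the main schedule once the warm-up epochs are over. The sketch below is a self-contained version of that composition using the same lambdas; the three dummy parameter groups and the MultiStepLR stand-in for the main schedule are assumptions, since the real schedule is built from SchedulerConfig and is not shown in this diff.

import torch
from torch import nn
from torch.optim.lr_scheduler import LambdaLR, MultiStepLR, SequentialLR

# Three dummy parameter groups, mirroring [bias_params, conv_params, norm_params].
params = [nn.Parameter(torch.zeros(2)) for _ in range(3)]
optimizer = torch.optim.SGD([{"params": [p]} for p in params], lr=0.01, momentum=0.937)

wepoch = 3  # warm-up epoch count, as in the "3 : The warm up epoch num" comment
lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1

# Group 0 starts from a boosted factor that decays to 1 during warm-up;
# groups 1-2 ramp up linearly to 1.
warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
main_schedule = MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)  # stand-in

# milestones=[wepoch - 1]: switch to the main schedule once warm-up ends.
schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, main_schedule],
                        milestones=[wepoch - 1])

for epoch in range(5):
    print(epoch, [round(g["lr"], 4) for g in optimizer.param_groups])
    optimizer.step()
    schedule.step()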