RevCol / training /config.py
LarryTsai's picture
Training Code:cls/det
b9425fd
raw
history blame
7.12 kB
# --------------------------------------------------------
# Reversible Column Networks
# Copyright (c) 2022 Megvii Inc.
# Licensed under TheApache License 2.0 [see LICENSE for details]
# Written by Yuxuan Cai
# --------------------------------------------------------
import os
import yaml
from yacs.config import CfgNode as CN
_C = CN()
# Base config files
_C.BASE = ['']
# -----------------------------------------------------------------------------
# Data settings
# -----------------------------------------------------------------------------
_C.DATA = CN()
# Batch size for a single GPU, could be overwritten by command line argument
_C.DATA.BATCH_SIZE = 128
# Path to dataset, could be overwritten by command line argument
_C.DATA.DATA_PATH = 'path/to/imagenet'
# Dataset name
_C.DATA.DATASET = 'imagenet'
# Input image size
_C.DATA.IMG_SIZE = 224
# Interpolation to resize image (random, bilinear, bicubic)
_C.DATA.INTERPOLATION = 'bicubic'
# Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.
_C.DATA.PIN_MEMORY = True
# Number of data loading threads
_C.DATA.NUM_WORKERS = 8
# Path to evaluation dataset for ImageNet 22k
_C.DATA.EVAL_DATA_PATH = 'path/to/eval/data'
# -----------------------------------------------------------------------------
# Model settings
# -----------------------------------------------------------------------------
_C.MODEL = CN()
# Model type
_C.MODEL.TYPE = ''
# Model name
_C.MODEL.NAME = ''
# Checkpoint to resume, could be overwritten by command line argument
_C.MODEL.RESUME = ''
# Checkpoint to finetune, could be overwritten by command line argument
_C.MODEL.FINETUNE = ''
# Number of classes, overwritten in data preparation
_C.MODEL.NUM_CLASSES = 1000
# Label Smoothing
_C.MODEL.LABEL_SMOOTHING = 0.0
# -----------------------------------------------------------------------------
# Specific Model settings
# -----------------------------------------------------------------------------
_C.REVCOL = CN()
_C.REVCOL.INTER_SUPV = True
_C.REVCOL.SAVEMM = True
_C.REVCOL.FCOE = 4.0
_C.REVCOL.CCOE = 0.8
_C.REVCOL.KERNEL_SIZE = 3
_C.REVCOL.DROP_PATH = 0.1
_C.REVCOL.HEAD_INIT_SCALE = None
# -----------------------------------------------------------------------------
# Training settings
# -----------------------------------------------------------------------------
_C.TRAIN = CN()
_C.TRAIN.START_EPOCH = 0
_C.TRAIN.EPOCHS = 300
_C.TRAIN.WARMUP_EPOCHS = 5
_C.TRAIN.WEIGHT_DECAY = 4e-5
_C.TRAIN.BASE_LR = 0.4
_C.TRAIN.WARMUP_LR = 0.05
_C.TRAIN.MIN_LR = 1e-5
# Clip gradient norm
_C.TRAIN.CLIP_GRAD = 10.0
# Auto resume from latest checkpoint
_C.TRAIN.AUTO_RESUME = True
# Check point
_C.TRAIN.USE_CHECKPOINT = False
_C.TRAIN.AMP = True
# LR scheduler
_C.TRAIN.LR_SCHEDULER = CN()
# LR scheduler
_C.TRAIN.LR_SCHEDULER.NAME = 'cosine'
# Epoch interval to decay LR, used in StepLRScheduler
_C.TRAIN.LR_SCHEDULER.DECAY_EPOCHS = 30
# LR decay rate, used in StepLRScheduler
_C.TRAIN.LR_SCHEDULER.DECAY_RATE = 0.1
# Optimizer
_C.TRAIN.OPTIMIZER = CN()
_C.TRAIN.OPTIMIZER.NAME = 'sgd'
# Optimizer Epsilon fow adamw
_C.TRAIN.OPTIMIZER.EPS = 1e-8
# Optimizer Betas fow adamw
_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999)
# SGD momentum
_C.TRAIN.OPTIMIZER.MOMENTUM = 0.9
# Layer Decay
_C.TRAIN.OPTIMIZER.LAYER_DECAY = 1.0
# -----------------------------------------------------------------------------
# Augmentation settings
# -----------------------------------------------------------------------------
_C.AUG = CN()
# Color jitter factor
_C.AUG.COLOR_JITTER = 0.4
# Use AutoAugment policy. "v0" or "original"
_C.AUG.AUTO_AUGMENT = 'rand-m9-mstd0.5-inc1'
# Random erase prob
_C.AUG.REPROB = 0.25
# Random erase mode
_C.AUG.REMODE = 'pixel'
# Random erase count
_C.AUG.RECOUNT = 1
# Mixup alpha, mixup enabled if > 0
_C.AUG.MIXUP = 0.8
# Cutmix alpha, cutmix enabled if > 0
_C.AUG.CUTMIX = 1.0
# Cutmix min/max ratio, overrides alpha and enables cutmix if set
_C.AUG.CUTMIX_MINMAX = None
# Probability of performing mixup or cutmix when either/both is enabled
_C.AUG.MIXUP_PROB = 1.0
# Probability of switching to cutmix when both mixup and cutmix enabled
_C.AUG.MIXUP_SWITCH_PROB = 0.5
# How to apply mixup/cutmix params. Per "batch", "pair", or "elem"
_C.AUG.MIXUP_MODE = 'batch'
# -----------------------------------------------------------------------------
# Testing settings
# -----------------------------------------------------------------------------
_C.TEST = CN()
# Whether to use center crop when testing
_C.TEST.CROP = True
# -----------------------------------------------------------------------------
# Misc
# -----------------------------------------------------------------------------
# Path to output folder, overwritten by command line argument
_C.OUTPUT = 'outputs/'
# Tag of experiment, overwritten by command line argument
_C.TAG = 'default'
# Frequency to save checkpoint
_C.SAVE_FREQ = 1
# Frequency to logging info
_C.PRINT_FREQ = 100
# Fixed random seed
_C.SEED = 0
# Perform evaluation only, overwritten by command line argument
_C.EVAL_MODE = False
# Test throughput only, overwritten by command line argument
_C.THROUGHPUT_MODE = False
# local rank for DistributedDataParallel, given by command line argument
_C.LOCAL_RANK = 0
# EMA
_C.MODEL_EMA = False
_C.MODEL_EMA_DECAY = 0.9999
# Machine
_C.MACHINE = CN()
_C.MACHINE.MACHINE_WORLD_SIZE = None
_C.MACHINE.MACHINE_RANK = None
def _update_config_from_file(config, cfg_file):
config.defrost()
with open(cfg_file, 'r') as f:
yaml_cfg = yaml.load(f, Loader=yaml.FullLoader)
for cfg in yaml_cfg.setdefault('BASE', ['']):
if cfg:
_update_config_from_file(
config, os.path.join(os.path.dirname(cfg_file), cfg)
)
print('=> merge config from {}'.format(cfg_file))
config.merge_from_file(cfg_file)
config.freeze()
def update_config(config, args):
_update_config_from_file(config, args.cfg)
config.defrost()
if args.opts:
config.merge_from_list(args.opts)
# merge from specific arguments
if args.batch_size:
config.DATA.BATCH_SIZE = args.batch_size
if args.data_path:
config.DATA.DATA_PATH = args.data_path
if args.resume:
config.MODEL.RESUME = args.resume
if args.finetune:
config.MODEL.FINETUNE = args.finetune
if args.use_checkpoint:
config.TRAIN.USE_CHECKPOINT = True
if args.output:
config.OUTPUT = args.output
if args.tag:
config.TAG = args.tag
if args.eval:
config.EVAL_MODE = True
if args.model_ema:
config.MODEL_EMA = True
config.dist_url = args.dist_url
# set local rank for distributed training
config.LOCAL_RANK = args.local_rank
# output folder
config.OUTPUT = os.path.join(config.OUTPUT, config.MODEL.NAME, config.TAG)
config.freeze()
def get_config(args):
"""Get a yacs CfgNode object with default values."""
# Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern
config = _C.clone()
update_config(config, args)
return config