from typing import Any, List, Optional
from fvcore.common.config import CfgNode as CN
class Config(object):
r"""
This class provides package-wide configuration management. It is a
nested dict-like structure with nested keys accessible as attributes. It
contains sensible default values, which can be modified by (first) a YAML
file and (second) a list of attributes and values.
An instantiated object is immutable: modifying any attribute is illegal.
You must override required parameter values either through ``config_file``
or ``override_list`` arguments. For adding more parameters at runtime
(based on existing parameters), modify :meth:`add_derived_params`.
Parameters
----------
config_file: str
Path to a YAML file containing configuration parameters to override.
config_override: List[Any], optional (default = [])
A list of sequential attributes and values of parameters to override.
This happens after overriding from YAML file.
Examples
--------
Let a YAML file named "config.yaml" specify these parameters to override::
OPTIM:
BATCH_SIZE: 512
LR: 0.01
>>> _C = Config("config.yaml", ["OPTIM.BATCH_SIZE", 1024])
>>> _C.LR # default: 0.001
0.01
>>> _C.OPTIM.BATCH_SIZE # default: 256, file: 512
1024
"""
def __init__(
self, config_file: Optional[str] = None, override_list: List[Any] = []
):
_C = CN()
# Random seed for NumPy and PyTorch, important for reproducibility.
_C.RANDOM_SEED = 0
# Train with Automatic Mixed Precision (native PyTorch).
_C.AMP = True
# Set CUDNN deterministic flag (torch.backends.cudnn.deterministic).
        # Setting this ensures exact results on every run, at the cost of a
        # slight slowdown. Good for debugging.
_C.CUDNN_DETERMINISTIC = False
        # Set CUDNN benchmark flag (torch.backends.cudnn.benchmark). Enables
        # CUDNN to select the fastest implementation for each operation based
        # on the GPU. May slightly change results (at decimal precision)
        # across hardware, but speeds up training. Turn off while debugging.
_C.CUDNN_BENCHMARK = True
# ---------------------------------------------------------------------
# Data paths and parameters related to dataloading.
# ---------------------------------------------------------------------
_C.DATA = CN()
        # Path to the dataset root, structured as described in the README.
        # Path is assumed to be relative to the project root.
_C.DATA.ROOT = "datasets/coco"
# Path to .model file generated by ``sentencepiece``.
_C.DATA.TOKENIZER_MODEL = "datasets/vocab/coco_10k.model"
# Handy config params for vocab size and indices of special tokens.
# While these can be picked up from the tokenizer, having these in
# the config makes it easy to create a model without instantiating too
# many tokenizer instances (especially when not needed, e.g. model zoo).
        # These must match what's present in ``TOKENIZER_MODEL`` above.
_C.DATA.VOCAB_SIZE = 10000
# Index of out-of-vocabulary (and padding) token.
_C.DATA.UNK_INDEX = 0
# Index of the start-of-sentence [SOS] token.
_C.DATA.SOS_INDEX = 1
# Index of the end-of-sentence [EOS] token.
_C.DATA.EOS_INDEX = 2
# Index of the word masking token. While not used for captioning, having
# this extra token makes it possible to train an MLM model without
# re-creating a new vocab mapping.
_C.DATA.MASK_INDEX = 3
# Size of the image (square) to crop from original input image.
_C.DATA.IMAGE_CROP_SIZE = 224
        # Maximum length of an input caption (number of tokens).
        # Longer captions will be truncated to this length.
_C.DATA.MAX_CAPTION_LENGTH = 30
# COCO Captions has five captions per image. If ``True``, training will
# use one random caption per image (data efficiency ablations).
_C.DATA.USE_SINGLE_CAPTION = False
# Percentage of dataset to use for training (data efficiency ablations).
_C.DATA.USE_PERCENTAGE = 100.0
        # List of image transforms (pre-processing and data augmentation) to
        # be applied sequentially (always or randomly) during training and
        # validation. Refer to ``virtex/factories.py`` for all possible
        # transforms.
_C.DATA.IMAGE_TRANSFORM_TRAIN = [
"random_resized_crop",
"horizontal_flip",
"color_jitter",
"normalize",
]
_C.DATA.IMAGE_TRANSFORM_VAL = [
"smallest_resize",
"center_crop",
"normalize",
]
# Hyper-parameters for masked LM pretraining task. These are only used
# when ``MODEL.NAME`` is "masked_lm".
_C.DATA.MASKED_LM = CN()
        # Fraction of tokens to choose for masking; must be less than 1.
_C.DATA.MASKED_LM.MASK_PROPORTION = 0.15
# Probability to replace chosen tokens with [MASK] token.
_C.DATA.MASKED_LM.MASK_PROBABILITY = 0.85
# Probability to replace chosen tokens with a random token.
_C.DATA.MASKED_LM.REPLACE_PROBABILITY = 0.10
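        # Note: with the remaining probability (1 - 0.85 - 0.10 = 0.05), a
        # chosen token is presumably kept unchanged, mirroring the BERT-style
        # 80/10/10 masking scheme.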
# ---------------------------------------------------------------------
# Model architecture: visual backbone and textual head.
# ---------------------------------------------------------------------
_C.MODEL = CN()
# Name of model, based on pretraining task.
# Possible choices: {"token_classification", "multilabel_classification",
# "captioning", "bicaptioning", "masked_lm", "virtex"}
_C.MODEL.NAME = "virtex"
_C.MODEL.VISUAL = CN()
# Name of visual backbone. Possible choices: {"blind", "torchvision"}
# Models from torchvision can be specified as shown below.
_C.MODEL.VISUAL.NAME = "torchvision::resnet50"
# Number of channels in pooled spatial features of visual backbone.
_C.MODEL.VISUAL.FEATURE_SIZE = 2048
# Whether to load ImageNet pretrained weights into visual backbone.
_C.MODEL.VISUAL.PRETRAINED = False
# Whether to keep visual backbone frozen and train only textual head.
_C.MODEL.VISUAL.FROZEN = False
_C.MODEL.TEXTUAL = CN()
        # Name of textual head. Set to "none" for MODEL.NAME = "*_classification".
        # Possible choices: {"transdec_postnorm", "transdec_prenorm"}.
        # Architectural hyper-parameters are specified in the name itself, as
        # explained below.
_C.MODEL.TEXTUAL.NAME = "transdec_postnorm::L1_H2048_A32_F8192"
# L = Number of layers in the transformer.
# H = Hidden size of the transformer (embeddings, attention features).
# A = Number of attention heads in the transformer.
# F = Size of feedforward layers in the transformer.
# Typically, we have (A = H / 64) and (F = 4 * H).
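        # For example, "transdec_postnorm::L1_H2048_A32_F8192" (above) denotes
        # a post-norm transformer decoder with 1 layer, hidden size 2048,
        # 32 attention heads, and feedforward size 8192.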
# Dropout probability for embedding, hidden features in textual head.
_C.MODEL.TEXTUAL.DROPOUT = 0.1
# Apply label smoothing to targets for (cross entropy) loss computation.
_C.MODEL.LABEL_SMOOTHING = 0.0
_C.MODEL.DECODER = CN()
# What algorithm to use for decoding. Supported values: {"beam_search",
# "nucleus_sampling"}.
_C.MODEL.DECODER.NAME = "beam_search"
# Number of beams to decode (1 = greedy decoding). Ignored when decoding
# through nucleus sampling.
_C.MODEL.DECODER.BEAM_SIZE = 5
# Size of nucleus for sampling predictions. Ignored when decoding through
# beam search.
_C.MODEL.DECODER.NUCLEUS_SIZE = 0.9
# Maximum length of decoded caption. Decoding may end earlier when [EOS]
# token is sampled.
_C.MODEL.DECODER.MAX_DECODING_STEPS = _C.DATA.MAX_CAPTION_LENGTH
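        # Note: this copies the current value of DATA.MAX_CAPTION_LENGTH, so
        # overriding DATA.MAX_CAPTION_LENGTH via YAML or the override list
        # will not update this param; handle such cases in
        # :meth:`add_derived_params` if needed.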
# ---------------------------------------------------------------------
# Optimization hyper-parameters, default values are for pretraining
# our best model on bicaptioning task (COCO Captions).
# ---------------------------------------------------------------------
_C.OPTIM = CN()
# Name of optimizer to use. Supported values: {"sgd", "adamw"}.
# AdamW uses default (beta1, beta2) values from PyTorch.
_C.OPTIM.OPTIMIZER_NAME = "sgd"
        # Momentum coefficient for SGD. Ignored for AdamW.
_C.OPTIM.SGD_MOMENTUM = 0.9
        # Weight decay coefficient for the optimizer.
_C.OPTIM.WEIGHT_DECAY = 0.0001
# Regex pattern of params for which there will be no weight decay.
_C.OPTIM.NO_DECAY = ".*textual.(embedding|transformer).*(norm.*|bias)"
# Max gradient norm for clipping to avoid exploding gradients.
_C.OPTIM.CLIP_GRAD_NORM = 10.0
# Wrap our optimizer with Lookahead (https://arxiv.org/abs/1907.08610).
_C.OPTIM.LOOKAHEAD = CN()
_C.OPTIM.LOOKAHEAD.USE = True
_C.OPTIM.LOOKAHEAD.ALPHA = 0.5
_C.OPTIM.LOOKAHEAD.STEPS = 5
        # We set different learning rates for the CNN (visual backbone) and
        # the rest of the model. The CNN LR is typically much higher when
        # training from scratch. Both LRs follow the same warmup-decay
        # schedule.
# Total batch size (will be distributed evenly across GPUs).
_C.OPTIM.BATCH_SIZE = 256
# Max learning rate for CNN (visual backbone).
_C.OPTIM.CNN_LR = 0.2
# Max learning rate for rest of the model.
_C.OPTIM.LR = 0.001
        # Number of iterations to train for; batches are randomly sampled.
_C.OPTIM.NUM_ITERATIONS = 500000
# Number of steps at the start of training for linear LR warmup.
_C.OPTIM.WARMUP_STEPS = 10000
# Learning rate annealing schedule for decay after warmup.
# Possible choices: {"none", "linear", "cosine", "multistep"}.
_C.OPTIM.LR_DECAY_NAME = "cosine"
# Steps to decay LR for "multistep" schedule.
_C.OPTIM.LR_STEPS = []
# Factor to multiply with LR for "multistep" schedule.
_C.OPTIM.LR_GAMMA = 0.1
# Override parameter values from YAML file first, then from override
# list, then add derived params.
self._C = _C
if config_file is not None:
self._C.merge_from_file(config_file)
self._C.merge_from_list(override_list)
self.add_derived_params()
# Make an instantiated object of this class immutable.
self._C.freeze()
def add_derived_params(self):
r"""Add parameters with values derived from existing parameters."""
# We don't have any such cases so far.
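        # A hypothetical example, e.g. keeping decoding steps in sync with
        # the (possibly overridden) maximum caption length. This method runs
        # before the config is frozen, so mutation is still legal here:
        #
        #     self._C.MODEL.DECODER.MAX_DECODING_STEPS = (
        #         self._C.DATA.MAX_CAPTION_LENGTH
        #     )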
pass
    def dump(self, file_path: str):
        r"""Save config at the specified file path.

        Parameters
        ----------
        file_path: str
            (YAML) path to save config at.
        """
        with open(file_path, "w") as f:
            self._C.dump(stream=f)
def __getattr__(self, attr: str):
return self._C.__getattr__(attr)
def __str__(self):
return self._C.__str__()
def __repr__(self):
return self._C.__repr__()
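

# A minimal usage sketch (illustrative only: the override values and the dump
# path below are hypothetical, not part of this module's API):
if __name__ == "__main__":
    _C = Config(override_list=["OPTIM.BATCH_SIZE", 1024, "OPTIM.LR", 0.01])
    print(_C.OPTIM.BATCH_SIZE)  # 1024
    print(_C.OPTIM.LR)  # 0.01
    # The object is frozen after instantiation, so this would raise an error:
    # _C.OPTIM.LR = 0.1
    _C.dump("/tmp/config_dump.yaml")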