# virtex-redcaps / config.yaml
# Global runtime switches (top-level keys).
# NOTE(review): AMP presumably toggles automatic mixed precision and the
# CUDNN_* keys presumably map to the cuDNN backend flags of the training
# framework — confirm against the code that consumes this file.
AMP: true
CUDNN_BENCHMARK: true
CUDNN_DETERMINISTIC: false
# Dataset, image-transform and tokenizer settings.
# NOTE(review): the nesting below was reconstructed from key order — keys are
# sorted within each mapping, and MASK_INDEX restarting the order after
# REPLACE_PROBABILITY marks the end of the MASKED_LM group. Without the
# indentation every key here would be a top-level key and DATA would be null.
# Verify against the schema of the consuming config loader.
DATA:
  EOS_INDEX: 2
  IMAGE_CROP_SIZE: 224
  # Names of the transforms listed for training, in file order.
  IMAGE_TRANSFORM_TRAIN:
    - random_resized_crop
    - horizontal_flip
    - color_jitter
    - normalize
  # Names of the transforms listed for validation, in file order.
  IMAGE_TRANSFORM_VAL:
    - smallest_resize
    - center_crop
    - normalize
  MASKED_LM:
    MASK_PROBABILITY: 0.85
    MASK_PROPORTION: 0.15
    REPLACE_PROBABILITY: 0.1
  MASK_INDEX: 3
  MAX_CAPTION_LENGTH: 50
  # Quoted defensively because the value contains a glob `*`; the string
  # itself is unchanged.
  ROOT: 'datasets/redcaps/tarfiles/*.tar'
  SOS_INDEX: 1
  TOKENIZER_MODEL: datasets/common_30k.model
  UNK_INDEX: 0
  USE_PERCENTAGE: 100.0
  USE_SINGLE_CAPTION: false
  VOCAB_SIZE: 30000
# Model settings: decoder, textual head and visual backbone.
# NOTE(review): nesting reconstructed from key order (keys are sorted within
# each mapping). Without the indentation the four NAME keys below collide as
# duplicates in a single mapping, where most parsers silently keep only the
# last one. Verify against the schema of the consuming config loader.
MODEL:
  DECODER:
    BEAM_SIZE: 5
    MAX_DECODING_STEPS: 30
    NAME: nucleus_sampling
    NUCLEUS_SIZE: 0.9
  LABEL_SMOOTHING: 0.1
  NAME: virtex_web
  TEXTUAL:
    DROPOUT: 0.1
    NAME: transdec_prenorm::L6_H512_A8_F2048
  VISUAL:
    FEATURE_SIZE: 2048
    FROZEN: false
    NAME: torchvision::resnet50
    PRETRAINED: false
# Optimizer and learning-rate schedule settings.
# NOTE(review): nesting reconstructed from key order — keys are sorted within
# each mapping, and LR restarting the order after USE marks the end of the
# LOOKAHEAD group. Without the indentation OPTIM and LOOKAHEAD would be null
# and their children top-level keys. Verify against the consuming schema.
OPTIM:
  BATCH_SIZE: 256
  CLIP_GRAD_NORM: 10.0
  CNN_LR: 0.0005
  LOOKAHEAD:
    ALPHA: 0.5
    STEPS: 5
    USE: false
  LR: 0.0005
  LR_DECAY_NAME: cosine
  LR_GAMMA: 0.1
  LR_STEPS: []
  # Regular expression; single-quoted so that `.`, `*`, `|` and parentheses
  # are always read as one literal string. The pattern text is unchanged.
  NO_DECAY: '.*textual.(embedding|transformer).*(norm.*|bias)'
  NUM_ITERATIONS: 1500000
  OPTIMIZER_NAME: adamw
  SGD_MOMENTUM: 0.9
  WARMUP_STEPS: 10000
  WEIGHT_DECAY: 0.01
# Top-level seed value.
# NOTE(review): presumably used to seed the consumer's random number
# generators — confirm against the code that reads this file.
RANDOM_SEED: 0