# Configuration override file: the keys below are presumably layered on top of
# the file named by `_BASE_` by the consuming config loader (YAML itself gives
# `_BASE_` no special meaning) -- TODO confirm against the loader.
#
# NOTE(review): this file's nesting was reconstructed from key order after the
# original content was found collapsed onto a single line. In particular,
# LABEL_SMOOTHING is placed under MODEL (not MODEL.TEXTUAL); verify against the
# consumer's config schema.
_BASE_: "../_base_bicaptioning_R_50_L1_H1024.yaml"

AMP: true

DATA:
  # Quoted so the `*` glob is kept as a literal string rather than being read
  # as a YAML alias indicator.
  ROOT: "datasets/sbu/tarfiles/*.tar"
  TOKENIZER_MODEL: "datasets/vocab/common_30k.model"
  VOCAB_SIZE: 30000
  UNK_INDEX: 0
  SOS_INDEX: 1
  EOS_INDEX: 2
  MASK_INDEX: 3
  MAX_CAPTION_LENGTH: 50

MODEL:
  NAME: "miniclip_web"
  TEXTUAL:
    NAME: "transenc_prenorm::L12_H512_A8_F2048"
  LABEL_SMOOTHING: 0.1

OPTIM:
  OPTIMIZER_NAME: "adamw"
  WEIGHT_DECAY: 0.01
  LOOKAHEAD:
    USE: false
  BATCH_SIZE: 256
  # CNN_LR and LR are set to the same value in this config.
  CNN_LR: 0.0005
  LR: 0.0005
  NUM_ITERATIONS: 1500000
  WARMUP_STEPS: 10000
  LR_DECAY_NAME: "cosine"