# Dense-captioning ("DenseCap") configuration with a ViT-FPN backbone.
# Only the keys listed here are set in this file; everything else is expected
# to come from the base config referenced by _BASE_.
#
# NOTE(review): this file was restored from a single collapsed line. The
# nesting below (in particular BEAM_SIZE, WEIGHTS and VIT_LAYERS living
# directly under MODEL rather than under ROI_HEADS / BACKBONE) follows the
# usual detectron2-style layout -- confirm against the project's config
# defaults before relying on it.

# Base config to inherit from; presumably resolved relative to this file.
_BASE_: "Base.yaml"

MODEL:
  # Train and evaluate on the same task.
  TRAIN_TASK: ["DenseCap"]
  TEST_TASK: "DenseCap"
  MASK_ON: False
  ROI_HEADS:
    SOFT_NMS_ENABLED: False
  BEAM_SIZE: 1
  # Initial checkpoint; the path names an MAE-pretrained ViT-Base model.
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone
  VIT_LAYERS: 12

SOLVER:
  VIT_LAYER_DECAY_RATE: 0.7

DATASETS:
  # Written in Python tuple syntax and left unquoted on purpose: YAML reads
  # these as plain strings; presumably the config loader evaluates them into
  # tuples -- TODO confirm.
  TRAIN: ("vg_train",)
  TEST: ("vg_test",)

DATALOADER:
  DATASET_BS: 2

OUTPUT_DIR: "./output/GRiT_B_DenseCap"