vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' # followed by train2014/
vg_root: '/export/share/datasets/vision/visual-genome/' # followed by image/
train_files: ['vqa_train', 'vqa_val', 'vg_qa']
ann_root: 'annotation'

# set pretrained to a local file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'

# size of the ViT backbone; 'base' or 'large'
vit: 'base'
batch_size_train: 16
batch_size_test: 32
vit_grad_ckpt: False
vit_ckpt_layer: 0
init_lr: 2e-5

image_size: 480

k_test: 128
inference: 'rank'

# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 10
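
# A minimal sketch of how a training script might consume this file, assuming
# PyYAML and an illustrative path ('configs/vqa.yaml'); neither the loader
# choice nor the path is prescribed by this config:
#
#   import yaml
#
#   with open('configs/vqa.yaml') as f:
#       config = yaml.safe_load(f)  # returns a plain dict of the keys above
#
#   # PyYAML's YAML 1.1 resolver reads '2e-5' (no decimal point) as a string,
#   # so numeric fields like init_lr are safest cast explicitly:
#   init_lr = float(config['init_lr'])
#   assert config['vit'] in ('base', 'large')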