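# NOTE(review): the leading lines of this file contained only stray "|"
# characters (extraction residue); restore any license/comment header from
# the upstream copy if one existed.
---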
# Model selection.
# NOTE(review): indentation was lost in this file and stray "|" lines separated
# every entry; the three keys below are nested under `model` by position —
# confirm against the schema of the loader that consumes this config.
model:
  arch: blip2_t5
  model_type: pretrain_flant5xl
  # Canonical lowercase boolean; gradient checkpointing is switched off.
  use_grad_checkpoint: false

# Dataset configuration.
# NOTE(review): the original indentation was lost and stray "|" lines separated
# every entry. The hierarchy below is reconstructed from key order (a key with
# no value opens a nested mapping) and from the parallel
# `vis_processor`/`text_processor` -> `eval` pairs — confirm against the
# loader that consumes this file.
datasets:
  gqa:
    type: balanced_testdev
    vis_processor:
      eval:
        name: "blip_image_eval"
        image_size: 224
    text_processor:
      eval:
        name: "blip_question"
    build_info:
      images:
        # Absolute, machine-specific path; presumably must point at a local
        # copy of the GQA images — verify before running.
        storage: "/export/share/datasets/vision/GQA/images/"

# Run settings.
# NOTE(review): indentation was lost in this file and stray "|" lines separated
# every entry; all scalar keys below are nested under `run` by position —
# confirm against the schema of the loader that consumes this config.
run:
  task: gqa

  # Batch sizes and worker count.
  batch_size_train: 16
  batch_size_eval: 64
  num_workers: 4

  # Generation-related settings (grouped by key name; semantics are defined
  # by the consumer).
  max_len: 10
  min_len: 1
  num_beams: 5
  inference_method: "generate"
  # Must stay quoted: the value contains ": ", which would otherwise be
  # parsed as a nested mapping.
  prompt: "Question: {} Short answer:"

  seed: 42
  output_dir: "output/BLIP2/GQA"

  # Canonical lowercase booleans are used for `evaluate` and `distributed`.
  evaluate: true
  test_splits: ["val"]

  # Device and distributed-launch settings.
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true