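# BLIP-2 (OPT-2.7b) fine-tuning config for image captioning on COCO.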
model:
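  # Starts from the pretrained caption_coco_opt2.7b weights rather than an
  # already finetuned checkpoint. The ViT is unfrozen, so gradient
  # checkpointing is turned on to keep activation memory manageable.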
  arch: blip2_opt
  model_type: caption_coco_opt2.7b
  load_finetuned: False
  use_grad_checkpoint: True
  freeze_vit: False

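# COCO captions with BLIP-2 processors. Images are sized to 364x364 for both
# training and eval; during training the text processor prepends the prompt
# "a photo of " to each caption.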
datasets:
  coco_caption:
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 364
      eval:
        name: "blip_image_eval"
        image_size: 364
    text_processor:
      train:
        name: "blip_caption"
        prompt: "a photo of "
      eval:
        name: "blip_caption"

run:
  task: captioning

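  # Optimizer schedule: linear warmup from 1e-8 to 1e-5 over the first 1000
  # steps, then cosine decay toward 0. Effective batch size is
  # batch_size_train x accum_grad_iters x world_size.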
  lr_sched: "linear_warmup_cosine_lr"
  init_lr: 1e-5
  min_lr: 0
  warmup_lr: 1e-8
  warmup_steps: 1000
  weight_decay: 0.05
  max_epoch: 5
  batch_size_train: 16
  batch_size_eval: 8
  num_workers: 4
  accum_grad_iters: 1

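  # Caption generation: beam search with 5 beams; outputs are constrained to
  # between 8 and 30 tokens.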
  max_len: 30
  min_len: 8
  num_beams: 5

  seed: 42
  output_dir: "output/BLIP2/Caption_coco"

  amp: True
  resume_ckpt_path: null

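  # With evaluate: True the runner skips training and only runs evaluation on
  # the valid/test splits.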
  evaluate: False
  train_splits: ["train"]
  valid_splits: ["val"]
  test_splits: ["test"]

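  # Distributed setup: "env://" tells torch.distributed to read rank, world
  # size, and master address from environment variables set by the launcher.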
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: True
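
# Usage sketch (assumes a standard LAVIS checkout with its train.py entry
# point, and that this file lives at the path below; adjust the GPU count
# for your machine):
#
#   python -m torch.distributed.run --nproc_per_node=8 train.py \
#       --cfg-path lavis/projects/blip2/train/caption_coco_ft.yaml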