image_root: '/export/share/datasets/vision/nocaps/'
ann_root: 'annotation'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption.pth'
vit: 'base'
batch_size: 32
image_size: 384
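# decoding parameters for caption generation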
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '
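
# --- Usage sketch (not part of the original config) ---
# A minimal example of how these fields are typically consumed, assuming the
# config is read with PyYAML; `build_captioner` and its generate() signature
# are hypothetical placeholders, not the repository's actual API.
#
#   import yaml
#
#   with open('nocaps.yaml') as f:          # assumed file name
#       config = yaml.safe_load(f)
#
#   model = build_captioner(pretrained=config['pretrained'],  # hypothetical factory
#                           vit=config['vit'],
#                           image_size=config['image_size'],
#                           prompt=config['prompt'])
#
#   captions = model.generate(images,                         # beam-search decoding
#                             num_beams=config['num_beams'],
#                             max_length=config['max_length'],
#                             min_length=config['min_length'])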