# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
---
# Run configuration with three top-level sections:
#   model    - which architecture/checkpoint to build and how to load it
#   datasets - per-dataset input processors
#   run      - task, optimizer schedule, generation and runtime settings

model:
  arch: sevila
  model_type: pretrain_flant5xl
  # Load the weights at `finetuned` (path is relative; presumably resolved
  # against the working directory or repo root - confirm with the loader).
  load_finetuned: true
  finetuned: 'checkpoints/blip2_pretrained_flant5xl.pth'
  use_grad_checkpoint: false
  freeze_vit: true
  # NOTE(review): frame_num (8) differs from the dataset's n_frms (4) below;
  # confirm the two are meant to be independent.
  frame_num: 8
  answer_num: 4
  task: freeze_qa_vid
  qformer_input_text: false

datasets:
  how2qa:
    # Only `eval` processors are declared in this file.
    vis_processor:
      eval:
        name: "blip_video_eval"
        n_frms: 4
        image_size: 224
    text_processor:
      eval:
        name: "blip_question"
        max_words: 50

run:
  task: videoqa

  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates carry an explicit mantissa dot so that YAML 1.1 loaders
  # also read them as floats rather than strings.
  init_lr: 1.0e-5
  min_lr: 0
  warmup_lr: 1.0e-8
  warmup_steps: 200
  weight_decay: 0.05
  max_epoch: 5
  batch_size_train: 32
  batch_size_eval: 32
  num_workers: 8
  accum_grad_iters: 1

  # generation
  max_len: 30
  min_len: 8
  num_beams: 5

  seed: 42
  # Empty string, not null: no output directory is set in this file.
  output_dir: ""

  amp: true
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]
  valid_splits: ["val"]
  # The test split reuses "val".
  test_splits: ["val"]

  # distributed / device
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true
  find_unused_parameters: true