File size: 613 Bytes
45d16e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Model section: architecture selection, freeze flags, text limits, the
# language-model identifier, fusion-head settings, and checkpoint files.
model:
  arch: 'video_llama'
  model_type: 'pretrain_vicuna'
  # Both freeze flags are enabled. Presumably they keep the ViT and Q-Former
  # weights fixed -- confirm against the model loader.
  freeze_vit: true
  freeze_qformer: true
  max_txt_len: 512  # presumably a cap on text length -- TODO confirm the unit
  # Must stay quoted: an unquoted leading `#` would begin a YAML comment.
  end_sym: '###'
  low_resource: false

  # NOTE(review): looks like a hub-style `org/name` identifier rather than a
  # local path -- verify how the loader resolves it.
  llama_model: 'DAMO-NLP-SG/vicuna-7b'

  fusion_head_layers: 2
  max_frame_pos: 32
  fusion_header_type: 'seqTransf'

  # Checkpoint files, given as relative paths under `ckpt/`.
  ckpt: 'ckpt/finetune-vicuna7b-v2-nofrozen_imageQ.pth'
  q_former_model: 'ckpt/blip2_pretrained_flant5xxl.pth'

# Dataset section: one entry (`webvid`) with its visual and text processors.
datasets:
  webvid:
    vis_processor:
      # NOTE(review): the `train` split is configured with a processor named
      # `alpro_video_eval` -- confirm this is intentional.
      train:
        name: 'alpro_video_eval'
        n_frms: 8  # presumably the number of frames per video -- confirm
        image_size: 224
    text_processor:
      train:
        name: 'blip_caption'

# Run section: names the task to execute.
run:
  task: 'video_text_pretrain'