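# NOTE(review): "Spaces: Paused" looks like web-page status text captured
# together with this file, not a configuration key -- kept as a comment.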
# Model settings for the `video_llama` architecture.
model:
  arch: video_llama
  model_type: pretrain_vicuna
  freeze_vit: true
  freeze_qformer: true
  max_txt_len: 512
  # Must stay quoted: an unquoted ### would be read as a comment.
  end_sym: "###"
  low_resource: false

  frozen_llama_proj: false

  # Language-model checkpoint directory; set exactly ONE path here.
  # Paths listed as options for this key:
  #   "ckpt/vicuna-13b/", "ckpt/vicuna-7b/",
  #   "ckpt/llama-2-7b-chat-hf", "ckpt/llama-2-13b-chat-hf"
  # If you want to use LLaMA-2-chat, some checkpoints can be downloaded
  # from the provided Hugging Face repo, e.g.
  # https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned
  llama_model: "ckpt/vicuna-13b/"
  # Placeholder path; a pretrained checkpoint is available from
  # https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Pretrained/
  ckpt: 'path/pretrained_visual_branch_ckpt'
  equip_audio_branch: false

  fusion_head_layers: 2
  max_frame_pos: 32
  fusion_header_type: "seqTransf"
# Dataset settings: one entry per dataset, each naming the visual and
# text processors applied to its `train` split.
datasets:
  webvid:
    vis_processor:
      train:
        name: "alpro_video_eval"
        n_frms: 8
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"
# Run settings; `task` names the task to execute.
run:
  task: video_text_pretrain