---
# Flat run configuration. The values below select modality 'video', dataset
# 'msrvtt' and the msrvtt_vqa_{train,val,test} splits.
#
# NOTE(review): this file originally began with three non-YAML lines
# ("Spaces:", "Build error", "Build error") and every following line ended
# in " | |". Both look like residue from a rendered web page rather than
# configuration. The header survives only as this comment and the trailing
# markers have been removed so the document loads as a plain mapping.

image_res: 224
batch_size_train: 8
batch_size_test: 64
k_test: 64

warm_up: true

# Exponent floats carry an explicit fraction ("2.0e-5", not "2e-5"): YAML 1.1
# loaders such as PyYAML resolve the dot-less form as a string, not a float.
optimizer:
  opt: adamW
  lr: 2.0e-5
  weight_decay: 0.02
  prompt_lr: 1.0e-5

# NOTE(review): "schedular" is the spelling used in the original file. The key
# is left untouched because consumers presumably look it up by this exact
# name; confirm against the loading code before renaming.
schedular:
  sched: cosine
  scheduler_groups: 0
  lr: 2.0e-5  # same value as optimizer.lr above
  epochs: 26
  min_lr: 1.0e-6
  decay_rate: 1
  warmup_lr: 1.0e-5
  warmup_epochs: 4
  cooldown_epochs: 0

use_vis_prefix: true
start_layer_idx: 19
end_layer_idx: 31
injected_hidden_states: 6
lm_loss_weight: 0.1

unfreeze_text_layer_norm: false
unfreeze_vision_layer_norm: false

num_workers: 4

# Quoted so the angle brackets and slash are always read as a literal string.
special_answer_token: '</a>'
replace_added_tokens: true

use_cache: false
connector_per_text_layer: false
text_step: 1

num_beams: 3
do_sample: false

# Prompt tuning
prompt_tuning: true
prompt_len: 10

# Video
modality: 'video'
dataset_name: 'msrvtt'
train_split: 'msrvtt_vqa_train'
val_split: 'msrvtt_vqa_val'
test_split: 'msrvtt_vqa_test'
num_frames: 8
sample_type: 'rand'
num_tries: 1
as_images: false
all_vis_tokens: false

vision_model_name: 'timesformer'
# NOTE(review): absolute, machine-specific checkpoint path; it must exist on
# the host that runs this config. The "8x32_224" part of the file name appears
# to line up with num_frames: 8 and image_res: 224 above; verify before
# changing either value.
pretrained_model: '/gpfswork/rech/dyf/ugz83ue/.cache/torch/hub/checkpoints/TimeSformer_divST_8x32_224_K600.pyth'