---
# Experiment configuration: dataset_name=msrvtt, modality=video,
# vision_model_name=timesformer.
#
# Keys are kept in their original (alphabetical) order and with their original
# spelling so that whatever code consumes this file keeps finding them.
# Floats in scientific notation are written with an explicit mantissa dot
# (2.0e-05 rather than 2e-05) so that YAML 1.1 loaders resolve them as floats
# instead of strings; the numeric values are unchanged.

all_vis_tokens: false
append_eos_token: true
as_images: false
batch_size_test: 8
batch_size_train: 16
dataset_name: msrvtt
end_layer_idx: 31
image_res: 224
injected_hidden_states: 6
lm_loss_weight: 0.1
modality: video
num_frames: 16
num_tries: 1
num_workers: 4

optimizer:
  lr: 2.0e-05
  opt: adamW
  prompt_lr: 1.0e-05
  weight_decay: 0.02

# NOTE(review): absolute, machine-specific checkpoint path — confirm it exists
# in the environment where this config is used.
pretrained_model: /gpfswork/rech/dyf/ugz83ue/.cache/torch/hub/checkpoints/TimeSformer_divST_8x32_224_K600.pyth

prompt_len: 10
prompt_tuning: true
replace_added_tokens: true
sample_type: rand

# NOTE(review): the key is spelled "schedular" in the original file; presumably
# the consuming code looks it up under this exact name — verify before renaming.
# NOTE(review): lr here duplicates optimizer.lr (both 2.0e-05) — confirm whether
# they are meant to be kept in sync.
schedular:
  cooldown_epochs: 0
  decay_rate: 1
  epochs: 26
  lr: 2.0e-05
  min_lr: 1.0e-06
  sched: cosine
  scheduler_groups: 0
  warmup_epochs: 4
  warmup_lr: 1.0e-05

shift_labels: false
start_layer_idx: 19
test_split: msrvtt_caption_test
train_split: msrvtt_caption_train7k
unfreeze_text_layer_norm: false
unfreeze_vision_layer_norm: false
use_cache: false
use_vis_prefix: true

# NOTE(review): val_split points at the same split as test_split — confirm this
# is intentional.
val_split: msrvtt_caption_test

vision_model_name: timesformer
warm_up: true
|