# Base configuration this file includes.
# NOTE(review): presumably the keys in this file override the included ones —
# confirm against the config loader.
includes: projects/task/ft.yaml
# CrossTask dataset settings: processor/aligner class names, input file
# locations, and windowing parameters.
dataset:
  meta_processor: CrossTaskMetaProcessor
  # train_path / val_path repeat the *_csv_path values and are marked as
  # dummies by the original author.
  train_path: data/crosstask/crosstask_release/videos.csv  # dummy
  train_csv_path: data/crosstask/crosstask_release/videos.csv
  val_path: data/crosstask/crosstask_release/videos_val.csv  # dummy
  val_csv_path: data/crosstask/crosstask_release/videos_val.csv
  primary_path: data/crosstask/crosstask_release/tasks_primary.txt
  related_path: data/crosstask/crosstask_release/tasks_related.txt
  vfeat_dir: data/feat/feat_crosstask_s3d
  annotation_path: data/crosstask/crosstask_release/annotations
  n_train: 30
  video_processor: CrossTaskVideoProcessor
  text_processor: CrossTaskTextProcessor
  aligner: CrossTaskAligner
  num_iso_layer: 12
  # NOTE(review): units and exact meaning of the two window settings are not
  # visible here — presumably stride and window length; confirm in the aligner.
  sliding_window: 16
  sliding_window_size: 32
# Model selection: class names for the task model and its multimodal encoder.
model:
  model_cls: MMFusionActionLocalization
  mm_encoder_cls: MMBertForJoint
# Loss selection by class name.
loss:
  loss_cls: BCE
# Settings grouped under the fairseq section.
# NOTE(review): nesting was reconstructed from a flattened file — dataset,
# optimization, and checkpoint are treated as sub-sections of fairseq (a
# top-level `dataset` key already exists, so this one cannot be top-level).
fairseq:
  dataset:
    batch_size: 1
  optimization:
    max_epoch: 5
  checkpoint:
    save_dir: runs/task/crosstask
    restore_file: runs/task/checkpoint11.pt  # for VLM