# Configuration for the `vjepa` app.
#
# NOTE(review): this file was recovered from a flattened copy in which every
# line had a trailing " | |" and no indentation. The nesting below was
# reconstructed from the alphabetical key ordering within each section;
# confirm it against the code that consumes this config.
app: vjepa

# Input clip and data-loader settings.
data:
  crop_size: 224
  decode_one_clip: true
  filter_short_videos: false
  num_clips: 1
  num_frames: 16
  num_workers: 4
  patch_size: 16
  pin_mem: true
  sampling_rate: 4
  tubelet_size: 1

# Data augmentation settings. Two-element lists are presumably [min, max]
# ranges -- TODO confirm against the consumer.
data_aug:
  auto_augment: false
  motion_shift: false
  random_resize_aspect_ratio:
    - 0.75
    - 1.35
  random_resize_scale:
    - 0.3
    - 1.0
  reprob: 0.0

# Logging output. NOTE(review): `folder` is an absolute, machine-specific
# path; adjust it for the local environment.
logging:
  folder: /media/rpal/Drive_10TB/John/jepa/logs
  write_tag: jepa

# Mask definitions: a list of two entries with identical keys. They differ
# only in `num_blocks` (8 vs 2) and `spatial_scale` (0.15 vs 0.7).
mask:
  - aspect_ratio:
      - 0.75
      - 1.5
    max_keep: null
    max_temporal_keep: 1.0
    num_blocks: 8
    spatial_scale:
      - 0.15
      - 0.15
    temporal_scale:
      - 1.0
      - 1.0
  - aspect_ratio:
      - 0.75
      - 1.5
    max_keep: null
    max_temporal_keep: 1.0
    num_blocks: 2
    spatial_scale:
      - 0.7
      - 0.7
    temporal_scale:
      - 1.0
      - 1.0

# Run-level settings. NOTE(review): `read_checkpoint` is an absolute,
# machine-specific path; adjust it for the local environment.
meta:
  dtype: bfloat16
  eval_freq: 100
  load_checkpoint: true
  read_checkpoint: /media/rpal/Drive_10TB/John/jepa/huggingface/jepa-latest.pth.tar
  save_every_freq: 5
  seed: 234
  use_sdpa: false

# Model settings.
model:
  model_name: vit_large
  pred_depth: 12
  pred_embed_dim: 384
  uniform_power: true
  use_mask_tokens: true
  zero_init_mask_tokens: true