# vjepa-self-driving / params-encoder.yaml
# jonathanzkoch's picture
# update demo files and embedding config
# 81cffda
# Application identifier for this config. Presumably selects which app the
# launcher runs — the consumer is not visible in this file; confirm there.
app: vjepa
# Video input settings. Key meanings are defined by the consuming app and are
# not visible in this file, so the notes below are hedged.
data:
  # 224 / 16 = 14, i.e. crop_size is an exact multiple of patch_size.
  # NOTE(review): presumably the consumer requires that — confirm.
  crop_size: 224
  decode_one_clip: true
  filter_short_videos: false
  num_clips: 1
  num_frames: 16
  # NOTE(review): presumably the number of data-loader workers; likely needs
  # tuning per host — confirm against the consumer.
  num_workers: 4
  patch_size: 16
  pin_mem: true
  sampling_rate: 4
  tubelet_size: 1
# Augmentation settings. Both boolean augmentations are switched off and
# reprob is 0.0; the two random_resize_* entries are two-element lists.
# NOTE(review): presumably each list is a [min, max] range — confirm against
# the consumer.
data_aug:
  auto_augment: false
  motion_shift: false
  random_resize_aspect_ratio:
    - 0.75
    - 1.35
  random_resize_scale:
    - 0.3
    - 1.0
  reprob: 0.0
# Logging output location and tag.
logging:
  # NOTE(review): absolute, machine-specific path — it will not exist on other
  # hosts; override it before running elsewhere.
  folder: /media/rpal/Drive_10TB/John/jepa/logs
  write_tag: jepa
# List of two mask specifications. The entries share every value except
# num_blocks (8 vs 2) and spatial_scale (0.15 vs 0.7).
# NOTE(review): presumably the first describes many small blocks and the
# second a few large ones, with each two-element list being a [min, max]
# range — confirm against the consumer.
mask:
  # Entry 1: num_blocks 8, spatial_scale 0.15.
  - aspect_ratio:
      - 0.75
      - 1.5
    max_keep: null
    max_temporal_keep: 1.0
    num_blocks: 8
    spatial_scale:
      - 0.15
      - 0.15
    temporal_scale:
      - 1.0
      - 1.0
  # Entry 2: num_blocks 2, spatial_scale 0.7.
  - aspect_ratio:
      - 0.75
      - 1.5
    max_keep: null
    max_temporal_keep: 1.0
    num_blocks: 2
    spatial_scale:
      - 0.7
      - 0.7
    temporal_scale:
      - 1.0
      - 1.0
# Run-level settings: numeric precision, evaluation/save cadence, checkpoint
# loading and RNG seed.
meta:
  dtype: bfloat16
  eval_freq: 100
  # load_checkpoint is true, so read_checkpoint below is presumably required
  # to exist at startup — confirm against the consumer.
  load_checkpoint: true
  # NOTE(review): absolute, machine-specific path — override it on other
  # hosts.
  read_checkpoint: /media/rpal/Drive_10TB/John/jepa/huggingface/jepa-latest.pth.tar
  save_every_freq: 5
  seed: 234
  use_sdpa: false
# Model architecture settings: a named backbone plus pred_* values.
# NOTE(review): the pred_* keys presumably size a predictor network — confirm
# against the consumer.
model:
  model_name: vit_large
  pred_depth: 12
  pred_embed_dim: 384
  uniform_power: true
  use_mask_tokens: true
  zero_init_mask_tokens: true