vjepa-self-driving / params-encoder.yaml
jonathanzkoch's picture
Upload 4 files
41c6668 verified
raw
history blame
1.42 kB
# Application identifier for this run.
# NOTE(review): presumably used by the launcher to select the training entry
# point — confirm against the code that loads this file.
app: vjepa
# Video data-loading settings. Every key below belongs to the `data` mapping.
data:
  batch_size: 8
  clip_duration: null
  crop_size: 224
  dataset_type: VideoDataset
  # Placeholder path; point this at the real dataset index file(s).
  datasets:
  - /path/to/dataset.csv
  decode_one_clip: true
  filter_short_videos: false
  num_clips: 1
  # NOTE(review): presumably 16 frames per clip, taken every `sampling_rate`-th
  # source frame — confirm against the data loader.
  num_frames: 16
  num_workers: 4
  patch_size: 16
  pin_mem: true
  sampling_rate: 4
  tubelet_size: 2
# Data-augmentation settings. Every key below belongs to the `data_aug` mapping.
data_aug:
  auto_augment: false
  motion_shift: false
  # Two-element range.
  # NOTE(review): presumably [min, max] aspect ratio of the random crop — confirm.
  random_resize_aspect_ratio:
  - 0.75
  - 1.35
  # Two-element range.
  # NOTE(review): presumably [min, max] area fraction of the random crop — confirm.
  random_resize_scale:
  - 0.3
  - 1.0
  # NOTE(review): presumably the random-erasing probability; 0.0 would disable
  # it — confirm.
  reprob: 0.0
# Logging settings. Both keys below belong to the `logging` mapping.
logging:
  # Placeholder path; replace with a real output directory before running.
  folder: /path/to/logs
  # NOTE(review): presumably a tag used when naming written log/checkpoint
  # files — confirm against the trainer.
  write_tag: jepa
# Loss settings. Both keys below belong to the `loss` mapping.
loss:
  # NOTE(review): presumably the exponent applied to the prediction error
  # (1.0 would be an L1-style loss) — confirm against the trainer.
  loss_exp: 1.0
  # NOTE(review): presumably the weight of a regularization term; 0.0 would
  # disable it — confirm.
  reg_coeff: 0.0
# Mask specifications: a sequence of two mappings with identical keys.
# Each entry must keep its keys nested under its own `- ` item; otherwise the
# repeated keys collide and the second entry is absorbed into a list.
mask:
# Entry 1: 8 blocks, each with spatial_scale 0.15.
- aspect_ratio:
  - 0.75
  - 1.5
  max_keep: null
  max_temporal_keep: 1.0
  num_blocks: 8
  spatial_scale:
  - 0.15
  - 0.15
  temporal_scale:
  - 1.0
  - 1.0
# Entry 2: 2 blocks, each with spatial_scale 0.7.
- aspect_ratio:
  - 0.75
  - 1.5
  max_keep: null
  max_temporal_keep: 1.0
  num_blocks: 2
  spatial_scale:
  - 0.7
  - 0.7
  temporal_scale:
  - 1.0
  - 1.0
# Run-level settings. Every key below belongs to the `meta` mapping.
meta:
  dtype: bfloat16
  eval_freq: 100
  load_checkpoint: true
  # Placeholder path; replace with a real checkpoint file.
  # NOTE(review): presumably read at startup because `load_checkpoint` is
  # true — confirm against the trainer.
  read_checkpoint: /path/to/vitl16.pth.tar
  # NOTE(review): unit (epochs vs. iterations) is not visible here — confirm.
  save_every_freq: 5
  seed: 234
  use_sdpa: true
# Model settings. Every key below belongs to the `model` mapping.
model:
  model_name: vit_large
  # NOTE(review): the `pred_` prefix presumably refers to the predictor
  # network (depth in layers, embedding width) — confirm against model code.
  pred_depth: 12
  pred_embed_dim: 384
  uniform_power: true
  use_mask_tokens: true
  zero_init_mask_tokens: true
# Top-level key (not part of any nested section).
# NOTE(review): presumably the number of machines requested by the
# distributed launcher — confirm.
nodes: 16
# Optimizer and schedule settings. Every key below belongs to the
# `optimization` mapping.
optimization:
  clip_grad: 10.0
  # Two-element list.
  # NOTE(review): presumably [start, end] of the EMA momentum schedule — confirm.
  ema:
  - 0.998
  - 1.0
  epochs: 25
  final_lr: 1.0e-06
  final_weight_decay: 0.4
  # NOTE(review): `ipe` presumably means iterations per epoch, with
  # `ipe_scale` stretching the schedule length — confirm against the trainer.
  ipe: 300
  ipe_scale: 1.25
  lr: 0.000625
  start_lr: 0.0002
  # NOTE(review): unit (epochs vs. iterations) is not visible here — confirm.
  warmup: 40
  weight_decay: 0.04
# Top-level key (not part of any nested section).
# NOTE(review): presumably the number of processes launched on each machine,
# e.g. one per GPU — confirm against the launcher.
tasks_per_node: 8