| { |
| "adamw_lr": null, |
| "adamw_weight_decay": null, |
| "autoencoder_rae_pretrained_name": "nyu-visionx/RAE-dinov2-wReg-base-ViTXL-n08", |
| "batch_size": 128, |
| "checkpoint_dir": "checkpoints/train_2026-05-06_14-11-11", |
| "checkpoint_every": 2000, |
| "cls_register_loss_weight": 0.1, |
| "decoder_head_dim": 64, |
| "decoder_intermediate_size": 3072, |
| "decoder_num_heads": 12, |
| "decoder_num_layers": 8, |
| "decoder_pos_embed_side": 16, |
| "disable_wandb": false, |
| "dropout": 0.1, |
| "grad_accum_steps": 1, |
| "image_size": 224, |
| "log_every": 10, |
| "lr": 0.0001, |
| "max_grad_norm": 1.0, |
| "max_steps": 50000, |
| "mixed_precision": "bf16", |
| "mixture_name": "default", |
| "muon_lr": null, |
| "muon_weight_decay": null, |
| "num_proc": 16, |
| "num_shards": null, |
| "num_vis": 16, |
| "num_workers": 4, |
| "pretrained_backbone_name": "facebook/dinov2-with-registers-base", |
| "pretrained_path": null, |
| "recon_loss_weight": 1.0, |
| "redistributor_latent_size": 64, |
| "refiner_head_dim": 64, |
| "refiner_intermediate_size": 3072, |
| "refiner_num_heads": 12, |
| "refiner_num_layers": 12, |
| "resume": "checkpoints/train_2026-05-06_12-27-28/step_14000", |
| "seed": 43, |
| "shuffle_buffer": 10000, |
| "steps_per_epoch": null, |
| "train_encoder": true, |
| "uncertainty_loss_weight": 0.001, |
| "use_muon": true, |
| "val_every": 500, |
| "val_steps": 100, |
| "wandb_mode": null, |
| "wandb_project": "finevit-ae", |
| "wandb_run_name": null, |
| "warmup_steps": 500, |
| "weight_decay": 0.01 |
| } |