---
# Training run configuration: a single flat mapping of option name -> value.
#
# Summary of what this file selects (each item is one of the keys below):
#   model   vit_betwixt_patch16_rope_reg4_gap_256, img_size 256, pretrained false
#   data    dataset imagenet, read from data_dir /data/imagenet/
#   optim   opt nadamw, lr_base 0.0008 with lr_base_size 512, weight_decay 0.08
#   sched   cosine, 600 epochs, 20 warmup epochs, 10 cooldown epochs
#
# Keys are kept in alphabetical order, one per line, so diffs between runs
# stay readable.
#
# NOTE(review): the key names look like a dump of a timm-style training
# script's command-line options -- confirm against the consuming script
# before renaming or removing any key.
# NOTE(review): `null` entries presumably mean "not set, use the consumer's
# default", and '' entries are empty strings -- verify against the consumer.
aa: rand-m7-inc1-mstd1.0-n3
amp: true
amp_dtype: float16
amp_impl: native
aug_repeats: 3.0
aug_splits: 0
batch_size: 256
bce_loss: false
bce_target_thresh: null
bn_eps: null
bn_momentum: null
channels_last: false
checkpoint_hist: 10
class_map: ''
clip_grad: 5.0
clip_mode: norm
color_jitter: null
cooldown_epochs: 10
crop_pct: 0.95
cutmix: 1.0
cutmix_minmax: null
data: null
data_dir: /data/imagenet/
dataset: imagenet
dataset_download: false
decay_epochs: 100
decay_milestones:
  - 90
  - 180
  - 270
decay_rate: 0.1
dist_bn: reduce
drop: 0.0
drop_block: null
drop_connect: null
drop_path: 0.2
epoch_repeats: 0.0
epochs: 600
eval_metric: top1
experiment: ''
fast_norm: false
force_cpu: false
fuser: ''
gp: null
grad_accum_steps: 1
grad_checkpointing: false
head_init_bias: null
head_init_scale: null
hflip: 0.5
img_size: 256
in_chans: null
initial_checkpoint: ''
input_size: null
interpolation: ''
jsd_loss: false
layer_decay: null
local_rank: 0
log_interval: 50
log_wandb: false
lr: null
lr_base: 0.0008
lr_base_scale: ''
lr_base_size: 512
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
mean: null
min_lr: 5.0e-07
mixup: 0.8
mixup_mode: batch
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_switch_prob: 0.5
model: vit_betwixt_patch16_rope_reg4_gap_256
model_ema: false
model_ema_decay: 0.9998
model_ema_force_cpu: false
model_kwargs: {}
momentum: 0.9
no_aug: false
no_ddp_bb: false
no_prefetcher: false
no_resume_opt: false
num_classes: null
opt: nadamw
opt_betas: null
opt_eps: 1.0e-08
opt_kwargs: {}
output: ''
patience_epochs: 10
pin_mem: true
pretrained: false
ratio:
  - 0.75
  - 1.3333333333333333
recount: 1
recovery_interval: 0
remode: pixel
reprob: 0.2
resplit: false
resume: ''
save_images: false
scale:
  - 0.08
  - 1.0
sched: cosine
sched_on_updates: true
seed: 0
smoothing: 0.1
split_bn: false
start_epoch: null
std: null
sync_bn: false
synchronize_step: false
torchcompile: inductor
torchscript: false
train_interpolation: random
train_split: train
tta: 0
use_multi_epochs_loader: false
val_split: validation
validation_batch_size: null
vflip: 0.0
warmup_epochs: 20
warmup_lr: 5.0e-07
warmup_prefix: false
weight_decay: 0.08
worker_seeding: all
workers: 8