{ "optim": { "type": "AdamW", "lr": 0.001, "betas": [ 0.9, 0.98 ], "eps": 1e-12, "weight_decay": 0.01, "amsgrad": false, "fused": null }, "optim_mod": { "name": "none" }, "name": "bert-o4", "limited_decay_keys": [ "bias", "LayerNorm.bias", "LayerNorm.weight", "norm" ], "warmup_steps": 0, "cooldown_steps": 0, "steps": 900000, "scheduler": "budget-triangle2", "batch_size": 8192, "batch_size_ramp": 0.6, "gradient_clipping": 0.5, "pretrain_in_train_mode": false, "objective": { "name": "masked-lm", "mlm_probability": 0.25, "use_80_20_rule": true, "disable_mlm": false, "token_drop": 0.0 }, "reverse_dataset_order": false, "budget": 24 }