{ "optim": { "type": "AdamW", "lr": 0.0005, "betas": [ 0.9, 0.98 ], "eps": 1e-12, "weight_decay": 0.01, "amsgrad": false, "fused": null }, "optim_mod": { "name": "none" }, "name": "bert-base", "limited_decay_keys": [ "bias", "LayerNorm.bias", "LayerNorm.weight", "norm" ], "warmup_steps": 30000, "cooldown_steps": 0, "steps": 900000, "scheduler": "budget-cosine-decay", "batch_size": 1536, "batch_size_ramp": 0, "gradient_clipping": null, "pretrain_in_train_mode": false, "objective": { "name": "masked-lm", "mlm_probability": 0.15, "use_80_20_rule": true, "disable_mlm": false, "token_drop": 0.0 }, "reverse_dataset_order": false, "budget": 24 }