{ "optim": { "type": "AdamW", "lr": 0.001, "betas": [ 0.9, 0.98 ], "eps": 1e-12, "weight_decay": 0.01, "amsgrad": false }, "optim_mod": { "name": "none" }, "name": "bert-o3", "limited_decay_keys": [ "bias", "LayerNorm.bias", "LayerNorm.weight" ], "warmup_steps": 0, "cooldown_steps": 0, "steps": 600000, "scheduler": "budget-triangle2", "batch_size": 4096, "batch_size_ramp": 300000, "gradient_clipping": 0.5, "pretrain_in_train_mode": false, "objective": { "name": "masked-lm", "mlm_probability": 0.15, "use_80_20_rule": true, "disable_mlm": false, "token_drop": 0.0 }, "reverse_dataset_order": false, "budget": 24, "gradinit": { "enabled": false } }