{
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu"
    },
    "contiguous_gradients": true,
    "overlap_comm": true
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "betas": "auto",
      "eps": "auto",
      "weight_decay": "auto"
    }
  },
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto"
}