{ "best_metric": 3.5487985610961914, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 181.15855855855855, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 18.12, "learning_rate": 9e-05, "loss": 4.2544, "step": 2500 }, { "epoch": 36.23, "learning_rate": 8e-05, "loss": 3.9123, "step": 5000 }, { "epoch": 36.23, "eval_loss": 3.843057632446289, "eval_runtime": 513.4541, "eval_samples_per_second": 9.555, "eval_steps_per_second": 4.777, "step": 5000 }, { "epoch": 54.35, "learning_rate": 7e-05, "loss": 3.7584, "step": 7500 }, { "epoch": 72.46, "learning_rate": 6e-05, "loss": 3.6674, "step": 10000 }, { "epoch": 72.46, "eval_loss": 3.6707117557525635, "eval_runtime": 512.9035, "eval_samples_per_second": 9.565, "eval_steps_per_second": 4.783, "step": 10000 }, { "epoch": 90.58, "learning_rate": 5e-05, "loss": 3.6049, "step": 12500 }, { "epoch": 108.69, "learning_rate": 4e-05, "loss": 3.5633, "step": 15000 }, { "epoch": 108.69, "eval_loss": 3.59525203704834, "eval_runtime": 512.888, "eval_samples_per_second": 9.565, "eval_steps_per_second": 4.783, "step": 15000 }, { "epoch": 126.81, "learning_rate": 3e-05, "loss": 3.5333, "step": 17500 }, { "epoch": 144.92, "learning_rate": 2e-05, "loss": 3.5125, "step": 20000 }, { "epoch": 144.92, "eval_loss": 3.559532403945923, "eval_runtime": 512.8641, "eval_samples_per_second": 9.566, "eval_steps_per_second": 4.783, "step": 20000 }, { "epoch": 163.04, "learning_rate": 1e-05, "loss": 3.501, "step": 22500 }, { "epoch": 181.16, "learning_rate": 0.0, "loss": 3.493, "step": 25000 }, { "epoch": 181.16, "eval_loss": 3.5487985610961914, "eval_runtime": 512.9062, "eval_samples_per_second": 9.565, "eval_steps_per_second": 4.783, "step": 25000 }, { "epoch": 181.16, "step": 25000, "total_flos": 3.7349994596715725e+17, "train_loss": 3.6800425, "train_runtime": 52880.0792, "train_samples_per_second": 3.782, "train_steps_per_second": 0.473 } ], "max_steps": 25000, "num_train_epochs": 182, "total_flos": 3.7349994596715725e+17, "trial_name": null, "trial_params": null }