{ "best_metric": 1.734375, "best_model_checkpoint": "/home/zhengxinyong/outputs/bloom-1b1_ru_continual-pretrain_100000samples_-1vocab_original/checkpoint-25000", "epoch": 1.4637002341920375, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 0.0001, "loss": 2.2756, "step": 2500 }, { "epoch": 0.29, "learning_rate": 0.0001, "loss": 2.0005, "step": 5000 }, { "epoch": 0.29, "eval_loss": 1.9326171875, "eval_runtime": 5.8642, "eval_samples_per_second": 232.428, "eval_steps_per_second": 29.16, "step": 5000 }, { "epoch": 0.44, "learning_rate": 0.0001, "loss": 1.9088, "step": 7500 }, { "epoch": 0.59, "learning_rate": 0.0001, "loss": 1.8653, "step": 10000 }, { "epoch": 0.59, "eval_loss": 1.8349609375, "eval_runtime": 5.8764, "eval_samples_per_second": 231.946, "eval_steps_per_second": 29.1, "step": 10000 }, { "epoch": 0.73, "learning_rate": 0.0001, "loss": 1.8281, "step": 12500 }, { "epoch": 0.88, "learning_rate": 0.0001, "loss": 1.7953, "step": 15000 }, { "epoch": 0.88, "eval_loss": 1.7861328125, "eval_runtime": 5.856, "eval_samples_per_second": 232.753, "eval_steps_per_second": 29.201, "step": 15000 }, { "epoch": 1.02, "learning_rate": 0.0001, "loss": 1.7593, "step": 17500 }, { "epoch": 1.17, "learning_rate": 0.0001, "loss": 1.6602, "step": 20000 }, { "epoch": 1.17, "eval_loss": 1.7548828125, "eval_runtime": 5.8587, "eval_samples_per_second": 232.645, "eval_steps_per_second": 29.187, "step": 20000 }, { "epoch": 1.32, "learning_rate": 0.0001, "loss": 1.672, "step": 22500 }, { "epoch": 1.46, "learning_rate": 0.0001, "loss": 1.6639, "step": 25000 }, { "epoch": 1.46, "eval_loss": 1.734375, "eval_runtime": 5.8469, "eval_samples_per_second": 233.117, "eval_steps_per_second": 29.246, "step": 25000 }, { "epoch": 1.46, "step": 25000, "total_flos": 8.355380415011226e+17, "train_loss": 1.84288763671875, "train_runtime": 4774.5297, "train_samples_per_second": 41.889, "train_steps_per_second": 5.236 } ], "max_steps": 25000, "num_train_epochs": 2, "total_flos": 8.355380415011226e+17, "trial_name": null, "trial_params": null }