{ "best_metric": NaN, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloomz-7b1-mt_TQA/checkpoint-200", "epoch": 0.6990715456034954, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 5.9999999999999995e-05, "loss": 56.1213, "step": 20 }, { "epoch": 0.14, "learning_rate": 0.00011999999999999999, "loss": 46.3785, "step": 40 }, { "epoch": 0.21, "learning_rate": 0.00017999999999999998, "loss": 27.2008, "step": 60 }, { "epoch": 0.28, "learning_rate": 0.00023999999999999998, "loss": 58.3372, "step": 80 }, { "epoch": 0.35, "learning_rate": 0.0003, "loss": 21.5097, "step": 100 }, { "epoch": 0.42, "learning_rate": 0.00029208443271767806, "loss": 149.5568, "step": 120 }, { "epoch": 0.49, "learning_rate": 0.0002841688654353562, "loss": 12.1125, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.0002762532981530343, "loss": 64.6813, "step": 160 }, { "epoch": 0.63, "learning_rate": 0.00026833773087071237, "loss": 12.8244, "step": 180 }, { "epoch": 0.7, "learning_rate": 0.0002604221635883905, "loss": 13.6392, "step": 200 }, { "epoch": 0.7, "eval_loss": NaN, "eval_runtime": 39.2226, "eval_samples_per_second": 50.991, "eval_steps_per_second": 1.606, "step": 200 } ], "max_steps": 858, "num_train_epochs": 3, "total_flos": 1.887154186856956e+18, "trial_name": null, "trial_params": null }