{
  "best_metric": null,
  "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llama-7b-hf_TQA/checkpoint-200",
  "epoch": 0.6990715456034954,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 5.1e-05,
      "loss": 15278261862.4,
      "step": 20
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00010799999999999998,
      "loss": 155534963507.2,
      "step": 40
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.000168,
      "loss": 26388629094.4,
      "step": 60
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022799999999999999,
      "loss": 1740693708.8,
      "step": 80
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00028799999999999995,
      "loss": 353188315136.0,
      "step": 100
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002936675461741425,
      "loss": 15921389568.0,
      "step": 120
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00028575197889182057,
      "loss": 8279447142.4,
      "step": 140
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00027783641160949866,
      "loss": 31305360998.4,
      "step": 160
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00026992084432717674,
      "loss": 100943947366.4,
      "step": 180
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0002620052770448549,
      "loss": 1337943859.2,
      "step": 200
    },
    {
      "epoch": 0.7,
      "eval_loss": null,
      "eval_runtime": 33.6458,
      "eval_samples_per_second": 59.443,
      "eval_steps_per_second": 1.872,
      "step": 200
    }
  ],
  "max_steps": 858,
  "num_train_epochs": 3,
  "total_flos": 2.0791367854467318e+18,
  "trial_name": null,
  "trial_params": null
}