{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 7647, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 2.8038446449588075e-05, "loss": 2.5909, "step": 500 }, { "epoch": 0.39, "learning_rate": 2.6076892899176145e-05, "loss": 2.3461, "step": 1000 }, { "epoch": 0.59, "learning_rate": 2.4115339348764223e-05, "loss": 2.2111, "step": 1500 }, { "epoch": 0.78, "learning_rate": 2.2153785798352297e-05, "loss": 2.1523, "step": 2000 }, { "epoch": 0.98, "learning_rate": 2.019223224794037e-05, "loss": 2.0715, "step": 2500 }, { "epoch": 1.0, "eval_loss": 1.9488946199417114, "eval_runtime": 854.0815, "eval_samples_per_second": 2.984, "eval_steps_per_second": 0.747, "step": 2549 }, { "epoch": 1.18, "learning_rate": 1.8230678697528442e-05, "loss": 1.4507, "step": 3000 }, { "epoch": 1.37, "learning_rate": 1.6269125147116516e-05, "loss": 1.3564, "step": 3500 }, { "epoch": 1.57, "learning_rate": 1.430757159670459e-05, "loss": 1.3545, "step": 4000 }, { "epoch": 1.77, "learning_rate": 1.2346018046292664e-05, "loss": 1.3372, "step": 4500 }, { "epoch": 1.96, "learning_rate": 1.0384464495880738e-05, "loss": 1.3039, "step": 5000 }, { "epoch": 2.0, "eval_loss": 1.8286019563674927, "eval_runtime": 849.7166, "eval_samples_per_second": 3.0, "eval_steps_per_second": 0.751, "step": 5098 }, { "epoch": 2.16, "learning_rate": 8.42291094546881e-06, "loss": 0.987, "step": 5500 }, { "epoch": 2.35, "learning_rate": 6.461357395056885e-06, "loss": 0.8986, "step": 6000 }, { "epoch": 2.55, "learning_rate": 4.4998038446449585e-06, "loss": 0.9008, "step": 6500 }, { "epoch": 2.75, "learning_rate": 2.538250294233033e-06, "loss": 0.8903, "step": 7000 }, { "epoch": 2.94, "learning_rate": 5.766967438211064e-07, "loss": 0.8313, "step": 7500 }, { "epoch": 3.0, "eval_loss": 1.8424451351165771, "eval_runtime": 846.7842, "eval_samples_per_second": 3.01, "eval_steps_per_second": 0.753, "step": 7647 } ], "logging_steps": 500, "max_steps": 7647, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.6566780520562688e+16, "trial_name": null, "trial_params": null }