{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9773260359655981, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 2.8373729476153247e-05, "loss": 1.6126, "step": 5000 }, { "epoch": 0.16, "eval_exact_match": 68.15336463223787, "eval_f1": 73.98256617857639, "eval_loss": 1.189887523651123, "eval_runtime": 1542.7266, "eval_samples_per_second": 7.463, "eval_steps_per_second": 3.732, "step": 5000 }, { "epoch": 0.33, "learning_rate": 2.674550430023456e-05, "loss": 1.1965, "step": 10000 }, { "epoch": 0.33, "eval_exact_match": 75.13475917231786, "eval_f1": 79.95184555682803, "eval_loss": 0.9056070446968079, "eval_runtime": 1533.9546, "eval_samples_per_second": 7.505, "eval_steps_per_second": 3.753, "step": 10000 }, { "epoch": 0.49, "learning_rate": 2.511695334897055e-05, "loss": 1.0769, "step": 15000 }, { "epoch": 0.49, "eval_exact_match": 77.56042427403929, "eval_f1": 82.22503770504048, "eval_loss": 0.9495312571525574, "eval_runtime": 1533.818, "eval_samples_per_second": 7.506, "eval_steps_per_second": 3.753, "step": 15000 }, { "epoch": 0.65, "learning_rate": 2.3488728173051864e-05, "loss": 0.9992, "step": 20000 }, { "epoch": 0.65, "eval_exact_match": 81.09024517475221, "eval_f1": 85.49992934663945, "eval_loss": 0.8323877453804016, "eval_runtime": 1541.3611, "eval_samples_per_second": 7.469, "eval_steps_per_second": 3.735, "step": 20000 }, { "epoch": 0.81, "learning_rate": 2.1860502997133176e-05, "loss": 0.9329, "step": 25000 }, { "epoch": 0.81, "eval_exact_match": 81.264127977743, "eval_f1": 85.43481659242194, "eval_loss": 0.7231199741363525, "eval_runtime": 1535.496, "eval_samples_per_second": 7.498, "eval_steps_per_second": 3.749, "step": 25000 }, { "epoch": 0.98, "learning_rate": 2.0232277821214492e-05, "loss": 0.8913, "step": 30000 }, { "epoch": 0.98, "eval_exact_match": 81.92488262910798, "eval_f1": 85.96065424027016, "eval_loss": 0.7569286227226257, "eval_runtime": 1529.0019, "eval_samples_per_second": 7.53, "eval_steps_per_second": 3.765, "step": 30000 } ], "max_steps": 92088, "num_train_epochs": 3, "total_flos": 6.619410874368e+16, "trial_name": null, "trial_params": null }