{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.303030303030305, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.6655681133270264, "eval_runtime": 31.9535, "eval_samples_per_second": 16.242, "eval_steps_per_second": 1.033, "step": 33 }, { "epoch": 2.0, "eval_loss": 0.34202826023101807, "eval_runtime": 32.6272, "eval_samples_per_second": 15.907, "eval_steps_per_second": 1.011, "step": 66 }, { "epoch": 3.0, "eval_loss": 0.1993253082036972, "eval_runtime": 34.1947, "eval_samples_per_second": 15.178, "eval_steps_per_second": 0.965, "step": 99 }, { "epoch": 4.0, "eval_loss": 0.12108779698610306, "eval_runtime": 33.5003, "eval_samples_per_second": 15.492, "eval_steps_per_second": 0.985, "step": 132 }, { "epoch": 5.0, "eval_loss": 0.08059267699718475, "eval_runtime": 33.0363, "eval_samples_per_second": 15.71, "eval_steps_per_second": 0.999, "step": 165 }, { "epoch": 6.0, "eval_loss": 0.0539543516933918, "eval_runtime": 33.3191, "eval_samples_per_second": 15.577, "eval_steps_per_second": 0.99, "step": 198 }, { "epoch": 7.0, "eval_loss": 0.033514220267534256, "eval_runtime": 33.9315, "eval_samples_per_second": 15.296, "eval_steps_per_second": 0.973, "step": 231 }, { "epoch": 8.0, "eval_loss": 0.02793893776834011, "eval_runtime": 33.1021, "eval_samples_per_second": 15.679, "eval_steps_per_second": 0.997, "step": 264 }, { "epoch": 9.0, "eval_loss": 0.017579322680830956, "eval_runtime": 33.1929, "eval_samples_per_second": 15.636, "eval_steps_per_second": 0.994, "step": 297 }, { "epoch": 10.0, "eval_loss": 0.017992401495575905, "eval_runtime": 33.5989, "eval_samples_per_second": 15.447, "eval_steps_per_second": 0.982, "step": 330 }, { "epoch": 11.0, "eval_loss": 0.011186002753674984, "eval_runtime": 33.2114, "eval_samples_per_second": 15.627, "eval_steps_per_second": 0.994, "step": 363 }, { "epoch": 12.0, "eval_loss": 0.009413644671440125, "eval_runtime": 33.6973, "eval_samples_per_second": 15.402, "eval_steps_per_second": 0.979, "step": 396 }, { "epoch": 13.0, "eval_loss": 0.008357277140021324, "eval_runtime": 34.2116, "eval_samples_per_second": 15.17, "eval_steps_per_second": 0.965, "step": 429 }, { "epoch": 14.0, "eval_loss": 0.006698057986795902, "eval_runtime": 32.3567, "eval_samples_per_second": 16.04, "eval_steps_per_second": 1.02, "step": 462 }, { "epoch": 15.0, "eval_loss": 0.005631112959235907, "eval_runtime": 30.3383, "eval_samples_per_second": 17.107, "eval_steps_per_second": 1.088, "step": 495 }, { "epoch": 15.15, "learning_rate": 1.2424242424242425e-05, "loss": 0.1575, "step": 500 }, { "epoch": 16.0, "eval_loss": 0.00457022013142705, "eval_runtime": 30.6053, "eval_samples_per_second": 16.958, "eval_steps_per_second": 1.078, "step": 528 }, { "epoch": 17.0, "eval_loss": 0.005157523322850466, "eval_runtime": 30.0767, "eval_samples_per_second": 17.256, "eval_steps_per_second": 1.097, "step": 561 }, { "epoch": 18.0, "eval_loss": 0.0044335490092635155, "eval_runtime": 29.0299, "eval_samples_per_second": 17.878, "eval_steps_per_second": 1.137, "step": 594 }, { "epoch": 19.0, "eval_loss": 0.003722449066117406, "eval_runtime": 28.4937, "eval_samples_per_second": 18.215, "eval_steps_per_second": 1.158, "step": 627 }, { "epoch": 20.0, "eval_loss": 0.004425578285008669, "eval_runtime": 32.1425, "eval_samples_per_second": 16.147, "eval_steps_per_second": 1.027, "step": 660 }, { "epoch": 21.0, "eval_loss": 0.0040681445971131325, "eval_runtime": 28.8069, "eval_samples_per_second": 18.017, "eval_steps_per_second": 1.146, "step": 693 }, { "epoch": 22.0, "eval_loss": 0.003019771073013544, "eval_runtime": 28.6404, "eval_samples_per_second": 18.121, "eval_steps_per_second": 1.152, "step": 726 }, { "epoch": 23.0, "eval_loss": 0.002829624805599451, "eval_runtime": 29.787, "eval_samples_per_second": 17.424, "eval_steps_per_second": 1.108, "step": 759 }, { "epoch": 24.0, "eval_loss": 0.002751641208305955, "eval_runtime": 28.377, "eval_samples_per_second": 18.289, "eval_steps_per_second": 1.163, "step": 792 }, { "epoch": 25.0, "eval_loss": 0.002945221494883299, "eval_runtime": 29.5958, "eval_samples_per_second": 17.536, "eval_steps_per_second": 1.115, "step": 825 }, { "epoch": 26.0, "eval_loss": 0.0026160639245063066, "eval_runtime": 29.0161, "eval_samples_per_second": 17.887, "eval_steps_per_second": 1.137, "step": 858 }, { "epoch": 27.0, "eval_loss": 0.002537393243983388, "eval_runtime": 28.4904, "eval_samples_per_second": 18.217, "eval_steps_per_second": 1.158, "step": 891 }, { "epoch": 28.0, "eval_loss": 0.00242584478110075, "eval_runtime": 29.437, "eval_samples_per_second": 17.631, "eval_steps_per_second": 1.121, "step": 924 }, { "epoch": 29.0, "eval_loss": 0.0026495754718780518, "eval_runtime": 28.3889, "eval_samples_per_second": 18.282, "eval_steps_per_second": 1.162, "step": 957 }, { "epoch": 30.0, "eval_loss": 0.0023259243462234735, "eval_runtime": 28.1977, "eval_samples_per_second": 18.406, "eval_steps_per_second": 1.17, "step": 990 }, { "epoch": 30.3, "learning_rate": 4.848484848484849e-06, "loss": 0.0065, "step": 1000 } ], "logging_steps": 500, "max_steps": 1320, "num_train_epochs": 40, "save_steps": 500, "total_flos": 278856790097838.0, "trial_name": null, "trial_params": null }