{ "best_metric": 2.206695079803467, "best_model_checkpoint": "detr-r50-finetuned-mist1-gb-4ah-6l/checkpoint-5060", "epoch": 50.0, "eval_steps": 500, "global_step": 5750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 9.812173913043479e-06, "loss": 3.4044, "step": 115 }, { "epoch": 1.0, "eval_loss": 3.004835844039917, "eval_runtime": 5.8385, "eval_samples_per_second": 6.851, "eval_steps_per_second": 0.856, "step": 115 }, { "epoch": 2.0, "learning_rate": 9.612173913043479e-06, "loss": 3.1708, "step": 230 }, { "epoch": 2.0, "eval_loss": 2.902806520462036, "eval_runtime": 5.7867, "eval_samples_per_second": 6.912, "eval_steps_per_second": 0.864, "step": 230 }, { "epoch": 3.0, "learning_rate": 9.412173913043479e-06, "loss": 3.0756, "step": 345 }, { "epoch": 3.0, "eval_loss": 2.853818416595459, "eval_runtime": 5.8759, "eval_samples_per_second": 6.808, "eval_steps_per_second": 0.851, "step": 345 }, { "epoch": 4.0, "learning_rate": 9.21217391304348e-06, "loss": 2.9769, "step": 460 }, { "epoch": 4.0, "eval_loss": 2.814894914627075, "eval_runtime": 5.8445, "eval_samples_per_second": 6.844, "eval_steps_per_second": 0.855, "step": 460 }, { "epoch": 5.0, "learning_rate": 9.01217391304348e-06, "loss": 2.8999, "step": 575 }, { "epoch": 5.0, "eval_loss": 2.7331762313842773, "eval_runtime": 5.9217, "eval_samples_per_second": 6.755, "eval_steps_per_second": 0.844, "step": 575 }, { "epoch": 6.0, "learning_rate": 8.81217391304348e-06, "loss": 2.8609, "step": 690 }, { "epoch": 6.0, "eval_loss": 2.7212414741516113, "eval_runtime": 5.879, "eval_samples_per_second": 6.804, "eval_steps_per_second": 0.85, "step": 690 }, { "epoch": 7.0, "learning_rate": 8.613913043478262e-06, "loss": 2.8338, "step": 805 }, { "epoch": 7.0, "eval_loss": 2.689382553100586, "eval_runtime": 5.8299, "eval_samples_per_second": 6.861, "eval_steps_per_second": 0.858, "step": 805 }, { "epoch": 8.0, "learning_rate": 8.413913043478262e-06, "loss": 2.8103, "step": 920 }, { "epoch": 8.0, "eval_loss": 2.704505681991577, "eval_runtime": 5.7873, "eval_samples_per_second": 6.912, "eval_steps_per_second": 0.864, "step": 920 }, { "epoch": 9.0, "learning_rate": 8.21391304347826e-06, "loss": 2.8036, "step": 1035 }, { "epoch": 9.0, "eval_loss": 2.778614044189453, "eval_runtime": 5.7444, "eval_samples_per_second": 6.963, "eval_steps_per_second": 0.87, "step": 1035 }, { "epoch": 10.0, "learning_rate": 8.013913043478262e-06, "loss": 2.7486, "step": 1150 }, { "epoch": 10.0, "eval_loss": 2.688080310821533, "eval_runtime": 5.7616, "eval_samples_per_second": 6.943, "eval_steps_per_second": 0.868, "step": 1150 }, { "epoch": 11.0, "learning_rate": 7.813913043478263e-06, "loss": 2.7076, "step": 1265 }, { "epoch": 11.0, "eval_loss": 2.605870485305786, "eval_runtime": 5.8415, "eval_samples_per_second": 6.848, "eval_steps_per_second": 0.856, "step": 1265 }, { "epoch": 12.0, "learning_rate": 7.615652173913044e-06, "loss": 2.7156, "step": 1380 }, { "epoch": 12.0, "eval_loss": 2.648322343826294, "eval_runtime": 5.8314, "eval_samples_per_second": 6.859, "eval_steps_per_second": 0.857, "step": 1380 }, { "epoch": 13.0, "learning_rate": 7.415652173913044e-06, "loss": 2.6655, "step": 1495 }, { "epoch": 13.0, "eval_loss": 2.5438005924224854, "eval_runtime": 5.7667, "eval_samples_per_second": 6.936, "eval_steps_per_second": 0.867, "step": 1495 }, { "epoch": 14.0, "learning_rate": 7.215652173913043e-06, "loss": 2.6368, "step": 1610 }, { "epoch": 14.0, "eval_loss": 2.5342297554016113, "eval_runtime": 5.7673, "eval_samples_per_second": 6.936, "eval_steps_per_second": 0.867, "step": 1610 }, { "epoch": 15.0, "learning_rate": 7.015652173913044e-06, "loss": 2.5982, "step": 1725 }, { "epoch": 15.0, "eval_loss": 2.5287182331085205, "eval_runtime": 5.8321, "eval_samples_per_second": 6.859, "eval_steps_per_second": 0.857, "step": 1725 }, { "epoch": 16.0, "learning_rate": 6.815652173913045e-06, "loss": 2.6116, "step": 1840 }, { "epoch": 16.0, "eval_loss": 2.4446094036102295, "eval_runtime": 5.7529, "eval_samples_per_second": 6.953, "eval_steps_per_second": 0.869, "step": 1840 }, { "epoch": 17.0, "learning_rate": 6.615652173913044e-06, "loss": 2.5592, "step": 1955 }, { "epoch": 17.0, "eval_loss": 2.4365103244781494, "eval_runtime": 5.7499, "eval_samples_per_second": 6.957, "eval_steps_per_second": 0.87, "step": 1955 }, { "epoch": 18.0, "learning_rate": 6.415652173913044e-06, "loss": 2.5528, "step": 2070 }, { "epoch": 18.0, "eval_loss": 2.484395980834961, "eval_runtime": 5.7801, "eval_samples_per_second": 6.92, "eval_steps_per_second": 0.865, "step": 2070 }, { "epoch": 19.0, "learning_rate": 6.215652173913044e-06, "loss": 2.5248, "step": 2185 }, { "epoch": 19.0, "eval_loss": 2.4194891452789307, "eval_runtime": 5.7656, "eval_samples_per_second": 6.938, "eval_steps_per_second": 0.867, "step": 2185 }, { "epoch": 20.0, "learning_rate": 6.015652173913044e-06, "loss": 2.4853, "step": 2300 }, { "epoch": 20.0, "eval_loss": 2.453843832015991, "eval_runtime": 5.8034, "eval_samples_per_second": 6.893, "eval_steps_per_second": 0.862, "step": 2300 }, { "epoch": 21.0, "learning_rate": 5.815652173913045e-06, "loss": 2.5295, "step": 2415 }, { "epoch": 21.0, "eval_loss": 2.569608211517334, "eval_runtime": 5.8097, "eval_samples_per_second": 6.885, "eval_steps_per_second": 0.861, "step": 2415 }, { "epoch": 22.0, "learning_rate": 5.615652173913044e-06, "loss": 2.5069, "step": 2530 }, { "epoch": 22.0, "eval_loss": 2.4536538124084473, "eval_runtime": 5.7892, "eval_samples_per_second": 6.909, "eval_steps_per_second": 0.864, "step": 2530 }, { "epoch": 23.0, "learning_rate": 5.4156521739130445e-06, "loss": 2.4504, "step": 2645 }, { "epoch": 23.0, "eval_loss": 2.515150547027588, "eval_runtime": 5.8674, "eval_samples_per_second": 6.817, "eval_steps_per_second": 0.852, "step": 2645 }, { "epoch": 24.0, "learning_rate": 5.215652173913044e-06, "loss": 2.4447, "step": 2760 }, { "epoch": 24.0, "eval_loss": 2.4431958198547363, "eval_runtime": 5.9181, "eval_samples_per_second": 6.759, "eval_steps_per_second": 0.845, "step": 2760 }, { "epoch": 25.0, "learning_rate": 5.015652173913044e-06, "loss": 2.4303, "step": 2875 }, { "epoch": 25.0, "eval_loss": 2.4032533168792725, "eval_runtime": 5.9198, "eval_samples_per_second": 6.757, "eval_steps_per_second": 0.845, "step": 2875 }, { "epoch": 26.0, "learning_rate": 4.815652173913044e-06, "loss": 2.4137, "step": 2990 }, { "epoch": 26.0, "eval_loss": 2.3795552253723145, "eval_runtime": 5.8418, "eval_samples_per_second": 6.847, "eval_steps_per_second": 0.856, "step": 2990 }, { "epoch": 27.0, "learning_rate": 4.615652173913044e-06, "loss": 2.41, "step": 3105 }, { "epoch": 27.0, "eval_loss": 2.3598742485046387, "eval_runtime": 5.8868, "eval_samples_per_second": 6.795, "eval_steps_per_second": 0.849, "step": 3105 }, { "epoch": 28.0, "learning_rate": 4.415652173913044e-06, "loss": 2.3816, "step": 3220 }, { "epoch": 28.0, "eval_loss": 2.401759386062622, "eval_runtime": 5.8373, "eval_samples_per_second": 6.852, "eval_steps_per_second": 0.857, "step": 3220 }, { "epoch": 29.0, "learning_rate": 4.215652173913044e-06, "loss": 2.3752, "step": 3335 }, { "epoch": 29.0, "eval_loss": 2.3115689754486084, "eval_runtime": 5.8962, "eval_samples_per_second": 6.784, "eval_steps_per_second": 0.848, "step": 3335 }, { "epoch": 30.0, "learning_rate": 4.0156521739130435e-06, "loss": 2.3929, "step": 3450 }, { "epoch": 30.0, "eval_loss": 2.310469150543213, "eval_runtime": 5.8122, "eval_samples_per_second": 6.882, "eval_steps_per_second": 0.86, "step": 3450 }, { "epoch": 31.0, "learning_rate": 3.815652173913044e-06, "loss": 2.3791, "step": 3565 }, { "epoch": 31.0, "eval_loss": 2.367689371109009, "eval_runtime": 5.837, "eval_samples_per_second": 6.853, "eval_steps_per_second": 0.857, "step": 3565 }, { "epoch": 32.0, "learning_rate": 3.615652173913044e-06, "loss": 2.3639, "step": 3680 }, { "epoch": 32.0, "eval_loss": 2.431199550628662, "eval_runtime": 5.8179, "eval_samples_per_second": 6.875, "eval_steps_per_second": 0.859, "step": 3680 }, { "epoch": 33.0, "learning_rate": 3.4156521739130437e-06, "loss": 2.3475, "step": 3795 }, { "epoch": 33.0, "eval_loss": 2.3051953315734863, "eval_runtime": 5.7888, "eval_samples_per_second": 6.91, "eval_steps_per_second": 0.864, "step": 3795 }, { "epoch": 34.0, "learning_rate": 3.2156521739130435e-06, "loss": 2.3429, "step": 3910 }, { "epoch": 34.0, "eval_loss": 2.322197675704956, "eval_runtime": 5.7936, "eval_samples_per_second": 6.904, "eval_steps_per_second": 0.863, "step": 3910 }, { "epoch": 35.0, "learning_rate": 3.015652173913044e-06, "loss": 2.3115, "step": 4025 }, { "epoch": 35.0, "eval_loss": 2.3126182556152344, "eval_runtime": 5.8416, "eval_samples_per_second": 6.847, "eval_steps_per_second": 0.856, "step": 4025 }, { "epoch": 36.0, "learning_rate": 2.815652173913044e-06, "loss": 2.3276, "step": 4140 }, { "epoch": 36.0, "eval_loss": 2.3154168128967285, "eval_runtime": 5.7988, "eval_samples_per_second": 6.898, "eval_steps_per_second": 0.862, "step": 4140 }, { "epoch": 37.0, "learning_rate": 2.6156521739130438e-06, "loss": 2.3126, "step": 4255 }, { "epoch": 37.0, "eval_loss": 2.353442430496216, "eval_runtime": 5.7487, "eval_samples_per_second": 6.958, "eval_steps_per_second": 0.87, "step": 4255 }, { "epoch": 38.0, "learning_rate": 2.417391304347826e-06, "loss": 2.2934, "step": 4370 }, { "epoch": 38.0, "eval_loss": 2.2566468715667725, "eval_runtime": 5.902, "eval_samples_per_second": 6.777, "eval_steps_per_second": 0.847, "step": 4370 }, { "epoch": 39.0, "learning_rate": 2.2173913043478264e-06, "loss": 2.2901, "step": 4485 }, { "epoch": 39.0, "eval_loss": 2.274752140045166, "eval_runtime": 5.8191, "eval_samples_per_second": 6.874, "eval_steps_per_second": 0.859, "step": 4485 }, { "epoch": 40.0, "learning_rate": 2.017391304347826e-06, "loss": 2.2622, "step": 4600 }, { "epoch": 40.0, "eval_loss": 2.2620463371276855, "eval_runtime": 5.7456, "eval_samples_per_second": 6.962, "eval_steps_per_second": 0.87, "step": 4600 }, { "epoch": 41.0, "learning_rate": 1.8173913043478262e-06, "loss": 2.2707, "step": 4715 }, { "epoch": 41.0, "eval_loss": 2.2336184978485107, "eval_runtime": 5.8344, "eval_samples_per_second": 6.856, "eval_steps_per_second": 0.857, "step": 4715 }, { "epoch": 42.0, "learning_rate": 1.6173913043478262e-06, "loss": 2.2338, "step": 4830 }, { "epoch": 42.0, "eval_loss": 2.224193811416626, "eval_runtime": 5.7896, "eval_samples_per_second": 6.909, "eval_steps_per_second": 0.864, "step": 4830 }, { "epoch": 43.0, "learning_rate": 1.4173913043478262e-06, "loss": 2.2457, "step": 4945 }, { "epoch": 43.0, "eval_loss": 2.219238758087158, "eval_runtime": 5.8096, "eval_samples_per_second": 6.885, "eval_steps_per_second": 0.861, "step": 4945 }, { "epoch": 44.0, "learning_rate": 1.2173913043478262e-06, "loss": 2.227, "step": 5060 }, { "epoch": 44.0, "eval_loss": 2.206695079803467, "eval_runtime": 5.7447, "eval_samples_per_second": 6.963, "eval_steps_per_second": 0.87, "step": 5060 }, { "epoch": 45.0, "learning_rate": 1.0173913043478262e-06, "loss": 2.2215, "step": 5175 }, { "epoch": 45.0, "eval_loss": 2.2183449268341064, "eval_runtime": 5.7702, "eval_samples_per_second": 6.932, "eval_steps_per_second": 0.867, "step": 5175 }, { "epoch": 46.0, "learning_rate": 8.173913043478261e-07, "loss": 2.2075, "step": 5290 }, { "epoch": 46.0, "eval_loss": 2.2187769412994385, "eval_runtime": 5.7737, "eval_samples_per_second": 6.928, "eval_steps_per_second": 0.866, "step": 5290 }, { "epoch": 47.0, "learning_rate": 6.173913043478262e-07, "loss": 2.2286, "step": 5405 }, { "epoch": 47.0, "eval_loss": 2.2306272983551025, "eval_runtime": 5.7527, "eval_samples_per_second": 6.953, "eval_steps_per_second": 0.869, "step": 5405 }, { "epoch": 48.0, "learning_rate": 4.1739130434782616e-07, "loss": 2.2292, "step": 5520 }, { "epoch": 48.0, "eval_loss": 2.2159781455993652, "eval_runtime": 5.8705, "eval_samples_per_second": 6.814, "eval_steps_per_second": 0.852, "step": 5520 }, { "epoch": 49.0, "learning_rate": 2.173913043478261e-07, "loss": 2.219, "step": 5635 }, { "epoch": 49.0, "eval_loss": 2.2207822799682617, "eval_runtime": 5.7679, "eval_samples_per_second": 6.935, "eval_steps_per_second": 0.867, "step": 5635 }, { "epoch": 50.0, "learning_rate": 1.739130434782609e-08, "loss": 2.2125, "step": 5750 }, { "epoch": 50.0, "eval_loss": 2.216578722000122, "eval_runtime": 5.8453, "eval_samples_per_second": 6.843, "eval_steps_per_second": 0.855, "step": 5750 }, { "epoch": 50.0, "step": 5750, "total_flos": 1.098949102848e+19, "train_loss": 2.5100708697775134, "train_runtime": 4619.9285, "train_samples_per_second": 4.978, "train_steps_per_second": 1.245 } ], "logging_steps": 500, "max_steps": 5750, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.098949102848e+19, "trial_name": null, "trial_params": null }