polejowska's picture
End of training
943c725
{
"best_metric": 2.206695079803467,
"best_model_checkpoint": "detr-r50-finetuned-mist1-gb-4ah-6l/checkpoint-5060",
"epoch": 50.0,
"eval_steps": 500,
"global_step": 5750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.812173913043479e-06,
"loss": 3.4044,
"step": 115
},
{
"epoch": 1.0,
"eval_loss": 3.004835844039917,
"eval_runtime": 5.8385,
"eval_samples_per_second": 6.851,
"eval_steps_per_second": 0.856,
"step": 115
},
{
"epoch": 2.0,
"learning_rate": 9.612173913043479e-06,
"loss": 3.1708,
"step": 230
},
{
"epoch": 2.0,
"eval_loss": 2.902806520462036,
"eval_runtime": 5.7867,
"eval_samples_per_second": 6.912,
"eval_steps_per_second": 0.864,
"step": 230
},
{
"epoch": 3.0,
"learning_rate": 9.412173913043479e-06,
"loss": 3.0756,
"step": 345
},
{
"epoch": 3.0,
"eval_loss": 2.853818416595459,
"eval_runtime": 5.8759,
"eval_samples_per_second": 6.808,
"eval_steps_per_second": 0.851,
"step": 345
},
{
"epoch": 4.0,
"learning_rate": 9.21217391304348e-06,
"loss": 2.9769,
"step": 460
},
{
"epoch": 4.0,
"eval_loss": 2.814894914627075,
"eval_runtime": 5.8445,
"eval_samples_per_second": 6.844,
"eval_steps_per_second": 0.855,
"step": 460
},
{
"epoch": 5.0,
"learning_rate": 9.01217391304348e-06,
"loss": 2.8999,
"step": 575
},
{
"epoch": 5.0,
"eval_loss": 2.7331762313842773,
"eval_runtime": 5.9217,
"eval_samples_per_second": 6.755,
"eval_steps_per_second": 0.844,
"step": 575
},
{
"epoch": 6.0,
"learning_rate": 8.81217391304348e-06,
"loss": 2.8609,
"step": 690
},
{
"epoch": 6.0,
"eval_loss": 2.7212414741516113,
"eval_runtime": 5.879,
"eval_samples_per_second": 6.804,
"eval_steps_per_second": 0.85,
"step": 690
},
{
"epoch": 7.0,
"learning_rate": 8.613913043478262e-06,
"loss": 2.8338,
"step": 805
},
{
"epoch": 7.0,
"eval_loss": 2.689382553100586,
"eval_runtime": 5.8299,
"eval_samples_per_second": 6.861,
"eval_steps_per_second": 0.858,
"step": 805
},
{
"epoch": 8.0,
"learning_rate": 8.413913043478262e-06,
"loss": 2.8103,
"step": 920
},
{
"epoch": 8.0,
"eval_loss": 2.704505681991577,
"eval_runtime": 5.7873,
"eval_samples_per_second": 6.912,
"eval_steps_per_second": 0.864,
"step": 920
},
{
"epoch": 9.0,
"learning_rate": 8.21391304347826e-06,
"loss": 2.8036,
"step": 1035
},
{
"epoch": 9.0,
"eval_loss": 2.778614044189453,
"eval_runtime": 5.7444,
"eval_samples_per_second": 6.963,
"eval_steps_per_second": 0.87,
"step": 1035
},
{
"epoch": 10.0,
"learning_rate": 8.013913043478262e-06,
"loss": 2.7486,
"step": 1150
},
{
"epoch": 10.0,
"eval_loss": 2.688080310821533,
"eval_runtime": 5.7616,
"eval_samples_per_second": 6.943,
"eval_steps_per_second": 0.868,
"step": 1150
},
{
"epoch": 11.0,
"learning_rate": 7.813913043478263e-06,
"loss": 2.7076,
"step": 1265
},
{
"epoch": 11.0,
"eval_loss": 2.605870485305786,
"eval_runtime": 5.8415,
"eval_samples_per_second": 6.848,
"eval_steps_per_second": 0.856,
"step": 1265
},
{
"epoch": 12.0,
"learning_rate": 7.615652173913044e-06,
"loss": 2.7156,
"step": 1380
},
{
"epoch": 12.0,
"eval_loss": 2.648322343826294,
"eval_runtime": 5.8314,
"eval_samples_per_second": 6.859,
"eval_steps_per_second": 0.857,
"step": 1380
},
{
"epoch": 13.0,
"learning_rate": 7.415652173913044e-06,
"loss": 2.6655,
"step": 1495
},
{
"epoch": 13.0,
"eval_loss": 2.5438005924224854,
"eval_runtime": 5.7667,
"eval_samples_per_second": 6.936,
"eval_steps_per_second": 0.867,
"step": 1495
},
{
"epoch": 14.0,
"learning_rate": 7.215652173913043e-06,
"loss": 2.6368,
"step": 1610
},
{
"epoch": 14.0,
"eval_loss": 2.5342297554016113,
"eval_runtime": 5.7673,
"eval_samples_per_second": 6.936,
"eval_steps_per_second": 0.867,
"step": 1610
},
{
"epoch": 15.0,
"learning_rate": 7.015652173913044e-06,
"loss": 2.5982,
"step": 1725
},
{
"epoch": 15.0,
"eval_loss": 2.5287182331085205,
"eval_runtime": 5.8321,
"eval_samples_per_second": 6.859,
"eval_steps_per_second": 0.857,
"step": 1725
},
{
"epoch": 16.0,
"learning_rate": 6.815652173913045e-06,
"loss": 2.6116,
"step": 1840
},
{
"epoch": 16.0,
"eval_loss": 2.4446094036102295,
"eval_runtime": 5.7529,
"eval_samples_per_second": 6.953,
"eval_steps_per_second": 0.869,
"step": 1840
},
{
"epoch": 17.0,
"learning_rate": 6.615652173913044e-06,
"loss": 2.5592,
"step": 1955
},
{
"epoch": 17.0,
"eval_loss": 2.4365103244781494,
"eval_runtime": 5.7499,
"eval_samples_per_second": 6.957,
"eval_steps_per_second": 0.87,
"step": 1955
},
{
"epoch": 18.0,
"learning_rate": 6.415652173913044e-06,
"loss": 2.5528,
"step": 2070
},
{
"epoch": 18.0,
"eval_loss": 2.484395980834961,
"eval_runtime": 5.7801,
"eval_samples_per_second": 6.92,
"eval_steps_per_second": 0.865,
"step": 2070
},
{
"epoch": 19.0,
"learning_rate": 6.215652173913044e-06,
"loss": 2.5248,
"step": 2185
},
{
"epoch": 19.0,
"eval_loss": 2.4194891452789307,
"eval_runtime": 5.7656,
"eval_samples_per_second": 6.938,
"eval_steps_per_second": 0.867,
"step": 2185
},
{
"epoch": 20.0,
"learning_rate": 6.015652173913044e-06,
"loss": 2.4853,
"step": 2300
},
{
"epoch": 20.0,
"eval_loss": 2.453843832015991,
"eval_runtime": 5.8034,
"eval_samples_per_second": 6.893,
"eval_steps_per_second": 0.862,
"step": 2300
},
{
"epoch": 21.0,
"learning_rate": 5.815652173913045e-06,
"loss": 2.5295,
"step": 2415
},
{
"epoch": 21.0,
"eval_loss": 2.569608211517334,
"eval_runtime": 5.8097,
"eval_samples_per_second": 6.885,
"eval_steps_per_second": 0.861,
"step": 2415
},
{
"epoch": 22.0,
"learning_rate": 5.615652173913044e-06,
"loss": 2.5069,
"step": 2530
},
{
"epoch": 22.0,
"eval_loss": 2.4536538124084473,
"eval_runtime": 5.7892,
"eval_samples_per_second": 6.909,
"eval_steps_per_second": 0.864,
"step": 2530
},
{
"epoch": 23.0,
"learning_rate": 5.4156521739130445e-06,
"loss": 2.4504,
"step": 2645
},
{
"epoch": 23.0,
"eval_loss": 2.515150547027588,
"eval_runtime": 5.8674,
"eval_samples_per_second": 6.817,
"eval_steps_per_second": 0.852,
"step": 2645
},
{
"epoch": 24.0,
"learning_rate": 5.215652173913044e-06,
"loss": 2.4447,
"step": 2760
},
{
"epoch": 24.0,
"eval_loss": 2.4431958198547363,
"eval_runtime": 5.9181,
"eval_samples_per_second": 6.759,
"eval_steps_per_second": 0.845,
"step": 2760
},
{
"epoch": 25.0,
"learning_rate": 5.015652173913044e-06,
"loss": 2.4303,
"step": 2875
},
{
"epoch": 25.0,
"eval_loss": 2.4032533168792725,
"eval_runtime": 5.9198,
"eval_samples_per_second": 6.757,
"eval_steps_per_second": 0.845,
"step": 2875
},
{
"epoch": 26.0,
"learning_rate": 4.815652173913044e-06,
"loss": 2.4137,
"step": 2990
},
{
"epoch": 26.0,
"eval_loss": 2.3795552253723145,
"eval_runtime": 5.8418,
"eval_samples_per_second": 6.847,
"eval_steps_per_second": 0.856,
"step": 2990
},
{
"epoch": 27.0,
"learning_rate": 4.615652173913044e-06,
"loss": 2.41,
"step": 3105
},
{
"epoch": 27.0,
"eval_loss": 2.3598742485046387,
"eval_runtime": 5.8868,
"eval_samples_per_second": 6.795,
"eval_steps_per_second": 0.849,
"step": 3105
},
{
"epoch": 28.0,
"learning_rate": 4.415652173913044e-06,
"loss": 2.3816,
"step": 3220
},
{
"epoch": 28.0,
"eval_loss": 2.401759386062622,
"eval_runtime": 5.8373,
"eval_samples_per_second": 6.852,
"eval_steps_per_second": 0.857,
"step": 3220
},
{
"epoch": 29.0,
"learning_rate": 4.215652173913044e-06,
"loss": 2.3752,
"step": 3335
},
{
"epoch": 29.0,
"eval_loss": 2.3115689754486084,
"eval_runtime": 5.8962,
"eval_samples_per_second": 6.784,
"eval_steps_per_second": 0.848,
"step": 3335
},
{
"epoch": 30.0,
"learning_rate": 4.0156521739130435e-06,
"loss": 2.3929,
"step": 3450
},
{
"epoch": 30.0,
"eval_loss": 2.310469150543213,
"eval_runtime": 5.8122,
"eval_samples_per_second": 6.882,
"eval_steps_per_second": 0.86,
"step": 3450
},
{
"epoch": 31.0,
"learning_rate": 3.815652173913044e-06,
"loss": 2.3791,
"step": 3565
},
{
"epoch": 31.0,
"eval_loss": 2.367689371109009,
"eval_runtime": 5.837,
"eval_samples_per_second": 6.853,
"eval_steps_per_second": 0.857,
"step": 3565
},
{
"epoch": 32.0,
"learning_rate": 3.615652173913044e-06,
"loss": 2.3639,
"step": 3680
},
{
"epoch": 32.0,
"eval_loss": 2.431199550628662,
"eval_runtime": 5.8179,
"eval_samples_per_second": 6.875,
"eval_steps_per_second": 0.859,
"step": 3680
},
{
"epoch": 33.0,
"learning_rate": 3.4156521739130437e-06,
"loss": 2.3475,
"step": 3795
},
{
"epoch": 33.0,
"eval_loss": 2.3051953315734863,
"eval_runtime": 5.7888,
"eval_samples_per_second": 6.91,
"eval_steps_per_second": 0.864,
"step": 3795
},
{
"epoch": 34.0,
"learning_rate": 3.2156521739130435e-06,
"loss": 2.3429,
"step": 3910
},
{
"epoch": 34.0,
"eval_loss": 2.322197675704956,
"eval_runtime": 5.7936,
"eval_samples_per_second": 6.904,
"eval_steps_per_second": 0.863,
"step": 3910
},
{
"epoch": 35.0,
"learning_rate": 3.015652173913044e-06,
"loss": 2.3115,
"step": 4025
},
{
"epoch": 35.0,
"eval_loss": 2.3126182556152344,
"eval_runtime": 5.8416,
"eval_samples_per_second": 6.847,
"eval_steps_per_second": 0.856,
"step": 4025
},
{
"epoch": 36.0,
"learning_rate": 2.815652173913044e-06,
"loss": 2.3276,
"step": 4140
},
{
"epoch": 36.0,
"eval_loss": 2.3154168128967285,
"eval_runtime": 5.7988,
"eval_samples_per_second": 6.898,
"eval_steps_per_second": 0.862,
"step": 4140
},
{
"epoch": 37.0,
"learning_rate": 2.6156521739130438e-06,
"loss": 2.3126,
"step": 4255
},
{
"epoch": 37.0,
"eval_loss": 2.353442430496216,
"eval_runtime": 5.7487,
"eval_samples_per_second": 6.958,
"eval_steps_per_second": 0.87,
"step": 4255
},
{
"epoch": 38.0,
"learning_rate": 2.417391304347826e-06,
"loss": 2.2934,
"step": 4370
},
{
"epoch": 38.0,
"eval_loss": 2.2566468715667725,
"eval_runtime": 5.902,
"eval_samples_per_second": 6.777,
"eval_steps_per_second": 0.847,
"step": 4370
},
{
"epoch": 39.0,
"learning_rate": 2.2173913043478264e-06,
"loss": 2.2901,
"step": 4485
},
{
"epoch": 39.0,
"eval_loss": 2.274752140045166,
"eval_runtime": 5.8191,
"eval_samples_per_second": 6.874,
"eval_steps_per_second": 0.859,
"step": 4485
},
{
"epoch": 40.0,
"learning_rate": 2.017391304347826e-06,
"loss": 2.2622,
"step": 4600
},
{
"epoch": 40.0,
"eval_loss": 2.2620463371276855,
"eval_runtime": 5.7456,
"eval_samples_per_second": 6.962,
"eval_steps_per_second": 0.87,
"step": 4600
},
{
"epoch": 41.0,
"learning_rate": 1.8173913043478262e-06,
"loss": 2.2707,
"step": 4715
},
{
"epoch": 41.0,
"eval_loss": 2.2336184978485107,
"eval_runtime": 5.8344,
"eval_samples_per_second": 6.856,
"eval_steps_per_second": 0.857,
"step": 4715
},
{
"epoch": 42.0,
"learning_rate": 1.6173913043478262e-06,
"loss": 2.2338,
"step": 4830
},
{
"epoch": 42.0,
"eval_loss": 2.224193811416626,
"eval_runtime": 5.7896,
"eval_samples_per_second": 6.909,
"eval_steps_per_second": 0.864,
"step": 4830
},
{
"epoch": 43.0,
"learning_rate": 1.4173913043478262e-06,
"loss": 2.2457,
"step": 4945
},
{
"epoch": 43.0,
"eval_loss": 2.219238758087158,
"eval_runtime": 5.8096,
"eval_samples_per_second": 6.885,
"eval_steps_per_second": 0.861,
"step": 4945
},
{
"epoch": 44.0,
"learning_rate": 1.2173913043478262e-06,
"loss": 2.227,
"step": 5060
},
{
"epoch": 44.0,
"eval_loss": 2.206695079803467,
"eval_runtime": 5.7447,
"eval_samples_per_second": 6.963,
"eval_steps_per_second": 0.87,
"step": 5060
},
{
"epoch": 45.0,
"learning_rate": 1.0173913043478262e-06,
"loss": 2.2215,
"step": 5175
},
{
"epoch": 45.0,
"eval_loss": 2.2183449268341064,
"eval_runtime": 5.7702,
"eval_samples_per_second": 6.932,
"eval_steps_per_second": 0.867,
"step": 5175
},
{
"epoch": 46.0,
"learning_rate": 8.173913043478261e-07,
"loss": 2.2075,
"step": 5290
},
{
"epoch": 46.0,
"eval_loss": 2.2187769412994385,
"eval_runtime": 5.7737,
"eval_samples_per_second": 6.928,
"eval_steps_per_second": 0.866,
"step": 5290
},
{
"epoch": 47.0,
"learning_rate": 6.173913043478262e-07,
"loss": 2.2286,
"step": 5405
},
{
"epoch": 47.0,
"eval_loss": 2.2306272983551025,
"eval_runtime": 5.7527,
"eval_samples_per_second": 6.953,
"eval_steps_per_second": 0.869,
"step": 5405
},
{
"epoch": 48.0,
"learning_rate": 4.1739130434782616e-07,
"loss": 2.2292,
"step": 5520
},
{
"epoch": 48.0,
"eval_loss": 2.2159781455993652,
"eval_runtime": 5.8705,
"eval_samples_per_second": 6.814,
"eval_steps_per_second": 0.852,
"step": 5520
},
{
"epoch": 49.0,
"learning_rate": 2.173913043478261e-07,
"loss": 2.219,
"step": 5635
},
{
"epoch": 49.0,
"eval_loss": 2.2207822799682617,
"eval_runtime": 5.7679,
"eval_samples_per_second": 6.935,
"eval_steps_per_second": 0.867,
"step": 5635
},
{
"epoch": 50.0,
"learning_rate": 1.739130434782609e-08,
"loss": 2.2125,
"step": 5750
},
{
"epoch": 50.0,
"eval_loss": 2.216578722000122,
"eval_runtime": 5.8453,
"eval_samples_per_second": 6.843,
"eval_steps_per_second": 0.855,
"step": 5750
},
{
"epoch": 50.0,
"step": 5750,
"total_flos": 1.098949102848e+19,
"train_loss": 2.5100708697775134,
"train_runtime": 4619.9285,
"train_samples_per_second": 4.978,
"train_steps_per_second": 1.245
}
],
"logging_steps": 500,
"max_steps": 5750,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 1.098949102848e+19,
"trial_name": null,
"trial_params": null
}