{ "best_metric": 0.8478260869565217, "best_model_checkpoint": "beit-base-patch16-224-ve-U13-b-80b\\checkpoint-312", "epoch": 73.84615384615384, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.3181706666946411, "eval_runtime": 0.8174, "eval_samples_per_second": 56.274, "eval_steps_per_second": 2.447, "step": 6 }, { "epoch": 1.54, "learning_rate": 1.0416666666666668e-05, "loss": 1.6182, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.3055709600448608, "eval_runtime": 0.8052, "eval_samples_per_second": 57.127, "eval_steps_per_second": 2.484, "step": 13 }, { "epoch": 2.92, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.2883871793746948, "eval_runtime": 0.8141, "eval_samples_per_second": 56.505, "eval_steps_per_second": 2.457, "step": 19 }, { "epoch": 3.08, "learning_rate": 2.0833333333333336e-05, "loss": 1.592, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.280716061592102, "eval_runtime": 0.844, "eval_samples_per_second": 54.501, "eval_steps_per_second": 2.37, "step": 26 }, { "epoch": 4.62, "learning_rate": 3.125e-05, "loss": 1.4756, "step": 30 }, { "epoch": 4.92, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.2991440296173096, "eval_runtime": 0.9835, "eval_samples_per_second": 46.77, "eval_steps_per_second": 2.033, "step": 32 }, { "epoch": 6.0, "eval_accuracy": 0.5, "eval_loss": 1.2451491355895996, "eval_runtime": 0.8204, "eval_samples_per_second": 56.071, "eval_steps_per_second": 2.438, "step": 39 }, { "epoch": 6.15, "learning_rate": 4.166666666666667e-05, "loss": 1.352, "step": 40 }, { "epoch": 6.92, "eval_accuracy": 0.5217391304347826, "eval_loss": 1.1844638586044312, "eval_runtime": 0.7995, "eval_samples_per_second": 57.535, "eval_steps_per_second": 2.502, "step": 45 }, { "epoch": 7.69, "learning_rate": 4.976851851851852e-05, "loss": 1.2143, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.0315006971359253, "eval_runtime": 0.8068, "eval_samples_per_second": 57.014, "eval_steps_per_second": 2.479, "step": 52 }, { "epoch": 8.92, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9288709759712219, "eval_runtime": 0.8497, "eval_samples_per_second": 54.135, "eval_steps_per_second": 2.354, "step": 58 }, { "epoch": 9.23, "learning_rate": 4.8611111111111115e-05, "loss": 1.0327, "step": 60 }, { "epoch": 10.0, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.8924712538719177, "eval_runtime": 0.8251, "eval_samples_per_second": 55.751, "eval_steps_per_second": 2.424, "step": 65 }, { "epoch": 10.77, "learning_rate": 4.745370370370371e-05, "loss": 0.8878, "step": 70 }, { "epoch": 10.92, "eval_accuracy": 0.5652173913043478, "eval_loss": 0.8632795214653015, "eval_runtime": 0.8493, "eval_samples_per_second": 54.163, "eval_steps_per_second": 2.355, "step": 71 }, { "epoch": 12.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.7565776705741882, "eval_runtime": 0.8341, "eval_samples_per_second": 55.152, "eval_steps_per_second": 2.398, "step": 78 }, { "epoch": 12.31, "learning_rate": 4.62962962962963e-05, "loss": 0.7712, "step": 80 }, { "epoch": 12.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7668902277946472, "eval_runtime": 0.8109, "eval_samples_per_second": 56.725, "eval_steps_per_second": 2.466, "step": 84 }, { "epoch": 13.85, "learning_rate": 4.5138888888888894e-05, "loss": 0.6808, "step": 90 }, { "epoch": 14.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.763481616973877, "eval_runtime": 0.8235, "eval_samples_per_second": 55.861, "eval_steps_per_second": 2.429, "step": 91 }, { "epoch": 14.92, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.865291953086853, "eval_runtime": 0.8503, "eval_samples_per_second": 54.099, "eval_steps_per_second": 2.352, "step": 97 }, { "epoch": 15.38, "learning_rate": 4.3981481481481486e-05, "loss": 0.5844, "step": 100 }, { "epoch": 16.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.7192845344543457, "eval_runtime": 0.8184, "eval_samples_per_second": 56.204, "eval_steps_per_second": 2.444, "step": 104 }, { "epoch": 16.92, "learning_rate": 4.282407407407408e-05, "loss": 0.4332, "step": 110 }, { "epoch": 16.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.6186434626579285, "eval_runtime": 0.8325, "eval_samples_per_second": 55.254, "eval_steps_per_second": 2.402, "step": 110 }, { "epoch": 18.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0295032262802124, "eval_runtime": 0.8366, "eval_samples_per_second": 54.987, "eval_steps_per_second": 2.391, "step": 117 }, { "epoch": 18.46, "learning_rate": 4.166666666666667e-05, "loss": 0.3607, "step": 120 }, { "epoch": 18.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8007151484489441, "eval_runtime": 0.8163, "eval_samples_per_second": 56.352, "eval_steps_per_second": 2.45, "step": 123 }, { "epoch": 20.0, "learning_rate": 4.0509259259259265e-05, "loss": 0.3134, "step": 130 }, { "epoch": 20.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.679006814956665, "eval_runtime": 0.817, "eval_samples_per_second": 56.305, "eval_steps_per_second": 2.448, "step": 130 }, { "epoch": 20.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.801273763179779, "eval_runtime": 0.8163, "eval_samples_per_second": 56.35, "eval_steps_per_second": 2.45, "step": 136 }, { "epoch": 21.54, "learning_rate": 3.935185185185186e-05, "loss": 0.2988, "step": 140 }, { "epoch": 22.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7480794787406921, "eval_runtime": 0.8337, "eval_samples_per_second": 55.173, "eval_steps_per_second": 2.399, "step": 143 }, { "epoch": 22.92, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9279541373252869, "eval_runtime": 0.8048, "eval_samples_per_second": 57.157, "eval_steps_per_second": 2.485, "step": 149 }, { "epoch": 23.08, "learning_rate": 3.8194444444444444e-05, "loss": 0.2487, "step": 150 }, { "epoch": 24.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.6541624665260315, "eval_runtime": 0.7972, "eval_samples_per_second": 57.704, "eval_steps_per_second": 2.509, "step": 156 }, { "epoch": 24.62, "learning_rate": 3.7037037037037037e-05, "loss": 0.1912, "step": 160 }, { "epoch": 24.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7133772373199463, "eval_runtime": 0.8524, "eval_samples_per_second": 53.966, "eval_steps_per_second": 2.346, "step": 162 }, { "epoch": 26.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8421454429626465, "eval_runtime": 0.8221, "eval_samples_per_second": 55.956, "eval_steps_per_second": 2.433, "step": 169 }, { "epoch": 26.15, "learning_rate": 3.587962962962963e-05, "loss": 0.1946, "step": 170 }, { "epoch": 26.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.7283844947814941, "eval_runtime": 0.8187, "eval_samples_per_second": 56.184, "eval_steps_per_second": 2.443, "step": 175 }, { "epoch": 27.69, "learning_rate": 3.472222222222222e-05, "loss": 0.1685, "step": 180 }, { "epoch": 28.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.7506698369979858, "eval_runtime": 0.8254, "eval_samples_per_second": 55.732, "eval_steps_per_second": 2.423, "step": 182 }, { "epoch": 28.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.761018693447113, "eval_runtime": 0.9254, "eval_samples_per_second": 49.709, "eval_steps_per_second": 2.161, "step": 188 }, { "epoch": 29.23, "learning_rate": 3.3564814814814815e-05, "loss": 0.1646, "step": 190 }, { "epoch": 30.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8012712001800537, "eval_runtime": 0.8337, "eval_samples_per_second": 55.178, "eval_steps_per_second": 2.399, "step": 195 }, { "epoch": 30.77, "learning_rate": 3.240740740740741e-05, "loss": 0.166, "step": 200 }, { "epoch": 30.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8803007006645203, "eval_runtime": 0.9377, "eval_samples_per_second": 49.054, "eval_steps_per_second": 2.133, "step": 201 }, { "epoch": 32.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.7894579172134399, "eval_runtime": 0.8269, "eval_samples_per_second": 55.629, "eval_steps_per_second": 2.419, "step": 208 }, { "epoch": 32.31, "learning_rate": 3.125e-05, "loss": 0.1372, "step": 210 }, { "epoch": 32.92, "eval_accuracy": 0.717391304347826, "eval_loss": 0.7760207056999207, "eval_runtime": 0.8341, "eval_samples_per_second": 55.149, "eval_steps_per_second": 2.398, "step": 214 }, { "epoch": 33.85, "learning_rate": 3.0092592592592593e-05, "loss": 0.1424, "step": 220 }, { "epoch": 34.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9389709830284119, "eval_runtime": 0.8316, "eval_samples_per_second": 55.318, "eval_steps_per_second": 2.405, "step": 221 }, { "epoch": 34.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7839226722717285, "eval_runtime": 0.8199, "eval_samples_per_second": 56.103, "eval_steps_per_second": 2.439, "step": 227 }, { "epoch": 35.38, "learning_rate": 2.8935185185185186e-05, "loss": 0.1399, "step": 230 }, { "epoch": 36.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9422168135643005, "eval_runtime": 0.8523, "eval_samples_per_second": 53.975, "eval_steps_per_second": 2.347, "step": 234 }, { "epoch": 36.92, "learning_rate": 2.777777777777778e-05, "loss": 0.1238, "step": 240 }, { "epoch": 36.92, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8710301518440247, "eval_runtime": 0.817, "eval_samples_per_second": 56.3, "eval_steps_per_second": 2.448, "step": 240 }, { "epoch": 38.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8683509230613708, "eval_runtime": 0.8145, "eval_samples_per_second": 56.475, "eval_steps_per_second": 2.455, "step": 247 }, { "epoch": 38.46, "learning_rate": 2.6620370370370372e-05, "loss": 0.123, "step": 250 }, { "epoch": 38.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8193817138671875, "eval_runtime": 0.8004, "eval_samples_per_second": 57.472, "eval_steps_per_second": 2.499, "step": 253 }, { "epoch": 40.0, "learning_rate": 2.5462962962962965e-05, "loss": 0.1381, "step": 260 }, { "epoch": 40.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9698395133018494, "eval_runtime": 0.8139, "eval_samples_per_second": 56.515, "eval_steps_per_second": 2.457, "step": 260 }, { "epoch": 40.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8545413017272949, "eval_runtime": 0.8323, "eval_samples_per_second": 55.266, "eval_steps_per_second": 2.403, "step": 266 }, { "epoch": 41.54, "learning_rate": 2.4305555555555558e-05, "loss": 0.1081, "step": 270 }, { "epoch": 42.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9924511313438416, "eval_runtime": 0.8284, "eval_samples_per_second": 55.531, "eval_steps_per_second": 2.414, "step": 273 }, { "epoch": 42.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.9319804906845093, "eval_runtime": 0.818, "eval_samples_per_second": 56.235, "eval_steps_per_second": 2.445, "step": 279 }, { "epoch": 43.08, "learning_rate": 2.314814814814815e-05, "loss": 0.0929, "step": 280 }, { "epoch": 44.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.0241632461547852, "eval_runtime": 0.8281, "eval_samples_per_second": 55.549, "eval_steps_per_second": 2.415, "step": 286 }, { "epoch": 44.62, "learning_rate": 2.1990740740740743e-05, "loss": 0.0898, "step": 290 }, { "epoch": 44.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9411269426345825, "eval_runtime": 0.8388, "eval_samples_per_second": 54.838, "eval_steps_per_second": 2.384, "step": 292 }, { "epoch": 46.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8995408415794373, "eval_runtime": 0.8513, "eval_samples_per_second": 54.038, "eval_steps_per_second": 2.349, "step": 299 }, { "epoch": 46.15, "learning_rate": 2.0833333333333336e-05, "loss": 0.12, "step": 300 }, { "epoch": 46.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.774114191532135, "eval_runtime": 0.8167, "eval_samples_per_second": 56.327, "eval_steps_per_second": 2.449, "step": 305 }, { "epoch": 47.69, "learning_rate": 1.967592592592593e-05, "loss": 0.1126, "step": 310 }, { "epoch": 48.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.7122377157211304, "eval_runtime": 0.8146, "eval_samples_per_second": 56.469, "eval_steps_per_second": 2.455, "step": 312 }, { "epoch": 48.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9098690152168274, "eval_runtime": 0.8003, "eval_samples_per_second": 57.477, "eval_steps_per_second": 2.499, "step": 318 }, { "epoch": 49.23, "learning_rate": 1.8518518518518518e-05, "loss": 0.1088, "step": 320 }, { "epoch": 50.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.114755630493164, "eval_runtime": 0.8219, "eval_samples_per_second": 55.97, "eval_steps_per_second": 2.433, "step": 325 }, { "epoch": 50.77, "learning_rate": 1.736111111111111e-05, "loss": 0.0851, "step": 330 }, { "epoch": 50.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.9296599626541138, "eval_runtime": 0.8335, "eval_samples_per_second": 55.191, "eval_steps_per_second": 2.4, "step": 331 }, { "epoch": 52.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8800845742225647, "eval_runtime": 0.8362, "eval_samples_per_second": 55.008, "eval_steps_per_second": 2.392, "step": 338 }, { "epoch": 52.31, "learning_rate": 1.6203703703703704e-05, "loss": 0.1001, "step": 340 }, { "epoch": 52.92, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.8427883386611938, "eval_runtime": 0.8167, "eval_samples_per_second": 56.324, "eval_steps_per_second": 2.449, "step": 344 }, { "epoch": 53.85, "learning_rate": 1.5046296296296297e-05, "loss": 0.0718, "step": 350 }, { "epoch": 54.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9720932245254517, "eval_runtime": 0.8541, "eval_samples_per_second": 53.859, "eval_steps_per_second": 2.342, "step": 351 }, { "epoch": 54.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8770763278007507, "eval_runtime": 0.884, "eval_samples_per_second": 52.036, "eval_steps_per_second": 2.262, "step": 357 }, { "epoch": 55.38, "learning_rate": 1.388888888888889e-05, "loss": 0.0842, "step": 360 }, { "epoch": 56.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9981948137283325, "eval_runtime": 0.8228, "eval_samples_per_second": 55.903, "eval_steps_per_second": 2.431, "step": 364 }, { "epoch": 56.92, "learning_rate": 1.2731481481481482e-05, "loss": 0.1069, "step": 370 }, { "epoch": 56.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.1083166599273682, "eval_runtime": 0.8167, "eval_samples_per_second": 56.321, "eval_steps_per_second": 2.449, "step": 370 }, { "epoch": 58.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9071816205978394, "eval_runtime": 0.8396, "eval_samples_per_second": 54.785, "eval_steps_per_second": 2.382, "step": 377 }, { "epoch": 58.46, "learning_rate": 1.1574074074074075e-05, "loss": 0.0803, "step": 380 }, { "epoch": 58.92, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.7978565096855164, "eval_runtime": 0.8347, "eval_samples_per_second": 55.108, "eval_steps_per_second": 2.396, "step": 383 }, { "epoch": 60.0, "learning_rate": 1.0416666666666668e-05, "loss": 0.0752, "step": 390 }, { "epoch": 60.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.7488988041877747, "eval_runtime": 0.8278, "eval_samples_per_second": 55.57, "eval_steps_per_second": 2.416, "step": 390 }, { "epoch": 60.92, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.802253246307373, "eval_runtime": 0.8336, "eval_samples_per_second": 55.183, "eval_steps_per_second": 2.399, "step": 396 }, { "epoch": 61.54, "learning_rate": 9.259259259259259e-06, "loss": 0.0646, "step": 400 }, { "epoch": 62.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.802682101726532, "eval_runtime": 1.0467, "eval_samples_per_second": 43.949, "eval_steps_per_second": 1.911, "step": 403 }, { "epoch": 62.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8274930119514465, "eval_runtime": 0.8068, "eval_samples_per_second": 57.017, "eval_steps_per_second": 2.479, "step": 409 }, { "epoch": 63.08, "learning_rate": 8.101851851851852e-06, "loss": 0.0829, "step": 410 }, { "epoch": 64.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8587205410003662, "eval_runtime": 0.8175, "eval_samples_per_second": 56.27, "eval_steps_per_second": 2.447, "step": 416 }, { "epoch": 64.62, "learning_rate": 6.944444444444445e-06, "loss": 0.0616, "step": 420 }, { "epoch": 64.92, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8869741559028625, "eval_runtime": 0.8172, "eval_samples_per_second": 56.29, "eval_steps_per_second": 2.447, "step": 422 }, { "epoch": 66.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8927735090255737, "eval_runtime": 0.8177, "eval_samples_per_second": 56.255, "eval_steps_per_second": 2.446, "step": 429 }, { "epoch": 66.15, "learning_rate": 5.787037037037038e-06, "loss": 0.0693, "step": 430 }, { "epoch": 66.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9289346933364868, "eval_runtime": 0.9088, "eval_samples_per_second": 50.618, "eval_steps_per_second": 2.201, "step": 435 }, { "epoch": 67.69, "learning_rate": 4.6296296296296296e-06, "loss": 0.0657, "step": 440 }, { "epoch": 68.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9603818655014038, "eval_runtime": 0.8139, "eval_samples_per_second": 56.517, "eval_steps_per_second": 2.457, "step": 442 }, { "epoch": 68.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9559684991836548, "eval_runtime": 0.8119, "eval_samples_per_second": 56.658, "eval_steps_per_second": 2.463, "step": 448 }, { "epoch": 69.23, "learning_rate": 3.4722222222222224e-06, "loss": 0.0588, "step": 450 }, { "epoch": 70.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9543905854225159, "eval_runtime": 0.8184, "eval_samples_per_second": 56.206, "eval_steps_per_second": 2.444, "step": 455 }, { "epoch": 70.77, "learning_rate": 2.3148148148148148e-06, "loss": 0.0578, "step": 460 }, { "epoch": 70.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9418670535087585, "eval_runtime": 0.8002, "eval_samples_per_second": 57.489, "eval_steps_per_second": 2.5, "step": 461 }, { "epoch": 72.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9473724961280823, "eval_runtime": 0.814, "eval_samples_per_second": 56.514, "eval_steps_per_second": 2.457, "step": 468 }, { "epoch": 72.31, "learning_rate": 1.1574074074074074e-06, "loss": 0.0638, "step": 470 }, { "epoch": 72.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.953983724117279, "eval_runtime": 0.8002, "eval_samples_per_second": 57.487, "eval_steps_per_second": 2.499, "step": 474 }, { "epoch": 73.85, "learning_rate": 0.0, "loss": 0.0592, "step": 480 }, { "epoch": 73.85, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9548673033714294, "eval_runtime": 0.8156, "eval_samples_per_second": 56.403, "eval_steps_per_second": 2.452, "step": 480 }, { "epoch": 73.85, "step": 480, "total_flos": 4.685648449140449e+18, "train_loss": 0.3410949035237233, "train_runtime": 1069.8093, "train_samples_per_second": 61.245, "train_steps_per_second": 0.449 } ], "logging_steps": 10, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "total_flos": 4.685648449140449e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }