{ "best_metric": 0.9950738916256158, "best_model_checkpoint": "swinv2-large-patch4-window12to16-192to256-22kto1k-ft-finetuned-Lesion-Classification-HAM10000-3/checkpoint-1140", "epoch": 9.978118161925602, "eval_steps": 500, "global_step": 1140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.385964912280702e-07, "loss": 2.0125, "step": 5 }, { "epoch": 0.09, "learning_rate": 8.771929824561404e-07, "loss": 2.0062, "step": 10 }, { "epoch": 0.13, "learning_rate": 1.3157894736842106e-06, "loss": 2.0213, "step": 15 }, { "epoch": 0.18, "learning_rate": 1.7543859649122807e-06, "loss": 2.0005, "step": 20 }, { "epoch": 0.22, "learning_rate": 2.1929824561403507e-06, "loss": 1.9605, "step": 25 }, { "epoch": 0.26, "learning_rate": 2.631578947368421e-06, "loss": 1.9693, "step": 30 }, { "epoch": 0.31, "learning_rate": 3.070175438596491e-06, "loss": 1.9539, "step": 35 }, { "epoch": 0.35, "learning_rate": 3.5087719298245615e-06, "loss": 1.8969, "step": 40 }, { "epoch": 0.39, "learning_rate": 3.9473684210526315e-06, "loss": 1.8464, "step": 45 }, { "epoch": 0.44, "learning_rate": 4.3859649122807014e-06, "loss": 1.8472, "step": 50 }, { "epoch": 0.48, "learning_rate": 4.824561403508772e-06, "loss": 1.8315, "step": 55 }, { "epoch": 0.53, "learning_rate": 5.263157894736842e-06, "loss": 1.7747, "step": 60 }, { "epoch": 0.57, "learning_rate": 5.701754385964912e-06, "loss": 1.6972, "step": 65 }, { "epoch": 0.61, "learning_rate": 6.140350877192982e-06, "loss": 1.5887, "step": 70 }, { "epoch": 0.66, "learning_rate": 6.578947368421053e-06, "loss": 1.5541, "step": 75 }, { "epoch": 0.7, "learning_rate": 7.017543859649123e-06, "loss": 1.4549, "step": 80 }, { "epoch": 0.74, "learning_rate": 7.456140350877193e-06, "loss": 1.4104, "step": 85 }, { "epoch": 0.79, "learning_rate": 7.894736842105263e-06, "loss": 1.3381, "step": 90 }, { "epoch": 0.83, "learning_rate": 8.333333333333334e-06, "loss": 1.2994, "step": 95 }, { "epoch": 0.88, "learning_rate": 8.771929824561403e-06, "loss": 1.1369, "step": 100 }, { "epoch": 0.92, "learning_rate": 9.210526315789474e-06, "loss": 1.181, "step": 105 }, { "epoch": 0.96, "learning_rate": 9.649122807017545e-06, "loss": 1.1885, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.6592775041050903, "eval_loss": 0.871817946434021, "eval_runtime": 272.9497, "eval_samples_per_second": 4.462, "eval_steps_per_second": 0.561, "step": 114 }, { "epoch": 1.01, "learning_rate": 1.0087719298245614e-05, "loss": 1.0284, "step": 115 }, { "epoch": 1.05, "learning_rate": 1.0526315789473684e-05, "loss": 0.9502, "step": 120 }, { "epoch": 1.09, "learning_rate": 1.0964912280701754e-05, "loss": 1.0752, "step": 125 }, { "epoch": 1.14, "learning_rate": 1.1403508771929824e-05, "loss": 0.8783, "step": 130 }, { "epoch": 1.18, "learning_rate": 1.1842105263157895e-05, "loss": 0.7479, "step": 135 }, { "epoch": 1.23, "learning_rate": 1.2280701754385964e-05, "loss": 0.9468, "step": 140 }, { "epoch": 1.27, "learning_rate": 1.2719298245614037e-05, "loss": 0.8863, "step": 145 }, { "epoch": 1.31, "learning_rate": 1.3157894736842106e-05, "loss": 0.9492, "step": 150 }, { "epoch": 1.36, "learning_rate": 1.3596491228070177e-05, "loss": 0.8521, "step": 155 }, { "epoch": 1.4, "learning_rate": 1.4035087719298246e-05, "loss": 0.7445, "step": 160 }, { "epoch": 1.44, "learning_rate": 1.4473684210526317e-05, "loss": 0.9228, "step": 165 }, { "epoch": 1.49, "learning_rate": 1.4912280701754386e-05, "loss": 0.8637, "step": 170 }, { "epoch": 1.53, "learning_rate": 1.5350877192982457e-05, "loss": 0.7486, "step": 175 }, { "epoch": 1.58, "learning_rate": 1.5789473684210526e-05, "loss": 0.9474, "step": 180 }, { "epoch": 1.62, "learning_rate": 1.62280701754386e-05, "loss": 0.7842, "step": 185 }, { "epoch": 1.66, "learning_rate": 1.6666666666666667e-05, "loss": 0.8101, "step": 190 }, { "epoch": 1.71, "learning_rate": 1.7105263157894737e-05, "loss": 0.8701, "step": 195 }, { "epoch": 1.75, "learning_rate": 1.7543859649122806e-05, "loss": 0.8223, "step": 200 }, { "epoch": 1.79, "learning_rate": 1.7982456140350878e-05, "loss": 0.6153, "step": 205 }, { "epoch": 1.84, "learning_rate": 1.8421052631578947e-05, "loss": 0.7246, "step": 210 }, { "epoch": 1.88, "learning_rate": 1.885964912280702e-05, "loss": 0.5416, "step": 215 }, { "epoch": 1.93, "learning_rate": 1.929824561403509e-05, "loss": 0.5653, "step": 220 }, { "epoch": 1.97, "learning_rate": 1.9736842105263158e-05, "loss": 0.7037, "step": 225 }, { "epoch": 2.0, "eval_accuracy": 0.8637110016420362, "eval_loss": 0.4207600951194763, "eval_runtime": 19.4349, "eval_samples_per_second": 62.671, "eval_steps_per_second": 7.872, "step": 228 }, { "epoch": 2.01, "learning_rate": 2.0175438596491227e-05, "loss": 0.5668, "step": 230 }, { "epoch": 2.06, "learning_rate": 2.06140350877193e-05, "loss": 0.616, "step": 235 }, { "epoch": 2.1, "learning_rate": 2.105263157894737e-05, "loss": 0.6098, "step": 240 }, { "epoch": 2.14, "learning_rate": 2.149122807017544e-05, "loss": 0.5217, "step": 245 }, { "epoch": 2.19, "learning_rate": 2.1929824561403507e-05, "loss": 0.5275, "step": 250 }, { "epoch": 2.23, "learning_rate": 2.236842105263158e-05, "loss": 0.6111, "step": 255 }, { "epoch": 2.28, "learning_rate": 2.280701754385965e-05, "loss": 0.4977, "step": 260 }, { "epoch": 2.32, "learning_rate": 2.324561403508772e-05, "loss": 0.6095, "step": 265 }, { "epoch": 2.36, "learning_rate": 2.368421052631579e-05, "loss": 0.6547, "step": 270 }, { "epoch": 2.41, "learning_rate": 2.412280701754386e-05, "loss": 0.5991, "step": 275 }, { "epoch": 2.45, "learning_rate": 2.456140350877193e-05, "loss": 0.6997, "step": 280 }, { "epoch": 2.49, "learning_rate": 2.5e-05, "loss": 0.532, "step": 285 }, { "epoch": 2.54, "learning_rate": 2.5438596491228074e-05, "loss": 0.4929, "step": 290 }, { "epoch": 2.58, "learning_rate": 2.5877192982456143e-05, "loss": 0.5004, "step": 295 }, { "epoch": 2.63, "learning_rate": 2.6315789473684212e-05, "loss": 0.5739, "step": 300 }, { "epoch": 2.67, "learning_rate": 2.675438596491228e-05, "loss": 0.6012, "step": 305 }, { "epoch": 2.71, "learning_rate": 2.7192982456140354e-05, "loss": 0.5107, "step": 310 }, { "epoch": 2.76, "learning_rate": 2.7631578947368426e-05, "loss": 0.6075, "step": 315 }, { "epoch": 2.8, "learning_rate": 2.8070175438596492e-05, "loss": 0.5597, "step": 320 }, { "epoch": 2.84, "learning_rate": 2.850877192982456e-05, "loss": 0.4941, "step": 325 }, { "epoch": 2.89, "learning_rate": 2.8947368421052634e-05, "loss": 0.6285, "step": 330 }, { "epoch": 2.93, "learning_rate": 2.9385964912280706e-05, "loss": 0.4644, "step": 335 }, { "epoch": 2.98, "learning_rate": 2.9824561403508772e-05, "loss": 0.5085, "step": 340 }, { "epoch": 2.99, "eval_accuracy": 0.874384236453202, "eval_loss": 0.3445686399936676, "eval_runtime": 19.2674, "eval_samples_per_second": 63.216, "eval_steps_per_second": 7.941, "step": 342 }, { "epoch": 3.02, "learning_rate": 3.0263157894736844e-05, "loss": 0.542, "step": 345 }, { "epoch": 3.06, "learning_rate": 3.0701754385964913e-05, "loss": 0.4411, "step": 350 }, { "epoch": 3.11, "learning_rate": 3.1140350877192986e-05, "loss": 0.5084, "step": 355 }, { "epoch": 3.15, "learning_rate": 3.157894736842105e-05, "loss": 0.5346, "step": 360 }, { "epoch": 3.19, "learning_rate": 3.2017543859649124e-05, "loss": 0.4539, "step": 365 }, { "epoch": 3.24, "learning_rate": 3.24561403508772e-05, "loss": 0.308, "step": 370 }, { "epoch": 3.28, "learning_rate": 3.289473684210527e-05, "loss": 0.5257, "step": 375 }, { "epoch": 3.33, "learning_rate": 3.3333333333333335e-05, "loss": 0.3717, "step": 380 }, { "epoch": 3.37, "learning_rate": 3.377192982456141e-05, "loss": 0.3999, "step": 385 }, { "epoch": 3.41, "learning_rate": 3.421052631578947e-05, "loss": 0.5481, "step": 390 }, { "epoch": 3.46, "learning_rate": 3.4649122807017546e-05, "loss": 0.3296, "step": 395 }, { "epoch": 3.5, "learning_rate": 3.508771929824561e-05, "loss": 0.4459, "step": 400 }, { "epoch": 3.54, "learning_rate": 3.5526315789473684e-05, "loss": 0.355, "step": 405 }, { "epoch": 3.59, "learning_rate": 3.5964912280701756e-05, "loss": 0.2903, "step": 410 }, { "epoch": 3.63, "learning_rate": 3.640350877192983e-05, "loss": 0.4224, "step": 415 }, { "epoch": 3.68, "learning_rate": 3.6842105263157895e-05, "loss": 0.4467, "step": 420 }, { "epoch": 3.72, "learning_rate": 3.728070175438597e-05, "loss": 0.298, "step": 425 }, { "epoch": 3.76, "learning_rate": 3.771929824561404e-05, "loss": 0.4309, "step": 430 }, { "epoch": 3.81, "learning_rate": 3.815789473684211e-05, "loss": 0.4773, "step": 435 }, { "epoch": 3.85, "learning_rate": 3.859649122807018e-05, "loss": 0.5006, "step": 440 }, { "epoch": 3.89, "learning_rate": 3.9035087719298244e-05, "loss": 0.412, "step": 445 }, { "epoch": 3.94, "learning_rate": 3.9473684210526316e-05, "loss": 0.3844, "step": 450 }, { "epoch": 3.98, "learning_rate": 3.991228070175439e-05, "loss": 0.2874, "step": 455 }, { "epoch": 4.0, "eval_accuracy": 0.9326765188834154, "eval_loss": 0.20270153880119324, "eval_runtime": 19.2109, "eval_samples_per_second": 63.401, "eval_steps_per_second": 7.964, "step": 457 }, { "epoch": 4.03, "learning_rate": 4.0350877192982455e-05, "loss": 0.4108, "step": 460 }, { "epoch": 4.07, "learning_rate": 4.078947368421053e-05, "loss": 0.3113, "step": 465 }, { "epoch": 4.11, "learning_rate": 4.12280701754386e-05, "loss": 0.3695, "step": 470 }, { "epoch": 4.16, "learning_rate": 4.166666666666667e-05, "loss": 0.4312, "step": 475 }, { "epoch": 4.2, "learning_rate": 4.210526315789474e-05, "loss": 0.5009, "step": 480 }, { "epoch": 4.25, "learning_rate": 4.254385964912281e-05, "loss": 0.4491, "step": 485 }, { "epoch": 4.29, "learning_rate": 4.298245614035088e-05, "loss": 0.3298, "step": 490 }, { "epoch": 4.33, "learning_rate": 4.342105263157895e-05, "loss": 0.3188, "step": 495 }, { "epoch": 4.38, "learning_rate": 4.3859649122807014e-05, "loss": 0.3462, "step": 500 }, { "epoch": 4.42, "learning_rate": 4.429824561403509e-05, "loss": 0.3797, "step": 505 }, { "epoch": 4.46, "learning_rate": 4.473684210526316e-05, "loss": 0.412, "step": 510 }, { "epoch": 4.51, "learning_rate": 4.517543859649123e-05, "loss": 0.3163, "step": 515 }, { "epoch": 4.55, "learning_rate": 4.56140350877193e-05, "loss": 0.4202, "step": 520 }, { "epoch": 4.6, "learning_rate": 4.605263157894737e-05, "loss": 0.3902, "step": 525 }, { "epoch": 4.64, "learning_rate": 4.649122807017544e-05, "loss": 0.3598, "step": 530 }, { "epoch": 4.68, "learning_rate": 4.6929824561403515e-05, "loss": 0.2675, "step": 535 }, { "epoch": 4.73, "learning_rate": 4.736842105263158e-05, "loss": 0.3208, "step": 540 }, { "epoch": 4.77, "learning_rate": 4.780701754385965e-05, "loss": 0.3622, "step": 545 }, { "epoch": 4.81, "learning_rate": 4.824561403508772e-05, "loss": 0.3299, "step": 550 }, { "epoch": 4.86, "learning_rate": 4.868421052631579e-05, "loss": 0.2825, "step": 555 }, { "epoch": 4.9, "learning_rate": 4.912280701754386e-05, "loss": 0.4182, "step": 560 }, { "epoch": 4.95, "learning_rate": 4.956140350877193e-05, "loss": 0.282, "step": 565 }, { "epoch": 4.99, "learning_rate": 5e-05, "loss": 0.355, "step": 570 }, { "epoch": 5.0, "eval_accuracy": 0.9400656814449918, "eval_loss": 0.1666150987148285, "eval_runtime": 19.15, "eval_samples_per_second": 63.603, "eval_steps_per_second": 7.99, "step": 571 }, { "epoch": 5.03, "learning_rate": 4.956140350877193e-05, "loss": 0.3916, "step": 575 }, { "epoch": 5.08, "learning_rate": 4.912280701754386e-05, "loss": 0.3083, "step": 580 }, { "epoch": 5.12, "learning_rate": 4.868421052631579e-05, "loss": 0.2193, "step": 585 }, { "epoch": 5.16, "learning_rate": 4.824561403508772e-05, "loss": 0.2596, "step": 590 }, { "epoch": 5.21, "learning_rate": 4.780701754385965e-05, "loss": 0.2697, "step": 595 }, { "epoch": 5.25, "learning_rate": 4.736842105263158e-05, "loss": 0.2519, "step": 600 }, { "epoch": 5.3, "learning_rate": 4.6929824561403515e-05, "loss": 0.3345, "step": 605 }, { "epoch": 5.34, "learning_rate": 4.649122807017544e-05, "loss": 0.2694, "step": 610 }, { "epoch": 5.38, "learning_rate": 4.605263157894737e-05, "loss": 0.2815, "step": 615 }, { "epoch": 5.43, "learning_rate": 4.56140350877193e-05, "loss": 0.2315, "step": 620 }, { "epoch": 5.47, "learning_rate": 4.517543859649123e-05, "loss": 0.2701, "step": 625 }, { "epoch": 5.51, "learning_rate": 4.473684210526316e-05, "loss": 0.2506, "step": 630 }, { "epoch": 5.56, "learning_rate": 4.429824561403509e-05, "loss": 0.2141, "step": 635 }, { "epoch": 5.6, "learning_rate": 4.3859649122807014e-05, "loss": 0.2323, "step": 640 }, { "epoch": 5.65, "learning_rate": 4.342105263157895e-05, "loss": 0.2465, "step": 645 }, { "epoch": 5.69, "learning_rate": 4.298245614035088e-05, "loss": 0.3737, "step": 650 }, { "epoch": 5.73, "learning_rate": 4.254385964912281e-05, "loss": 0.3958, "step": 655 }, { "epoch": 5.78, "learning_rate": 4.210526315789474e-05, "loss": 0.4959, "step": 660 }, { "epoch": 5.82, "learning_rate": 4.166666666666667e-05, "loss": 0.3131, "step": 665 }, { "epoch": 5.86, "learning_rate": 4.12280701754386e-05, "loss": 0.2104, "step": 670 }, { "epoch": 5.91, "learning_rate": 4.078947368421053e-05, "loss": 0.2501, "step": 675 }, { "epoch": 5.95, "learning_rate": 4.0350877192982455e-05, "loss": 0.3094, "step": 680 }, { "epoch": 6.0, "learning_rate": 3.991228070175439e-05, "loss": 0.2493, "step": 685 }, { "epoch": 6.0, "eval_accuracy": 0.9655172413793104, "eval_loss": 0.09691629558801651, "eval_runtime": 19.1884, "eval_samples_per_second": 63.476, "eval_steps_per_second": 7.974, "step": 685 }, { "epoch": 6.04, "learning_rate": 3.9473684210526316e-05, "loss": 0.2233, "step": 690 }, { "epoch": 6.08, "learning_rate": 3.9035087719298244e-05, "loss": 0.1551, "step": 695 }, { "epoch": 6.13, "learning_rate": 3.859649122807018e-05, "loss": 0.2415, "step": 700 }, { "epoch": 6.17, "learning_rate": 3.815789473684211e-05, "loss": 0.1373, "step": 705 }, { "epoch": 6.21, "learning_rate": 3.771929824561404e-05, "loss": 0.2754, "step": 710 }, { "epoch": 6.26, "learning_rate": 3.728070175438597e-05, "loss": 0.2255, "step": 715 }, { "epoch": 6.3, "learning_rate": 3.6842105263157895e-05, "loss": 0.156, "step": 720 }, { "epoch": 6.35, "learning_rate": 3.640350877192983e-05, "loss": 0.2596, "step": 725 }, { "epoch": 6.39, "learning_rate": 3.5964912280701756e-05, "loss": 0.2366, "step": 730 }, { "epoch": 6.43, "learning_rate": 3.5526315789473684e-05, "loss": 0.2985, "step": 735 }, { "epoch": 6.48, "learning_rate": 3.508771929824561e-05, "loss": 0.1712, "step": 740 }, { "epoch": 6.52, "learning_rate": 3.4649122807017546e-05, "loss": 0.1222, "step": 745 }, { "epoch": 6.56, "learning_rate": 3.421052631578947e-05, "loss": 0.1547, "step": 750 }, { "epoch": 6.61, "learning_rate": 3.377192982456141e-05, "loss": 0.1938, "step": 755 }, { "epoch": 6.65, "learning_rate": 3.3333333333333335e-05, "loss": 0.1712, "step": 760 }, { "epoch": 6.7, "learning_rate": 3.289473684210527e-05, "loss": 0.1423, "step": 765 }, { "epoch": 6.74, "learning_rate": 3.24561403508772e-05, "loss": 0.2396, "step": 770 }, { "epoch": 6.78, "learning_rate": 3.2017543859649124e-05, "loss": 0.2153, "step": 775 }, { "epoch": 6.83, "learning_rate": 3.157894736842105e-05, "loss": 0.2239, "step": 780 }, { "epoch": 6.87, "learning_rate": 3.1140350877192986e-05, "loss": 0.0997, "step": 785 }, { "epoch": 6.91, "learning_rate": 3.0701754385964913e-05, "loss": 0.1459, "step": 790 }, { "epoch": 6.96, "learning_rate": 3.0263157894736844e-05, "loss": 0.1909, "step": 795 }, { "epoch": 6.99, "eval_accuracy": 0.9835796387520526, "eval_loss": 0.05583478882908821, "eval_runtime": 19.3251, "eval_samples_per_second": 63.027, "eval_steps_per_second": 7.917, "step": 799 }, { "epoch": 7.0, "learning_rate": 2.9824561403508772e-05, "loss": 0.1693, "step": 800 }, { "epoch": 7.05, "learning_rate": 2.9385964912280706e-05, "loss": 0.1497, "step": 805 }, { "epoch": 7.09, "learning_rate": 2.8947368421052634e-05, "loss": 0.2175, "step": 810 }, { "epoch": 7.13, "learning_rate": 2.850877192982456e-05, "loss": 0.1438, "step": 815 }, { "epoch": 7.18, "learning_rate": 2.8070175438596492e-05, "loss": 0.1668, "step": 820 }, { "epoch": 7.22, "learning_rate": 2.7631578947368426e-05, "loss": 0.1601, "step": 825 }, { "epoch": 7.26, "learning_rate": 2.7192982456140354e-05, "loss": 0.0939, "step": 830 }, { "epoch": 7.31, "learning_rate": 2.675438596491228e-05, "loss": 0.1289, "step": 835 }, { "epoch": 7.35, "learning_rate": 2.6315789473684212e-05, "loss": 0.1687, "step": 840 }, { "epoch": 7.4, "learning_rate": 2.5877192982456143e-05, "loss": 0.1336, "step": 845 }, { "epoch": 7.44, "learning_rate": 2.5438596491228074e-05, "loss": 0.1448, "step": 850 }, { "epoch": 7.48, "learning_rate": 2.5e-05, "loss": 0.2031, "step": 855 }, { "epoch": 7.53, "learning_rate": 2.456140350877193e-05, "loss": 0.1245, "step": 860 }, { "epoch": 7.57, "learning_rate": 2.412280701754386e-05, "loss": 0.1954, "step": 865 }, { "epoch": 7.61, "learning_rate": 2.368421052631579e-05, "loss": 0.2028, "step": 870 }, { "epoch": 7.66, "learning_rate": 2.324561403508772e-05, "loss": 0.1675, "step": 875 }, { "epoch": 7.7, "learning_rate": 2.280701754385965e-05, "loss": 0.1371, "step": 880 }, { "epoch": 7.75, "learning_rate": 2.236842105263158e-05, "loss": 0.1443, "step": 885 }, { "epoch": 7.79, "learning_rate": 2.1929824561403507e-05, "loss": 0.2014, "step": 890 }, { "epoch": 7.83, "learning_rate": 2.149122807017544e-05, "loss": 0.1106, "step": 895 }, { "epoch": 7.88, "learning_rate": 2.105263157894737e-05, "loss": 0.1049, "step": 900 }, { "epoch": 7.92, "learning_rate": 2.06140350877193e-05, "loss": 0.0893, "step": 905 }, { "epoch": 7.96, "learning_rate": 2.0175438596491227e-05, "loss": 0.1821, "step": 910 }, { "epoch": 8.0, "eval_accuracy": 0.9901477832512315, "eval_loss": 0.041182905435562134, "eval_runtime": 19.2291, "eval_samples_per_second": 63.341, "eval_steps_per_second": 7.957, "step": 914 }, { "epoch": 8.01, "learning_rate": 1.9736842105263158e-05, "loss": 0.137, "step": 915 }, { "epoch": 8.05, "learning_rate": 1.929824561403509e-05, "loss": 0.1225, "step": 920 }, { "epoch": 8.1, "learning_rate": 1.885964912280702e-05, "loss": 0.1465, "step": 925 }, { "epoch": 8.14, "learning_rate": 1.8421052631578947e-05, "loss": 0.1588, "step": 930 }, { "epoch": 8.18, "learning_rate": 1.7982456140350878e-05, "loss": 0.163, "step": 935 }, { "epoch": 8.23, "learning_rate": 1.7543859649122806e-05, "loss": 0.0645, "step": 940 }, { "epoch": 8.27, "learning_rate": 1.7105263157894737e-05, "loss": 0.0546, "step": 945 }, { "epoch": 8.32, "learning_rate": 1.6666666666666667e-05, "loss": 0.0346, "step": 950 }, { "epoch": 8.36, "learning_rate": 1.62280701754386e-05, "loss": 0.099, "step": 955 }, { "epoch": 8.4, "learning_rate": 1.5789473684210526e-05, "loss": 0.1723, "step": 960 }, { "epoch": 8.45, "learning_rate": 1.5350877192982457e-05, "loss": 0.1102, "step": 965 }, { "epoch": 8.49, "learning_rate": 1.4912280701754386e-05, "loss": 0.1221, "step": 970 }, { "epoch": 8.53, "learning_rate": 1.4473684210526317e-05, "loss": 0.1574, "step": 975 }, { "epoch": 8.58, "learning_rate": 1.4035087719298246e-05, "loss": 0.0542, "step": 980 }, { "epoch": 8.62, "learning_rate": 1.3596491228070177e-05, "loss": 0.121, "step": 985 }, { "epoch": 8.67, "learning_rate": 1.3157894736842106e-05, "loss": 0.0355, "step": 990 }, { "epoch": 8.71, "learning_rate": 1.2719298245614037e-05, "loss": 0.0715, "step": 995 }, { "epoch": 8.75, "learning_rate": 1.2280701754385964e-05, "loss": 0.0456, "step": 1000 }, { "epoch": 8.8, "learning_rate": 1.1842105263157895e-05, "loss": 0.0774, "step": 1005 }, { "epoch": 8.84, "learning_rate": 1.1403508771929824e-05, "loss": 0.0309, "step": 1010 }, { "epoch": 8.88, "learning_rate": 1.0964912280701754e-05, "loss": 0.1048, "step": 1015 }, { "epoch": 8.93, "learning_rate": 1.0526315789473684e-05, "loss": 0.1286, "step": 1020 }, { "epoch": 8.97, "learning_rate": 1.0087719298245614e-05, "loss": 0.1853, "step": 1025 }, { "epoch": 9.0, "eval_accuracy": 0.9942528735632183, "eval_loss": 0.02394494228065014, "eval_runtime": 19.1714, "eval_samples_per_second": 63.532, "eval_steps_per_second": 7.981, "step": 1028 }, { "epoch": 9.02, "learning_rate": 9.649122807017545e-06, "loss": 0.0881, "step": 1030 }, { "epoch": 9.06, "learning_rate": 9.210526315789474e-06, "loss": 0.0734, "step": 1035 }, { "epoch": 9.1, "learning_rate": 8.771929824561403e-06, "loss": 0.0274, "step": 1040 }, { "epoch": 9.15, "learning_rate": 8.333333333333334e-06, "loss": 0.1102, "step": 1045 }, { "epoch": 9.19, "learning_rate": 7.894736842105263e-06, "loss": 0.0844, "step": 1050 }, { "epoch": 9.23, "learning_rate": 7.456140350877193e-06, "loss": 0.0348, "step": 1055 }, { "epoch": 9.28, "learning_rate": 7.017543859649123e-06, "loss": 0.1314, "step": 1060 }, { "epoch": 9.32, "learning_rate": 6.578947368421053e-06, "loss": 0.1114, "step": 1065 }, { "epoch": 9.37, "learning_rate": 6.140350877192982e-06, "loss": 0.0509, "step": 1070 }, { "epoch": 9.41, "learning_rate": 5.701754385964912e-06, "loss": 0.0867, "step": 1075 }, { "epoch": 9.45, "learning_rate": 5.263157894736842e-06, "loss": 0.0165, "step": 1080 }, { "epoch": 9.5, "learning_rate": 4.824561403508772e-06, "loss": 0.1223, "step": 1085 }, { "epoch": 9.54, "learning_rate": 4.3859649122807014e-06, "loss": 0.0802, "step": 1090 }, { "epoch": 9.58, "learning_rate": 3.9473684210526315e-06, "loss": 0.1373, "step": 1095 }, { "epoch": 9.63, "learning_rate": 3.5087719298245615e-06, "loss": 0.0855, "step": 1100 }, { "epoch": 9.67, "learning_rate": 3.070175438596491e-06, "loss": 0.0934, "step": 1105 }, { "epoch": 9.72, "learning_rate": 2.631578947368421e-06, "loss": 0.0544, "step": 1110 }, { "epoch": 9.76, "learning_rate": 2.1929824561403507e-06, "loss": 0.0704, "step": 1115 }, { "epoch": 9.8, "learning_rate": 1.7543859649122807e-06, "loss": 0.0644, "step": 1120 }, { "epoch": 9.85, "learning_rate": 1.3157894736842106e-06, "loss": 0.064, "step": 1125 }, { "epoch": 9.89, "learning_rate": 8.771929824561404e-07, "loss": 0.0153, "step": 1130 }, { "epoch": 9.93, "learning_rate": 4.385964912280702e-07, "loss": 0.0909, "step": 1135 }, { "epoch": 9.98, "learning_rate": 0.0, "loss": 0.0666, "step": 1140 }, { "epoch": 9.98, "eval_accuracy": 0.9950738916256158, "eval_loss": 0.011206304654479027, "eval_runtime": 19.1932, "eval_samples_per_second": 63.46, "eval_steps_per_second": 7.972, "step": 1140 }, { "epoch": 9.98, "step": 1140, "total_flos": 8.390377142257582e+18, "train_loss": 0.46324286803341747, "train_runtime": 2845.9625, "train_samples_per_second": 12.829, "train_steps_per_second": 0.401 } ], "logging_steps": 5, "max_steps": 1140, "num_train_epochs": 10, "save_steps": 500, "total_flos": 8.390377142257582e+18, "trial_name": null, "trial_params": null }