{ "best_metric": 0.9975369458128078, "best_model_checkpoint": "swinv2-large-patch4-window12to16-192to256-22kto1k-ft-finetuned-Lesion-Classification-HAM10000-S/checkpoint-1140", "epoch": 9.978118161925602, "global_step": 1140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.385964912280702e-07, "loss": 2.0503, "step": 5 }, { "epoch": 0.09, "learning_rate": 8.771929824561404e-07, "loss": 1.9978, "step": 10 }, { "epoch": 0.13, "learning_rate": 1.3157894736842106e-06, "loss": 1.9865, "step": 15 }, { "epoch": 0.18, "learning_rate": 1.7543859649122807e-06, "loss": 2.0066, "step": 20 }, { "epoch": 0.22, "learning_rate": 2.1929824561403507e-06, "loss": 1.9463, "step": 25 }, { "epoch": 0.26, "learning_rate": 2.631578947368421e-06, "loss": 1.94, "step": 30 }, { "epoch": 0.31, "learning_rate": 3.070175438596491e-06, "loss": 1.9458, "step": 35 }, { "epoch": 0.35, "learning_rate": 3.5087719298245615e-06, "loss": 1.8351, "step": 40 }, { "epoch": 0.39, "learning_rate": 3.9473684210526315e-06, "loss": 1.8686, "step": 45 }, { "epoch": 0.44, "learning_rate": 4.3859649122807014e-06, "loss": 1.8466, "step": 50 }, { "epoch": 0.48, "learning_rate": 4.824561403508772e-06, "loss": 1.8131, "step": 55 }, { "epoch": 0.53, "learning_rate": 5.263157894736842e-06, "loss": 1.7527, "step": 60 }, { "epoch": 0.57, "learning_rate": 5.701754385964912e-06, "loss": 1.7692, "step": 65 }, { "epoch": 0.61, "learning_rate": 6.140350877192982e-06, "loss": 1.6492, "step": 70 }, { "epoch": 0.66, "learning_rate": 6.578947368421053e-06, "loss": 1.5949, "step": 75 }, { "epoch": 0.7, "learning_rate": 7.017543859649123e-06, "loss": 1.5051, "step": 80 }, { "epoch": 0.74, "learning_rate": 7.456140350877193e-06, "loss": 1.5077, "step": 85 }, { "epoch": 0.79, "learning_rate": 7.894736842105263e-06, "loss": 1.3687, "step": 90 }, { "epoch": 0.83, "learning_rate": 8.333333333333334e-06, "loss": 1.3231, "step": 95 }, { "epoch": 0.88, "learning_rate": 8.771929824561403e-06, "loss": 1.3126, "step": 100 }, { "epoch": 0.92, "learning_rate": 9.210526315789474e-06, "loss": 1.1667, "step": 105 }, { "epoch": 0.96, "learning_rate": 9.649122807017545e-06, "loss": 1.2378, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.5935960591133005, "eval_loss": 0.997550368309021, "eval_runtime": 959.756, "eval_samples_per_second": 1.269, "eval_steps_per_second": 0.159, "step": 114 }, { "epoch": 1.01, "learning_rate": 1.0087719298245614e-05, "loss": 1.0408, "step": 115 }, { "epoch": 1.05, "learning_rate": 1.0526315789473684e-05, "loss": 1.0499, "step": 120 }, { "epoch": 1.09, "learning_rate": 1.0964912280701754e-05, "loss": 0.9867, "step": 125 }, { "epoch": 1.14, "learning_rate": 1.1403508771929824e-05, "loss": 0.9692, "step": 130 }, { "epoch": 1.18, "learning_rate": 1.1842105263157895e-05, "loss": 1.0219, "step": 135 }, { "epoch": 1.23, "learning_rate": 1.2280701754385964e-05, "loss": 0.9686, "step": 140 }, { "epoch": 1.27, "learning_rate": 1.2719298245614037e-05, "loss": 0.942, "step": 145 }, { "epoch": 1.31, "learning_rate": 1.3157894736842106e-05, "loss": 0.9243, "step": 150 }, { "epoch": 1.36, "learning_rate": 1.3596491228070177e-05, "loss": 0.791, "step": 155 }, { "epoch": 1.4, "learning_rate": 1.4035087719298246e-05, "loss": 0.8906, "step": 160 }, { "epoch": 1.44, "learning_rate": 1.4473684210526317e-05, "loss": 0.7858, "step": 165 }, { "epoch": 1.49, "learning_rate": 1.4912280701754386e-05, "loss": 0.7899, "step": 170 }, { "epoch": 1.53, "learning_rate": 1.5350877192982457e-05, "loss": 0.7818, "step": 175 }, { "epoch": 1.58, "learning_rate": 1.5789473684210526e-05, "loss": 0.7952, "step": 180 }, { "epoch": 1.62, "learning_rate": 1.62280701754386e-05, "loss": 0.7489, "step": 185 }, { "epoch": 1.66, "learning_rate": 1.6666666666666667e-05, "loss": 0.7705, "step": 190 }, { "epoch": 1.71, "learning_rate": 1.7105263157894737e-05, "loss": 0.917, "step": 195 }, { "epoch": 1.75, "learning_rate": 1.7543859649122806e-05, "loss": 0.8671, "step": 200 }, { "epoch": 1.79, "learning_rate": 1.7982456140350878e-05, "loss": 0.8763, "step": 205 }, { "epoch": 1.84, "learning_rate": 1.8421052631578947e-05, "loss": 0.6994, "step": 210 }, { "epoch": 1.88, "learning_rate": 1.885964912280702e-05, "loss": 0.6047, "step": 215 }, { "epoch": 1.93, "learning_rate": 1.929824561403509e-05, "loss": 0.6536, "step": 220 }, { "epoch": 1.97, "learning_rate": 1.9736842105263158e-05, "loss": 0.7272, "step": 225 }, { "epoch": 2.0, "eval_accuracy": 0.8308702791461412, "eval_loss": 0.4748782813549042, "eval_runtime": 51.2882, "eval_samples_per_second": 23.748, "eval_steps_per_second": 2.983, "step": 228 }, { "epoch": 2.01, "learning_rate": 2.0175438596491227e-05, "loss": 0.6975, "step": 230 }, { "epoch": 2.06, "learning_rate": 2.06140350877193e-05, "loss": 0.5554, "step": 235 }, { "epoch": 2.1, "learning_rate": 2.105263157894737e-05, "loss": 0.5656, "step": 240 }, { "epoch": 2.14, "learning_rate": 2.149122807017544e-05, "loss": 0.6029, "step": 245 }, { "epoch": 2.19, "learning_rate": 2.1929824561403507e-05, "loss": 0.5438, "step": 250 }, { "epoch": 2.23, "learning_rate": 2.236842105263158e-05, "loss": 0.6093, "step": 255 }, { "epoch": 2.28, "learning_rate": 2.280701754385965e-05, "loss": 0.6688, "step": 260 }, { "epoch": 2.32, "learning_rate": 2.324561403508772e-05, "loss": 0.6388, "step": 265 }, { "epoch": 2.36, "learning_rate": 2.368421052631579e-05, "loss": 0.5758, "step": 270 }, { "epoch": 2.41, "learning_rate": 2.412280701754386e-05, "loss": 0.6343, "step": 275 }, { "epoch": 2.45, "learning_rate": 2.456140350877193e-05, "loss": 0.7233, "step": 280 }, { "epoch": 2.49, "learning_rate": 2.5e-05, "loss": 0.4706, "step": 285 }, { "epoch": 2.54, "learning_rate": 2.5438596491228074e-05, "loss": 0.5836, "step": 290 }, { "epoch": 2.58, "learning_rate": 2.5877192982456143e-05, "loss": 0.5001, "step": 295 }, { "epoch": 2.63, "learning_rate": 2.6315789473684212e-05, "loss": 0.6911, "step": 300 }, { "epoch": 2.67, "learning_rate": 2.675438596491228e-05, "loss": 0.7413, "step": 305 }, { "epoch": 2.71, "learning_rate": 2.7192982456140354e-05, "loss": 0.5479, "step": 310 }, { "epoch": 2.76, "learning_rate": 2.7631578947368426e-05, "loss": 0.4444, "step": 315 }, { "epoch": 2.8, "learning_rate": 2.8070175438596492e-05, "loss": 0.5584, "step": 320 }, { "epoch": 2.84, "learning_rate": 2.850877192982456e-05, "loss": 0.3468, "step": 325 }, { "epoch": 2.89, "learning_rate": 2.8947368421052634e-05, "loss": 0.6219, "step": 330 }, { "epoch": 2.93, "learning_rate": 2.9385964912280706e-05, "loss": 0.5142, "step": 335 }, { "epoch": 2.98, "learning_rate": 2.9824561403508772e-05, "loss": 0.4335, "step": 340 }, { "epoch": 2.99, "eval_accuracy": 0.9195402298850575, "eval_loss": 0.24883201718330383, "eval_runtime": 51.2735, "eval_samples_per_second": 23.755, "eval_steps_per_second": 2.984, "step": 342 }, { "epoch": 3.02, "learning_rate": 3.0263157894736844e-05, "loss": 0.4105, "step": 345 }, { "epoch": 3.06, "learning_rate": 3.0701754385964913e-05, "loss": 0.4176, "step": 350 }, { "epoch": 3.11, "learning_rate": 3.1140350877192986e-05, "loss": 0.5252, "step": 355 }, { "epoch": 3.15, "learning_rate": 3.157894736842105e-05, "loss": 0.4416, "step": 360 }, { "epoch": 3.19, "learning_rate": 3.2017543859649124e-05, "loss": 0.4243, "step": 365 }, { "epoch": 3.24, "learning_rate": 3.24561403508772e-05, "loss": 0.3625, "step": 370 }, { "epoch": 3.28, "learning_rate": 3.289473684210527e-05, "loss": 0.3327, "step": 375 }, { "epoch": 3.33, "learning_rate": 3.3333333333333335e-05, "loss": 0.3239, "step": 380 }, { "epoch": 3.37, "learning_rate": 3.377192982456141e-05, "loss": 0.4551, "step": 385 }, { "epoch": 3.41, "learning_rate": 3.421052631578947e-05, "loss": 0.4704, "step": 390 }, { "epoch": 3.46, "learning_rate": 3.4649122807017546e-05, "loss": 0.442, "step": 395 }, { "epoch": 3.5, "learning_rate": 3.508771929824561e-05, "loss": 0.3715, "step": 400 }, { "epoch": 3.54, "learning_rate": 3.5526315789473684e-05, "loss": 0.3742, "step": 405 }, { "epoch": 3.59, "learning_rate": 3.5964912280701756e-05, "loss": 0.3209, "step": 410 }, { "epoch": 3.63, "learning_rate": 3.640350877192983e-05, "loss": 0.3335, "step": 415 }, { "epoch": 3.68, "learning_rate": 3.6842105263157895e-05, "loss": 0.3519, "step": 420 }, { "epoch": 3.72, "learning_rate": 3.728070175438597e-05, "loss": 0.3011, "step": 425 }, { "epoch": 3.76, "learning_rate": 3.771929824561404e-05, "loss": 0.4543, "step": 430 }, { "epoch": 3.81, "learning_rate": 3.815789473684211e-05, "loss": 0.3693, "step": 435 }, { "epoch": 3.85, "learning_rate": 3.859649122807018e-05, "loss": 0.3256, "step": 440 }, { "epoch": 3.89, "learning_rate": 3.9035087719298244e-05, "loss": 0.3463, "step": 445 }, { "epoch": 3.94, "learning_rate": 3.9473684210526316e-05, "loss": 0.3209, "step": 450 }, { "epoch": 3.98, "learning_rate": 3.991228070175439e-05, "loss": 0.3298, "step": 455 }, { "epoch": 4.0, "eval_accuracy": 0.9310344827586207, "eval_loss": 0.17004548013210297, "eval_runtime": 51.2681, "eval_samples_per_second": 23.757, "eval_steps_per_second": 2.984, "step": 457 }, { "epoch": 4.03, "learning_rate": 4.0350877192982455e-05, "loss": 0.3795, "step": 460 }, { "epoch": 4.07, "learning_rate": 4.078947368421053e-05, "loss": 0.2746, "step": 465 }, { "epoch": 4.11, "learning_rate": 4.12280701754386e-05, "loss": 0.2629, "step": 470 }, { "epoch": 4.16, "learning_rate": 4.166666666666667e-05, "loss": 0.3851, "step": 475 }, { "epoch": 4.2, "learning_rate": 4.210526315789474e-05, "loss": 0.3722, "step": 480 }, { "epoch": 4.25, "learning_rate": 4.254385964912281e-05, "loss": 0.2845, "step": 485 }, { "epoch": 4.29, "learning_rate": 4.298245614035088e-05, "loss": 0.313, "step": 490 }, { "epoch": 4.33, "learning_rate": 4.342105263157895e-05, "loss": 0.2468, "step": 495 }, { "epoch": 4.38, "learning_rate": 4.3859649122807014e-05, "loss": 0.3358, "step": 500 }, { "epoch": 4.42, "learning_rate": 4.429824561403509e-05, "loss": 0.4219, "step": 505 }, { "epoch": 4.46, "learning_rate": 4.473684210526316e-05, "loss": 0.3461, "step": 510 }, { "epoch": 4.51, "learning_rate": 4.517543859649123e-05, "loss": 0.3383, "step": 515 }, { "epoch": 4.55, "learning_rate": 4.56140350877193e-05, "loss": 0.3851, "step": 520 }, { "epoch": 4.6, "learning_rate": 4.605263157894737e-05, "loss": 0.3833, "step": 525 }, { "epoch": 4.64, "learning_rate": 4.649122807017544e-05, "loss": 0.4285, "step": 530 }, { "epoch": 4.68, "learning_rate": 4.6929824561403515e-05, "loss": 0.5201, "step": 535 }, { "epoch": 4.73, "learning_rate": 4.736842105263158e-05, "loss": 0.3688, "step": 540 }, { "epoch": 4.77, "learning_rate": 4.780701754385965e-05, "loss": 0.2883, "step": 545 }, { "epoch": 4.81, "learning_rate": 4.824561403508772e-05, "loss": 0.2787, "step": 550 }, { "epoch": 4.86, "learning_rate": 4.868421052631579e-05, "loss": 0.1929, "step": 555 }, { "epoch": 4.9, "learning_rate": 4.912280701754386e-05, "loss": 0.2865, "step": 560 }, { "epoch": 4.95, "learning_rate": 4.956140350877193e-05, "loss": 0.2997, "step": 565 }, { "epoch": 4.99, "learning_rate": 5e-05, "loss": 0.177, "step": 570 }, { "epoch": 5.0, "eval_accuracy": 0.9261083743842364, "eval_loss": 0.21159406006336212, "eval_runtime": 51.2395, "eval_samples_per_second": 23.771, "eval_steps_per_second": 2.986, "step": 571 }, { "epoch": 5.03, "learning_rate": 4.956140350877193e-05, "loss": 0.3122, "step": 575 }, { "epoch": 5.08, "learning_rate": 4.912280701754386e-05, "loss": 0.3343, "step": 580 }, { "epoch": 5.12, "learning_rate": 4.868421052631579e-05, "loss": 0.5452, "step": 585 }, { "epoch": 5.16, "learning_rate": 4.824561403508772e-05, "loss": 0.2825, "step": 590 }, { "epoch": 5.21, "learning_rate": 4.780701754385965e-05, "loss": 0.345, "step": 595 }, { "epoch": 5.25, "learning_rate": 4.736842105263158e-05, "loss": 0.3114, "step": 600 }, { "epoch": 5.3, "learning_rate": 4.6929824561403515e-05, "loss": 0.236, "step": 605 }, { "epoch": 5.34, "learning_rate": 4.649122807017544e-05, "loss": 0.1891, "step": 610 }, { "epoch": 5.38, "learning_rate": 4.605263157894737e-05, "loss": 0.2101, "step": 615 }, { "epoch": 5.43, "learning_rate": 4.56140350877193e-05, "loss": 0.3216, "step": 620 }, { "epoch": 5.47, "learning_rate": 4.517543859649123e-05, "loss": 0.2823, "step": 625 }, { "epoch": 5.51, "learning_rate": 4.473684210526316e-05, "loss": 0.3153, "step": 630 }, { "epoch": 5.56, "learning_rate": 4.429824561403509e-05, "loss": 0.3139, "step": 635 }, { "epoch": 5.6, "learning_rate": 4.3859649122807014e-05, "loss": 0.3497, "step": 640 }, { "epoch": 5.65, "learning_rate": 4.342105263157895e-05, "loss": 0.2596, "step": 645 }, { "epoch": 5.69, "learning_rate": 4.298245614035088e-05, "loss": 0.3244, "step": 650 }, { "epoch": 5.73, "learning_rate": 4.254385964912281e-05, "loss": 0.2692, "step": 655 }, { "epoch": 5.78, "learning_rate": 4.210526315789474e-05, "loss": 0.2558, "step": 660 }, { "epoch": 5.82, "learning_rate": 4.166666666666667e-05, "loss": 0.234, "step": 665 }, { "epoch": 5.86, "learning_rate": 4.12280701754386e-05, "loss": 0.3071, "step": 670 }, { "epoch": 5.91, "learning_rate": 4.078947368421053e-05, "loss": 0.1587, "step": 675 }, { "epoch": 5.95, "learning_rate": 4.0350877192982455e-05, "loss": 0.2624, "step": 680 }, { "epoch": 6.0, "learning_rate": 3.991228070175439e-05, "loss": 0.2299, "step": 685 }, { "epoch": 6.0, "eval_accuracy": 0.9753694581280788, "eval_loss": 0.09334532916545868, "eval_runtime": 51.1765, "eval_samples_per_second": 23.8, "eval_steps_per_second": 2.99, "step": 685 }, { "epoch": 6.04, "learning_rate": 3.9473684210526316e-05, "loss": 0.1728, "step": 690 }, { "epoch": 6.08, "learning_rate": 3.9035087719298244e-05, "loss": 0.1751, "step": 695 }, { "epoch": 6.13, "learning_rate": 3.859649122807018e-05, "loss": 0.2653, "step": 700 }, { "epoch": 6.17, "learning_rate": 3.815789473684211e-05, "loss": 0.2508, "step": 705 }, { "epoch": 6.21, "learning_rate": 3.771929824561404e-05, "loss": 0.1909, "step": 710 }, { "epoch": 6.26, "learning_rate": 3.728070175438597e-05, "loss": 0.2278, "step": 715 }, { "epoch": 6.3, "learning_rate": 3.6842105263157895e-05, "loss": 0.2178, "step": 720 }, { "epoch": 6.35, "learning_rate": 3.640350877192983e-05, "loss": 0.2482, "step": 725 }, { "epoch": 6.39, "learning_rate": 3.5964912280701756e-05, "loss": 0.2605, "step": 730 }, { "epoch": 6.43, "learning_rate": 3.5526315789473684e-05, "loss": 0.3173, "step": 735 }, { "epoch": 6.48, "learning_rate": 3.508771929824561e-05, "loss": 0.1419, "step": 740 }, { "epoch": 6.52, "learning_rate": 3.4649122807017546e-05, "loss": 0.2116, "step": 745 }, { "epoch": 6.56, "learning_rate": 3.421052631578947e-05, "loss": 0.1438, "step": 750 }, { "epoch": 6.61, "learning_rate": 3.377192982456141e-05, "loss": 0.1522, "step": 755 }, { "epoch": 6.65, "learning_rate": 3.3333333333333335e-05, "loss": 0.1583, "step": 760 }, { "epoch": 6.7, "learning_rate": 3.289473684210527e-05, "loss": 0.1541, "step": 765 }, { "epoch": 6.74, "learning_rate": 3.24561403508772e-05, "loss": 0.2615, "step": 770 }, { "epoch": 6.78, "learning_rate": 3.2017543859649124e-05, "loss": 0.1603, "step": 775 }, { "epoch": 6.83, "learning_rate": 3.157894736842105e-05, "loss": 0.1657, "step": 780 }, { "epoch": 6.87, "learning_rate": 3.1140350877192986e-05, "loss": 0.1074, "step": 785 }, { "epoch": 6.91, "learning_rate": 3.0701754385964913e-05, "loss": 0.1414, "step": 790 }, { "epoch": 6.96, "learning_rate": 3.0263157894736844e-05, "loss": 0.2586, "step": 795 }, { "epoch": 6.99, "eval_accuracy": 0.986863711001642, "eval_loss": 0.031560178846120834, "eval_runtime": 51.3582, "eval_samples_per_second": 23.716, "eval_steps_per_second": 2.979, "step": 799 }, { "epoch": 7.0, "learning_rate": 2.9824561403508772e-05, "loss": 0.1753, "step": 800 }, { "epoch": 7.05, "learning_rate": 2.9385964912280706e-05, "loss": 0.1409, "step": 805 }, { "epoch": 7.09, "learning_rate": 2.8947368421052634e-05, "loss": 0.0889, "step": 810 }, { "epoch": 7.13, "learning_rate": 2.850877192982456e-05, "loss": 0.1245, "step": 815 }, { "epoch": 7.18, "learning_rate": 2.8070175438596492e-05, "loss": 0.116, "step": 820 }, { "epoch": 7.22, "learning_rate": 2.7631578947368426e-05, "loss": 0.196, "step": 825 }, { "epoch": 7.26, "learning_rate": 2.7192982456140354e-05, "loss": 0.0874, "step": 830 }, { "epoch": 7.31, "learning_rate": 2.675438596491228e-05, "loss": 0.2338, "step": 835 }, { "epoch": 7.35, "learning_rate": 2.6315789473684212e-05, "loss": 0.247, "step": 840 }, { "epoch": 7.4, "learning_rate": 2.5877192982456143e-05, "loss": 0.2131, "step": 845 }, { "epoch": 7.44, "learning_rate": 2.5438596491228074e-05, "loss": 0.2523, "step": 850 }, { "epoch": 7.48, "learning_rate": 2.5e-05, "loss": 0.1632, "step": 855 }, { "epoch": 7.53, "learning_rate": 2.456140350877193e-05, "loss": 0.1257, "step": 860 }, { "epoch": 7.57, "learning_rate": 2.412280701754386e-05, "loss": 0.1589, "step": 865 }, { "epoch": 7.61, "learning_rate": 2.368421052631579e-05, "loss": 0.1895, "step": 870 }, { "epoch": 7.66, "learning_rate": 2.324561403508772e-05, "loss": 0.1422, "step": 875 }, { "epoch": 7.7, "learning_rate": 2.280701754385965e-05, "loss": 0.1701, "step": 880 }, { "epoch": 7.75, "learning_rate": 2.236842105263158e-05, "loss": 0.0639, "step": 885 }, { "epoch": 7.79, "learning_rate": 2.1929824561403507e-05, "loss": 0.2418, "step": 890 }, { "epoch": 7.83, "learning_rate": 2.149122807017544e-05, "loss": 0.0928, "step": 895 }, { "epoch": 7.88, "learning_rate": 2.105263157894737e-05, "loss": 0.1306, "step": 900 }, { "epoch": 7.92, "learning_rate": 2.06140350877193e-05, "loss": 0.1759, "step": 905 }, { "epoch": 7.96, "learning_rate": 2.0175438596491227e-05, "loss": 0.1053, "step": 910 }, { "epoch": 8.0, "eval_accuracy": 0.9909688013136289, "eval_loss": 0.025560511276125908, "eval_runtime": 51.3274, "eval_samples_per_second": 23.73, "eval_steps_per_second": 2.981, "step": 914 }, { "epoch": 8.01, "learning_rate": 1.9736842105263158e-05, "loss": 0.1086, "step": 915 }, { "epoch": 8.05, "learning_rate": 1.929824561403509e-05, "loss": 0.1473, "step": 920 }, { "epoch": 8.1, "learning_rate": 1.885964912280702e-05, "loss": 0.047, "step": 925 }, { "epoch": 8.14, "learning_rate": 1.8421052631578947e-05, "loss": 0.1855, "step": 930 }, { "epoch": 8.18, "learning_rate": 1.7982456140350878e-05, "loss": 0.1216, "step": 935 }, { "epoch": 8.23, "learning_rate": 1.7543859649122806e-05, "loss": 0.1677, "step": 940 }, { "epoch": 8.27, "learning_rate": 1.7105263157894737e-05, "loss": 0.1112, "step": 945 }, { "epoch": 8.32, "learning_rate": 1.6666666666666667e-05, "loss": 0.0748, "step": 950 }, { "epoch": 8.36, "learning_rate": 1.62280701754386e-05, "loss": 0.124, "step": 955 }, { "epoch": 8.4, "learning_rate": 1.5789473684210526e-05, "loss": 0.1319, "step": 960 }, { "epoch": 8.45, "learning_rate": 1.5350877192982457e-05, "loss": 0.1867, "step": 965 }, { "epoch": 8.49, "learning_rate": 1.4912280701754386e-05, "loss": 0.1458, "step": 970 }, { "epoch": 8.53, "learning_rate": 1.4473684210526317e-05, "loss": 0.1482, "step": 975 }, { "epoch": 8.58, "learning_rate": 1.4035087719298246e-05, "loss": 0.122, "step": 980 }, { "epoch": 8.62, "learning_rate": 1.3596491228070177e-05, "loss": 0.0777, "step": 985 }, { "epoch": 8.67, "learning_rate": 1.3157894736842106e-05, "loss": 0.1007, "step": 990 }, { "epoch": 8.71, "learning_rate": 1.2719298245614037e-05, "loss": 0.1103, "step": 995 }, { "epoch": 8.75, "learning_rate": 1.2280701754385964e-05, "loss": 0.0859, "step": 1000 }, { "epoch": 8.8, "learning_rate": 1.1842105263157895e-05, "loss": 0.0928, "step": 1005 }, { "epoch": 8.84, "learning_rate": 1.1403508771929824e-05, "loss": 0.0475, "step": 1010 }, { "epoch": 8.88, "learning_rate": 1.0964912280701754e-05, "loss": 0.0863, "step": 1015 }, { "epoch": 8.93, "learning_rate": 1.0526315789473684e-05, "loss": 0.1315, "step": 1020 }, { "epoch": 8.97, "learning_rate": 1.0087719298245614e-05, "loss": 0.2159, "step": 1025 }, { "epoch": 9.0, "eval_accuracy": 0.9958949096880131, "eval_loss": 0.014652877114713192, "eval_runtime": 51.3705, "eval_samples_per_second": 23.71, "eval_steps_per_second": 2.978, "step": 1028 }, { "epoch": 9.02, "learning_rate": 9.649122807017545e-06, "loss": 0.1224, "step": 1030 }, { "epoch": 9.06, "learning_rate": 9.210526315789474e-06, "loss": 0.0728, "step": 1035 }, { "epoch": 9.1, "learning_rate": 8.771929824561403e-06, "loss": 0.0981, "step": 1040 }, { "epoch": 9.15, "learning_rate": 8.333333333333334e-06, "loss": 0.1195, "step": 1045 }, { "epoch": 9.19, "learning_rate": 7.894736842105263e-06, "loss": 0.1001, "step": 1050 }, { "epoch": 9.23, "learning_rate": 7.456140350877193e-06, "loss": 0.0816, "step": 1055 }, { "epoch": 9.28, "learning_rate": 7.017543859649123e-06, "loss": 0.1013, "step": 1060 }, { "epoch": 9.32, "learning_rate": 6.578947368421053e-06, "loss": 0.0797, "step": 1065 }, { "epoch": 9.37, "learning_rate": 6.140350877192982e-06, "loss": 0.0894, "step": 1070 }, { "epoch": 9.41, "learning_rate": 5.701754385964912e-06, "loss": 0.0353, "step": 1075 }, { "epoch": 9.45, "learning_rate": 5.263157894736842e-06, "loss": 0.0845, "step": 1080 }, { "epoch": 9.5, "learning_rate": 4.824561403508772e-06, "loss": 0.0855, "step": 1085 }, { "epoch": 9.54, "learning_rate": 4.3859649122807014e-06, "loss": 0.0886, "step": 1090 }, { "epoch": 9.58, "learning_rate": 3.9473684210526315e-06, "loss": 0.0394, "step": 1095 }, { "epoch": 9.63, "learning_rate": 3.5087719298245615e-06, "loss": 0.0745, "step": 1100 }, { "epoch": 9.67, "learning_rate": 3.070175438596491e-06, "loss": 0.073, "step": 1105 }, { "epoch": 9.72, "learning_rate": 2.631578947368421e-06, "loss": 0.0753, "step": 1110 }, { "epoch": 9.76, "learning_rate": 2.1929824561403507e-06, "loss": 0.0744, "step": 1115 }, { "epoch": 9.8, "learning_rate": 1.7543859649122807e-06, "loss": 0.1127, "step": 1120 }, { "epoch": 9.85, "learning_rate": 1.3157894736842106e-06, "loss": 0.123, "step": 1125 }, { "epoch": 9.89, "learning_rate": 8.771929824561404e-07, "loss": 0.0915, "step": 1130 }, { "epoch": 9.93, "learning_rate": 4.385964912280702e-07, "loss": 0.0422, "step": 1135 }, { "epoch": 9.98, "learning_rate": 0.0, "loss": 0.0607, "step": 1140 }, { "epoch": 9.98, "eval_accuracy": 0.9975369458128078, "eval_loss": 0.006894771475344896, "eval_runtime": 51.4226, "eval_samples_per_second": 23.686, "eval_steps_per_second": 2.975, "step": 1140 }, { "epoch": 9.98, "step": 1140, "total_flos": 8.390377142257582e+18, "train_loss": 0.465066796070651, "train_runtime": 8763.5167, "train_samples_per_second": 4.166, "train_steps_per_second": 0.13 } ], "max_steps": 1140, "num_train_epochs": 10, "total_flos": 8.390377142257582e+18, "trial_name": null, "trial_params": null }