{ "best_metric": 1.0, "best_model_checkpoint": "swinv2-large-patch4-window12to16-192to256-22kto1k-ft-finetuned-Lesion-Classification-HAM10000-S/checkpoint-1710", "epoch": 14.967177242888402, "global_step": 1710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.9239766081871344e-07, "loss": 2.0763, "step": 5 }, { "epoch": 0.09, "learning_rate": 5.847953216374269e-07, "loss": 2.0144, "step": 10 }, { "epoch": 0.13, "learning_rate": 8.771929824561404e-07, "loss": 2.0062, "step": 15 }, { "epoch": 0.18, "learning_rate": 1.1695906432748538e-06, "loss": 1.9886, "step": 20 }, { "epoch": 0.22, "learning_rate": 1.4619883040935671e-06, "loss": 1.9744, "step": 25 }, { "epoch": 0.26, "learning_rate": 1.7543859649122807e-06, "loss": 1.9513, "step": 30 }, { "epoch": 0.31, "learning_rate": 2.0467836257309943e-06, "loss": 1.9403, "step": 35 }, { "epoch": 0.35, "learning_rate": 2.3391812865497075e-06, "loss": 1.9093, "step": 40 }, { "epoch": 0.39, "learning_rate": 2.631578947368421e-06, "loss": 1.9076, "step": 45 }, { "epoch": 0.44, "learning_rate": 2.9239766081871343e-06, "loss": 1.8717, "step": 50 }, { "epoch": 0.48, "learning_rate": 3.216374269005848e-06, "loss": 1.8642, "step": 55 }, { "epoch": 0.53, "learning_rate": 3.5087719298245615e-06, "loss": 1.821, "step": 60 }, { "epoch": 0.57, "learning_rate": 3.8011695906432747e-06, "loss": 1.7219, "step": 65 }, { "epoch": 0.61, "learning_rate": 4.093567251461989e-06, "loss": 1.787, "step": 70 }, { "epoch": 0.66, "learning_rate": 4.3859649122807014e-06, "loss": 1.7157, "step": 75 }, { "epoch": 0.7, "learning_rate": 4.678362573099415e-06, "loss": 1.6594, "step": 80 }, { "epoch": 0.74, "learning_rate": 4.970760233918129e-06, "loss": 1.6312, "step": 85 }, { "epoch": 0.79, "learning_rate": 5.263157894736842e-06, "loss": 1.5143, "step": 90 }, { "epoch": 0.83, "learning_rate": 5.555555555555556e-06, "loss": 1.5347, "step": 95 }, { "epoch": 0.88, "learning_rate": 5.8479532163742686e-06, "loss": 1.4393, "step": 100 }, { "epoch": 0.92, "learning_rate": 6.140350877192982e-06, "loss": 1.3955, "step": 105 }, { "epoch": 0.96, "learning_rate": 6.432748538011696e-06, "loss": 1.3289, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.6247947454844006, "eval_loss": 1.0632905960083008, "eval_runtime": 838.3049, "eval_samples_per_second": 1.453, "eval_steps_per_second": 0.183, "step": 114 }, { "epoch": 1.01, "learning_rate": 6.725146198830409e-06, "loss": 1.2315, "step": 115 }, { "epoch": 1.05, "learning_rate": 7.017543859649123e-06, "loss": 1.1319, "step": 120 }, { "epoch": 1.09, "learning_rate": 7.3099415204678366e-06, "loss": 1.2158, "step": 125 }, { "epoch": 1.14, "learning_rate": 7.602339181286549e-06, "loss": 1.1509, "step": 130 }, { "epoch": 1.18, "learning_rate": 7.894736842105263e-06, "loss": 1.0971, "step": 135 }, { "epoch": 1.23, "learning_rate": 8.187134502923977e-06, "loss": 1.1285, "step": 140 }, { "epoch": 1.27, "learning_rate": 8.47953216374269e-06, "loss": 0.9948, "step": 145 }, { "epoch": 1.31, "learning_rate": 8.771929824561403e-06, "loss": 0.9737, "step": 150 }, { "epoch": 1.36, "learning_rate": 9.064327485380117e-06, "loss": 1.0149, "step": 155 }, { "epoch": 1.4, "learning_rate": 9.35672514619883e-06, "loss": 0.9684, "step": 160 }, { "epoch": 1.44, "learning_rate": 9.649122807017545e-06, "loss": 0.9588, "step": 165 }, { "epoch": 1.49, "learning_rate": 9.941520467836257e-06, "loss": 0.7973, "step": 170 }, { "epoch": 1.53, "learning_rate": 1.023391812865497e-05, "loss": 0.8688, "step": 175 }, { "epoch": 1.58, "learning_rate": 1.0526315789473684e-05, "loss": 0.9191, "step": 180 }, { "epoch": 1.62, "learning_rate": 1.0818713450292397e-05, "loss": 0.864, "step": 185 }, { "epoch": 1.66, "learning_rate": 1.1111111111111112e-05, "loss": 0.8442, "step": 190 }, { "epoch": 1.71, "learning_rate": 1.1403508771929824e-05, "loss": 0.954, "step": 195 }, { "epoch": 1.75, "learning_rate": 1.1695906432748537e-05, "loss": 0.7894, "step": 200 }, { "epoch": 1.79, "learning_rate": 1.1988304093567252e-05, "loss": 0.7701, "step": 205 }, { "epoch": 1.84, "learning_rate": 1.2280701754385964e-05, "loss": 0.7477, "step": 210 }, { "epoch": 1.88, "learning_rate": 1.2573099415204679e-05, "loss": 0.7278, "step": 215 }, { "epoch": 1.93, "learning_rate": 1.2865497076023392e-05, "loss": 0.7302, "step": 220 }, { "epoch": 1.97, "learning_rate": 1.3157894736842106e-05, "loss": 0.7956, "step": 225 }, { "epoch": 2.0, "eval_accuracy": 0.8103448275862069, "eval_loss": 0.504953145980835, "eval_runtime": 54.2638, "eval_samples_per_second": 22.446, "eval_steps_per_second": 2.82, "step": 228 }, { "epoch": 2.01, "learning_rate": 1.3450292397660819e-05, "loss": 0.6622, "step": 230 }, { "epoch": 2.06, "learning_rate": 1.3742690058479531e-05, "loss": 0.6419, "step": 235 }, { "epoch": 2.1, "learning_rate": 1.4035087719298246e-05, "loss": 0.5583, "step": 240 }, { "epoch": 2.14, "learning_rate": 1.4327485380116959e-05, "loss": 0.6594, "step": 245 }, { "epoch": 2.19, "learning_rate": 1.4619883040935673e-05, "loss": 0.6647, "step": 250 }, { "epoch": 2.23, "learning_rate": 1.4912280701754386e-05, "loss": 0.5958, "step": 255 }, { "epoch": 2.28, "learning_rate": 1.5204678362573099e-05, "loss": 0.6235, "step": 260 }, { "epoch": 2.32, "learning_rate": 1.5497076023391813e-05, "loss": 0.5793, "step": 265 }, { "epoch": 2.36, "learning_rate": 1.5789473684210526e-05, "loss": 0.5209, "step": 270 }, { "epoch": 2.41, "learning_rate": 1.608187134502924e-05, "loss": 0.6693, "step": 275 }, { "epoch": 2.45, "learning_rate": 1.6374269005847955e-05, "loss": 0.6411, "step": 280 }, { "epoch": 2.49, "learning_rate": 1.6666666666666667e-05, "loss": 0.5181, "step": 285 }, { "epoch": 2.54, "learning_rate": 1.695906432748538e-05, "loss": 0.6182, "step": 290 }, { "epoch": 2.58, "learning_rate": 1.7251461988304093e-05, "loss": 0.5462, "step": 295 }, { "epoch": 2.63, "learning_rate": 1.7543859649122806e-05, "loss": 0.6361, "step": 300 }, { "epoch": 2.67, "learning_rate": 1.7836257309941522e-05, "loss": 0.548, "step": 305 }, { "epoch": 2.71, "learning_rate": 1.8128654970760235e-05, "loss": 0.5446, "step": 310 }, { "epoch": 2.76, "learning_rate": 1.8421052631578947e-05, "loss": 0.6898, "step": 315 }, { "epoch": 2.8, "learning_rate": 1.871345029239766e-05, "loss": 0.6384, "step": 320 }, { "epoch": 2.84, "learning_rate": 1.9005847953216373e-05, "loss": 0.5624, "step": 325 }, { "epoch": 2.89, "learning_rate": 1.929824561403509e-05, "loss": 0.7614, "step": 330 }, { "epoch": 2.93, "learning_rate": 1.9590643274853802e-05, "loss": 0.6112, "step": 335 }, { "epoch": 2.98, "learning_rate": 1.9883040935672515e-05, "loss": 0.5253, "step": 340 }, { "epoch": 2.99, "eval_accuracy": 0.90311986863711, "eval_loss": 0.30125120282173157, "eval_runtime": 54.2496, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.82, "step": 342 }, { "epoch": 3.02, "learning_rate": 2.0175438596491227e-05, "loss": 0.441, "step": 345 }, { "epoch": 3.06, "learning_rate": 2.046783625730994e-05, "loss": 0.5173, "step": 350 }, { "epoch": 3.11, "learning_rate": 2.0760233918128656e-05, "loss": 0.5399, "step": 355 }, { "epoch": 3.15, "learning_rate": 2.105263157894737e-05, "loss": 0.4442, "step": 360 }, { "epoch": 3.19, "learning_rate": 2.134502923976608e-05, "loss": 0.5029, "step": 365 }, { "epoch": 3.24, "learning_rate": 2.1637426900584794e-05, "loss": 0.3489, "step": 370 }, { "epoch": 3.28, "learning_rate": 2.1929824561403507e-05, "loss": 0.4367, "step": 375 }, { "epoch": 3.33, "learning_rate": 2.2222222222222223e-05, "loss": 0.3868, "step": 380 }, { "epoch": 3.37, "learning_rate": 2.2514619883040936e-05, "loss": 0.479, "step": 385 }, { "epoch": 3.41, "learning_rate": 2.280701754385965e-05, "loss": 0.4852, "step": 390 }, { "epoch": 3.46, "learning_rate": 2.309941520467836e-05, "loss": 0.4858, "step": 395 }, { "epoch": 3.5, "learning_rate": 2.3391812865497074e-05, "loss": 0.4333, "step": 400 }, { "epoch": 3.54, "learning_rate": 2.368421052631579e-05, "loss": 0.5477, "step": 405 }, { "epoch": 3.59, "learning_rate": 2.3976608187134503e-05, "loss": 0.4052, "step": 410 }, { "epoch": 3.63, "learning_rate": 2.4269005847953216e-05, "loss": 0.4993, "step": 415 }, { "epoch": 3.68, "learning_rate": 2.456140350877193e-05, "loss": 0.3349, "step": 420 }, { "epoch": 3.72, "learning_rate": 2.485380116959064e-05, "loss": 0.3309, "step": 425 }, { "epoch": 3.76, "learning_rate": 2.5146198830409358e-05, "loss": 0.4864, "step": 430 }, { "epoch": 3.81, "learning_rate": 2.5438596491228074e-05, "loss": 0.4098, "step": 435 }, { "epoch": 3.85, "learning_rate": 2.5730994152046783e-05, "loss": 0.3193, "step": 440 }, { "epoch": 3.89, "learning_rate": 2.60233918128655e-05, "loss": 0.57, "step": 445 }, { "epoch": 3.94, "learning_rate": 2.6315789473684212e-05, "loss": 0.4372, "step": 450 }, { "epoch": 3.98, "learning_rate": 2.6608187134502928e-05, "loss": 0.2958, "step": 455 }, { "epoch": 4.0, "eval_accuracy": 0.9523809523809523, "eval_loss": 0.1533823311328888, "eval_runtime": 54.2646, "eval_samples_per_second": 22.446, "eval_steps_per_second": 2.82, "step": 457 }, { "epoch": 4.03, "learning_rate": 2.6900584795321637e-05, "loss": 0.3616, "step": 460 }, { "epoch": 4.07, "learning_rate": 2.7192982456140354e-05, "loss": 0.2978, "step": 465 }, { "epoch": 4.11, "learning_rate": 2.7485380116959063e-05, "loss": 0.3206, "step": 470 }, { "epoch": 4.16, "learning_rate": 2.777777777777778e-05, "loss": 0.3085, "step": 475 }, { "epoch": 4.2, "learning_rate": 2.8070175438596492e-05, "loss": 0.3722, "step": 480 }, { "epoch": 4.25, "learning_rate": 2.8362573099415208e-05, "loss": 0.3702, "step": 485 }, { "epoch": 4.29, "learning_rate": 2.8654970760233917e-05, "loss": 0.3533, "step": 490 }, { "epoch": 4.33, "learning_rate": 2.8947368421052634e-05, "loss": 0.2361, "step": 495 }, { "epoch": 4.38, "learning_rate": 2.9239766081871346e-05, "loss": 0.2779, "step": 500 }, { "epoch": 4.42, "learning_rate": 2.9532163742690062e-05, "loss": 0.3753, "step": 505 }, { "epoch": 4.46, "learning_rate": 2.9824561403508772e-05, "loss": 0.3958, "step": 510 }, { "epoch": 4.51, "learning_rate": 3.0116959064327488e-05, "loss": 0.2555, "step": 515 }, { "epoch": 4.55, "learning_rate": 3.0409356725146197e-05, "loss": 0.2769, "step": 520 }, { "epoch": 4.6, "learning_rate": 3.0701754385964913e-05, "loss": 0.3126, "step": 525 }, { "epoch": 4.64, "learning_rate": 3.0994152046783626e-05, "loss": 0.3055, "step": 530 }, { "epoch": 4.68, "learning_rate": 3.128654970760234e-05, "loss": 0.3042, "step": 535 }, { "epoch": 4.73, "learning_rate": 3.157894736842105e-05, "loss": 0.4877, "step": 540 }, { "epoch": 4.77, "learning_rate": 3.187134502923977e-05, "loss": 0.2869, "step": 545 }, { "epoch": 4.81, "learning_rate": 3.216374269005848e-05, "loss": 0.2767, "step": 550 }, { "epoch": 4.86, "learning_rate": 3.24561403508772e-05, "loss": 0.3141, "step": 555 }, { "epoch": 4.9, "learning_rate": 3.274853801169591e-05, "loss": 0.3657, "step": 560 }, { "epoch": 4.95, "learning_rate": 3.304093567251462e-05, "loss": 0.2905, "step": 565 }, { "epoch": 4.99, "learning_rate": 3.3333333333333335e-05, "loss": 0.276, "step": 570 }, { "epoch": 5.0, "eval_accuracy": 0.9334975369458128, "eval_loss": 0.1825261414051056, "eval_runtime": 54.1455, "eval_samples_per_second": 22.495, "eval_steps_per_second": 2.826, "step": 571 }, { "epoch": 5.03, "learning_rate": 3.362573099415205e-05, "loss": 0.2115, "step": 575 }, { "epoch": 5.08, "learning_rate": 3.391812865497076e-05, "loss": 0.34, "step": 580 }, { "epoch": 5.12, "learning_rate": 3.421052631578947e-05, "loss": 0.3145, "step": 585 }, { "epoch": 5.16, "learning_rate": 3.4502923976608186e-05, "loss": 0.2357, "step": 590 }, { "epoch": 5.21, "learning_rate": 3.4795321637426905e-05, "loss": 0.1658, "step": 595 }, { "epoch": 5.25, "learning_rate": 3.508771929824561e-05, "loss": 0.2002, "step": 600 }, { "epoch": 5.3, "learning_rate": 3.538011695906433e-05, "loss": 0.2387, "step": 605 }, { "epoch": 5.34, "learning_rate": 3.5672514619883044e-05, "loss": 0.2309, "step": 610 }, { "epoch": 5.38, "learning_rate": 3.5964912280701756e-05, "loss": 0.2013, "step": 615 }, { "epoch": 5.43, "learning_rate": 3.625730994152047e-05, "loss": 0.2333, "step": 620 }, { "epoch": 5.47, "learning_rate": 3.654970760233918e-05, "loss": 0.4043, "step": 625 }, { "epoch": 5.51, "learning_rate": 3.6842105263157895e-05, "loss": 0.2156, "step": 630 }, { "epoch": 5.56, "learning_rate": 3.713450292397661e-05, "loss": 0.3377, "step": 635 }, { "epoch": 5.6, "learning_rate": 3.742690058479532e-05, "loss": 0.2842, "step": 640 }, { "epoch": 5.65, "learning_rate": 3.771929824561404e-05, "loss": 0.1925, "step": 645 }, { "epoch": 5.69, "learning_rate": 3.8011695906432746e-05, "loss": 0.347, "step": 650 }, { "epoch": 5.73, "learning_rate": 3.8304093567251465e-05, "loss": 0.2794, "step": 655 }, { "epoch": 5.78, "learning_rate": 3.859649122807018e-05, "loss": 0.244, "step": 660 }, { "epoch": 5.82, "learning_rate": 3.888888888888889e-05, "loss": 0.3321, "step": 665 }, { "epoch": 5.86, "learning_rate": 3.9181286549707604e-05, "loss": 0.2099, "step": 670 }, { "epoch": 5.91, "learning_rate": 3.9473684210526316e-05, "loss": 0.2861, "step": 675 }, { "epoch": 5.95, "learning_rate": 3.976608187134503e-05, "loss": 0.2936, "step": 680 }, { "epoch": 6.0, "learning_rate": 4.005847953216375e-05, "loss": 0.2556, "step": 685 }, { "epoch": 6.0, "eval_accuracy": 0.9729064039408867, "eval_loss": 0.07233729213476181, "eval_runtime": 54.3781, "eval_samples_per_second": 22.399, "eval_steps_per_second": 2.814, "step": 685 }, { "epoch": 6.04, "learning_rate": 4.0350877192982455e-05, "loss": 0.2191, "step": 690 }, { "epoch": 6.08, "learning_rate": 4.0643274853801174e-05, "loss": 0.2091, "step": 695 }, { "epoch": 6.13, "learning_rate": 4.093567251461988e-05, "loss": 0.3105, "step": 700 }, { "epoch": 6.17, "learning_rate": 4.12280701754386e-05, "loss": 0.1681, "step": 705 }, { "epoch": 6.21, "learning_rate": 4.152046783625731e-05, "loss": 0.2344, "step": 710 }, { "epoch": 6.26, "learning_rate": 4.1812865497076025e-05, "loss": 0.3378, "step": 715 }, { "epoch": 6.3, "learning_rate": 4.210526315789474e-05, "loss": 0.3698, "step": 720 }, { "epoch": 6.35, "learning_rate": 4.239766081871345e-05, "loss": 0.2361, "step": 725 }, { "epoch": 6.39, "learning_rate": 4.269005847953216e-05, "loss": 0.4333, "step": 730 }, { "epoch": 6.43, "learning_rate": 4.298245614035088e-05, "loss": 0.3148, "step": 735 }, { "epoch": 6.48, "learning_rate": 4.327485380116959e-05, "loss": 0.3368, "step": 740 }, { "epoch": 6.52, "learning_rate": 4.356725146198831e-05, "loss": 0.2656, "step": 745 }, { "epoch": 6.56, "learning_rate": 4.3859649122807014e-05, "loss": 0.2234, "step": 750 }, { "epoch": 6.61, "learning_rate": 4.4152046783625734e-05, "loss": 0.1766, "step": 755 }, { "epoch": 6.65, "learning_rate": 4.4444444444444447e-05, "loss": 0.2741, "step": 760 }, { "epoch": 6.7, "learning_rate": 4.473684210526316e-05, "loss": 0.473, "step": 765 }, { "epoch": 6.74, "learning_rate": 4.502923976608187e-05, "loss": 0.3843, "step": 770 }, { "epoch": 6.78, "learning_rate": 4.5321637426900585e-05, "loss": 0.2639, "step": 775 }, { "epoch": 6.83, "learning_rate": 4.56140350877193e-05, "loss": 0.2179, "step": 780 }, { "epoch": 6.87, "learning_rate": 4.590643274853802e-05, "loss": 0.1746, "step": 785 }, { "epoch": 6.91, "learning_rate": 4.619883040935672e-05, "loss": 0.233, "step": 790 }, { "epoch": 6.96, "learning_rate": 4.649122807017544e-05, "loss": 0.3624, "step": 795 }, { "epoch": 6.99, "eval_accuracy": 0.9482758620689655, "eval_loss": 0.12682275474071503, "eval_runtime": 54.3592, "eval_samples_per_second": 22.407, "eval_steps_per_second": 2.815, "step": 799 }, { "epoch": 7.0, "learning_rate": 4.678362573099415e-05, "loss": 0.2835, "step": 800 }, { "epoch": 7.05, "learning_rate": 4.707602339181287e-05, "loss": 0.2004, "step": 805 }, { "epoch": 7.09, "learning_rate": 4.736842105263158e-05, "loss": 0.2604, "step": 810 }, { "epoch": 7.13, "learning_rate": 4.7660818713450294e-05, "loss": 0.2926, "step": 815 }, { "epoch": 7.18, "learning_rate": 4.7953216374269006e-05, "loss": 0.1887, "step": 820 }, { "epoch": 7.22, "learning_rate": 4.824561403508772e-05, "loss": 0.2633, "step": 825 }, { "epoch": 7.26, "learning_rate": 4.853801169590643e-05, "loss": 0.382, "step": 830 }, { "epoch": 7.31, "learning_rate": 4.883040935672515e-05, "loss": 0.2431, "step": 835 }, { "epoch": 7.35, "learning_rate": 4.912280701754386e-05, "loss": 0.2928, "step": 840 }, { "epoch": 7.4, "learning_rate": 4.941520467836258e-05, "loss": 0.2424, "step": 845 }, { "epoch": 7.44, "learning_rate": 4.970760233918128e-05, "loss": 0.3165, "step": 850 }, { "epoch": 7.48, "learning_rate": 5e-05, "loss": 0.4459, "step": 855 }, { "epoch": 7.53, "learning_rate": 4.970760233918128e-05, "loss": 0.2964, "step": 860 }, { "epoch": 7.57, "learning_rate": 4.941520467836258e-05, "loss": 0.37, "step": 865 }, { "epoch": 7.61, "learning_rate": 4.912280701754386e-05, "loss": 0.2116, "step": 870 }, { "epoch": 7.66, "learning_rate": 4.883040935672515e-05, "loss": 0.1844, "step": 875 }, { "epoch": 7.7, "learning_rate": 4.853801169590643e-05, "loss": 0.215, "step": 880 }, { "epoch": 7.75, "learning_rate": 4.824561403508772e-05, "loss": 0.3179, "step": 885 }, { "epoch": 7.79, "learning_rate": 4.7953216374269006e-05, "loss": 0.2767, "step": 890 }, { "epoch": 7.83, "learning_rate": 4.7660818713450294e-05, "loss": 0.3065, "step": 895 }, { "epoch": 7.88, "learning_rate": 4.736842105263158e-05, "loss": 0.2597, "step": 900 }, { "epoch": 7.92, "learning_rate": 4.707602339181287e-05, "loss": 0.3216, "step": 905 }, { "epoch": 7.96, "learning_rate": 4.678362573099415e-05, "loss": 0.1986, "step": 910 }, { "epoch": 8.0, "eval_accuracy": 0.9778325123152709, "eval_loss": 0.052232660353183746, "eval_runtime": 54.3544, "eval_samples_per_second": 22.408, "eval_steps_per_second": 2.815, "step": 914 }, { "epoch": 8.01, "learning_rate": 4.649122807017544e-05, "loss": 0.197, "step": 915 }, { "epoch": 8.05, "learning_rate": 4.619883040935672e-05, "loss": 0.2028, "step": 920 }, { "epoch": 8.1, "learning_rate": 4.590643274853802e-05, "loss": 0.1589, "step": 925 }, { "epoch": 8.14, "learning_rate": 4.56140350877193e-05, "loss": 0.1238, "step": 930 }, { "epoch": 8.18, "learning_rate": 4.5321637426900585e-05, "loss": 0.1461, "step": 935 }, { "epoch": 8.23, "learning_rate": 4.502923976608187e-05, "loss": 0.146, "step": 940 }, { "epoch": 8.27, "learning_rate": 4.473684210526316e-05, "loss": 0.2098, "step": 945 }, { "epoch": 8.32, "learning_rate": 4.4444444444444447e-05, "loss": 0.1078, "step": 950 }, { "epoch": 8.36, "learning_rate": 4.4152046783625734e-05, "loss": 0.2042, "step": 955 }, { "epoch": 8.4, "learning_rate": 4.3859649122807014e-05, "loss": 0.2362, "step": 960 }, { "epoch": 8.45, "learning_rate": 4.356725146198831e-05, "loss": 0.2723, "step": 965 }, { "epoch": 8.49, "learning_rate": 4.327485380116959e-05, "loss": 0.2668, "step": 970 }, { "epoch": 8.53, "learning_rate": 4.298245614035088e-05, "loss": 0.1731, "step": 975 }, { "epoch": 8.58, "learning_rate": 4.269005847953216e-05, "loss": 0.2237, "step": 980 }, { "epoch": 8.62, "learning_rate": 4.239766081871345e-05, "loss": 0.2593, "step": 985 }, { "epoch": 8.67, "learning_rate": 4.210526315789474e-05, "loss": 0.1881, "step": 990 }, { "epoch": 8.71, "learning_rate": 4.1812865497076025e-05, "loss": 0.1066, "step": 995 }, { "epoch": 8.75, "learning_rate": 4.152046783625731e-05, "loss": 0.1722, "step": 1000 }, { "epoch": 8.8, "learning_rate": 4.12280701754386e-05, "loss": 0.2162, "step": 1005 }, { "epoch": 8.84, "learning_rate": 4.093567251461988e-05, "loss": 0.2242, "step": 1010 }, { "epoch": 8.88, "learning_rate": 4.0643274853801174e-05, "loss": 0.2441, "step": 1015 }, { "epoch": 8.93, "learning_rate": 4.0350877192982455e-05, "loss": 0.1766, "step": 1020 }, { "epoch": 8.97, "learning_rate": 4.005847953216375e-05, "loss": 0.1554, "step": 1025 }, { "epoch": 9.0, "eval_accuracy": 0.9926108374384236, "eval_loss": 0.020505670458078384, "eval_runtime": 54.405, "eval_samples_per_second": 22.388, "eval_steps_per_second": 2.812, "step": 1028 }, { "epoch": 9.02, "learning_rate": 3.976608187134503e-05, "loss": 0.0887, "step": 1030 }, { "epoch": 9.06, "learning_rate": 3.9473684210526316e-05, "loss": 0.2388, "step": 1035 }, { "epoch": 9.1, "learning_rate": 3.9181286549707604e-05, "loss": 0.2096, "step": 1040 }, { "epoch": 9.15, "learning_rate": 3.888888888888889e-05, "loss": 0.2445, "step": 1045 }, { "epoch": 9.19, "learning_rate": 3.859649122807018e-05, "loss": 0.2204, "step": 1050 }, { "epoch": 9.23, "learning_rate": 3.8304093567251465e-05, "loss": 0.1205, "step": 1055 }, { "epoch": 9.28, "learning_rate": 3.8011695906432746e-05, "loss": 0.1625, "step": 1060 }, { "epoch": 9.32, "learning_rate": 3.771929824561404e-05, "loss": 0.1143, "step": 1065 }, { "epoch": 9.37, "learning_rate": 3.742690058479532e-05, "loss": 0.1649, "step": 1070 }, { "epoch": 9.41, "learning_rate": 3.713450292397661e-05, "loss": 0.2073, "step": 1075 }, { "epoch": 9.45, "learning_rate": 3.6842105263157895e-05, "loss": 0.1361, "step": 1080 }, { "epoch": 9.5, "learning_rate": 3.654970760233918e-05, "loss": 0.1194, "step": 1085 }, { "epoch": 9.54, "learning_rate": 3.625730994152047e-05, "loss": 0.2756, "step": 1090 }, { "epoch": 9.58, "learning_rate": 3.5964912280701756e-05, "loss": 0.2033, "step": 1095 }, { "epoch": 9.63, "learning_rate": 3.5672514619883044e-05, "loss": 0.1373, "step": 1100 }, { "epoch": 9.67, "learning_rate": 3.538011695906433e-05, "loss": 0.1226, "step": 1105 }, { "epoch": 9.72, "learning_rate": 3.508771929824561e-05, "loss": 0.1087, "step": 1110 }, { "epoch": 9.76, "learning_rate": 3.4795321637426905e-05, "loss": 0.0839, "step": 1115 }, { "epoch": 9.8, "learning_rate": 3.4502923976608186e-05, "loss": 0.1183, "step": 1120 }, { "epoch": 9.85, "learning_rate": 3.421052631578947e-05, "loss": 0.2259, "step": 1125 }, { "epoch": 9.89, "learning_rate": 3.391812865497076e-05, "loss": 0.1027, "step": 1130 }, { "epoch": 9.93, "learning_rate": 3.362573099415205e-05, "loss": 0.0893, "step": 1135 }, { "epoch": 9.98, "learning_rate": 3.3333333333333335e-05, "loss": 0.1636, "step": 1140 }, { "epoch": 10.0, "eval_accuracy": 0.9950738916256158, "eval_loss": 0.019661663100123405, "eval_runtime": 54.4421, "eval_samples_per_second": 22.372, "eval_steps_per_second": 2.81, "step": 1142 }, { "epoch": 10.02, "learning_rate": 3.304093567251462e-05, "loss": 0.1459, "step": 1145 }, { "epoch": 10.07, "learning_rate": 3.274853801169591e-05, "loss": 0.1303, "step": 1150 }, { "epoch": 10.11, "learning_rate": 3.24561403508772e-05, "loss": 0.2256, "step": 1155 }, { "epoch": 10.15, "learning_rate": 3.216374269005848e-05, "loss": 0.2763, "step": 1160 }, { "epoch": 10.2, "learning_rate": 3.187134502923977e-05, "loss": 0.1605, "step": 1165 }, { "epoch": 10.24, "learning_rate": 3.157894736842105e-05, "loss": 0.14, "step": 1170 }, { "epoch": 10.28, "learning_rate": 3.128654970760234e-05, "loss": 0.1832, "step": 1175 }, { "epoch": 10.33, "learning_rate": 3.0994152046783626e-05, "loss": 0.1387, "step": 1180 }, { "epoch": 10.37, "learning_rate": 3.0701754385964913e-05, "loss": 0.1055, "step": 1185 }, { "epoch": 10.42, "learning_rate": 3.0409356725146197e-05, "loss": 0.1302, "step": 1190 }, { "epoch": 10.46, "learning_rate": 3.0116959064327488e-05, "loss": 0.0993, "step": 1195 }, { "epoch": 10.5, "learning_rate": 2.9824561403508772e-05, "loss": 0.174, "step": 1200 }, { "epoch": 10.55, "learning_rate": 2.9532163742690062e-05, "loss": 0.1348, "step": 1205 }, { "epoch": 10.59, "learning_rate": 2.9239766081871346e-05, "loss": 0.12, "step": 1210 }, { "epoch": 10.63, "learning_rate": 2.8947368421052634e-05, "loss": 0.2483, "step": 1215 }, { "epoch": 10.68, "learning_rate": 2.8654970760233917e-05, "loss": 0.0965, "step": 1220 }, { "epoch": 10.72, "learning_rate": 2.8362573099415208e-05, "loss": 0.1754, "step": 1225 }, { "epoch": 10.77, "learning_rate": 2.8070175438596492e-05, "loss": 0.1192, "step": 1230 }, { "epoch": 10.81, "learning_rate": 2.777777777777778e-05, "loss": 0.1869, "step": 1235 }, { "epoch": 10.85, "learning_rate": 2.7485380116959063e-05, "loss": 0.0549, "step": 1240 }, { "epoch": 10.9, "learning_rate": 2.7192982456140354e-05, "loss": 0.1127, "step": 1245 }, { "epoch": 10.94, "learning_rate": 2.6900584795321637e-05, "loss": 0.1294, "step": 1250 }, { "epoch": 10.98, "learning_rate": 2.6608187134502928e-05, "loss": 0.1147, "step": 1255 }, { "epoch": 10.99, "eval_accuracy": 0.9835796387520526, "eval_loss": 0.05173669755458832, "eval_runtime": 54.2024, "eval_samples_per_second": 22.471, "eval_steps_per_second": 2.823, "step": 1256 }, { "epoch": 11.03, "learning_rate": 2.6315789473684212e-05, "loss": 0.1873, "step": 1260 }, { "epoch": 11.07, "learning_rate": 2.60233918128655e-05, "loss": 0.1212, "step": 1265 }, { "epoch": 11.12, "learning_rate": 2.5730994152046783e-05, "loss": 0.0422, "step": 1270 }, { "epoch": 11.16, "learning_rate": 2.5438596491228074e-05, "loss": 0.1971, "step": 1275 }, { "epoch": 11.2, "learning_rate": 2.5146198830409358e-05, "loss": 0.0876, "step": 1280 }, { "epoch": 11.25, "learning_rate": 2.485380116959064e-05, "loss": 0.1424, "step": 1285 }, { "epoch": 11.29, "learning_rate": 2.456140350877193e-05, "loss": 0.0589, "step": 1290 }, { "epoch": 11.33, "learning_rate": 2.4269005847953216e-05, "loss": 0.0975, "step": 1295 }, { "epoch": 11.38, "learning_rate": 2.3976608187134503e-05, "loss": 0.1219, "step": 1300 }, { "epoch": 11.42, "learning_rate": 2.368421052631579e-05, "loss": 0.1359, "step": 1305 }, { "epoch": 11.47, "learning_rate": 2.3391812865497074e-05, "loss": 0.0449, "step": 1310 }, { "epoch": 11.51, "learning_rate": 2.309941520467836e-05, "loss": 0.1362, "step": 1315 }, { "epoch": 11.55, "learning_rate": 2.280701754385965e-05, "loss": 0.1156, "step": 1320 }, { "epoch": 11.6, "learning_rate": 2.2514619883040936e-05, "loss": 0.0743, "step": 1325 }, { "epoch": 11.64, "learning_rate": 2.2222222222222223e-05, "loss": 0.2625, "step": 1330 }, { "epoch": 11.68, "learning_rate": 2.1929824561403507e-05, "loss": 0.065, "step": 1335 }, { "epoch": 11.73, "learning_rate": 2.1637426900584794e-05, "loss": 0.1152, "step": 1340 }, { "epoch": 11.77, "learning_rate": 2.134502923976608e-05, "loss": 0.1055, "step": 1345 }, { "epoch": 11.82, "learning_rate": 2.105263157894737e-05, "loss": 0.0941, "step": 1350 }, { "epoch": 11.86, "learning_rate": 2.0760233918128656e-05, "loss": 0.1564, "step": 1355 }, { "epoch": 11.9, "learning_rate": 2.046783625730994e-05, "loss": 0.151, "step": 1360 }, { "epoch": 11.95, "learning_rate": 2.0175438596491227e-05, "loss": 0.0964, "step": 1365 }, { "epoch": 11.99, "learning_rate": 1.9883040935672515e-05, "loss": 0.1663, "step": 1370 }, { "epoch": 12.0, "eval_accuracy": 0.9958949096880131, "eval_loss": 0.005566800944507122, "eval_runtime": 54.1153, "eval_samples_per_second": 22.507, "eval_steps_per_second": 2.827, "step": 1371 }, { "epoch": 12.04, "learning_rate": 1.9590643274853802e-05, "loss": 0.0835, "step": 1375 }, { "epoch": 12.08, "learning_rate": 1.929824561403509e-05, "loss": 0.0809, "step": 1380 }, { "epoch": 12.12, "learning_rate": 1.9005847953216373e-05, "loss": 0.1657, "step": 1385 }, { "epoch": 12.17, "learning_rate": 1.871345029239766e-05, "loss": 0.0642, "step": 1390 }, { "epoch": 12.21, "learning_rate": 1.8421052631578947e-05, "loss": 0.1124, "step": 1395 }, { "epoch": 12.25, "learning_rate": 1.8128654970760235e-05, "loss": 0.1444, "step": 1400 }, { "epoch": 12.3, "learning_rate": 1.7836257309941522e-05, "loss": 0.0315, "step": 1405 }, { "epoch": 12.34, "learning_rate": 1.7543859649122806e-05, "loss": 0.1471, "step": 1410 }, { "epoch": 12.39, "learning_rate": 1.7251461988304093e-05, "loss": 0.091, "step": 1415 }, { "epoch": 12.43, "learning_rate": 1.695906432748538e-05, "loss": 0.167, "step": 1420 }, { "epoch": 12.47, "learning_rate": 1.6666666666666667e-05, "loss": 0.0769, "step": 1425 }, { "epoch": 12.52, "learning_rate": 1.6374269005847955e-05, "loss": 0.1187, "step": 1430 }, { "epoch": 12.56, "learning_rate": 1.608187134502924e-05, "loss": 0.1473, "step": 1435 }, { "epoch": 12.6, "learning_rate": 1.5789473684210526e-05, "loss": 0.0737, "step": 1440 }, { "epoch": 12.65, "learning_rate": 1.5497076023391813e-05, "loss": 0.0668, "step": 1445 }, { "epoch": 12.69, "learning_rate": 1.5204678362573099e-05, "loss": 0.1488, "step": 1450 }, { "epoch": 12.74, "learning_rate": 1.4912280701754386e-05, "loss": 0.0413, "step": 1455 }, { "epoch": 12.78, "learning_rate": 1.4619883040935673e-05, "loss": 0.1062, "step": 1460 }, { "epoch": 12.82, "learning_rate": 1.4327485380116959e-05, "loss": 0.1047, "step": 1465 }, { "epoch": 12.87, "learning_rate": 1.4035087719298246e-05, "loss": 0.068, "step": 1470 }, { "epoch": 12.91, "learning_rate": 1.3742690058479531e-05, "loss": 0.03, "step": 1475 }, { "epoch": 12.95, "learning_rate": 1.3450292397660819e-05, "loss": 0.0708, "step": 1480 }, { "epoch": 13.0, "learning_rate": 1.3157894736842106e-05, "loss": 0.094, "step": 1485 }, { "epoch": 13.0, "eval_accuracy": 0.9991789819376026, "eval_loss": 0.002953051822260022, "eval_runtime": 54.2489, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.82, "step": 1485 }, { "epoch": 13.04, "learning_rate": 1.2865497076023392e-05, "loss": 0.1298, "step": 1490 }, { "epoch": 13.09, "learning_rate": 1.2573099415204679e-05, "loss": 0.1349, "step": 1495 }, { "epoch": 13.13, "learning_rate": 1.2280701754385964e-05, "loss": 0.1136, "step": 1500 }, { "epoch": 13.17, "learning_rate": 1.1988304093567252e-05, "loss": 0.0972, "step": 1505 }, { "epoch": 13.22, "learning_rate": 1.1695906432748537e-05, "loss": 0.0589, "step": 1510 }, { "epoch": 13.26, "learning_rate": 1.1403508771929824e-05, "loss": 0.0569, "step": 1515 }, { "epoch": 13.3, "learning_rate": 1.1111111111111112e-05, "loss": 0.085, "step": 1520 }, { "epoch": 13.35, "learning_rate": 1.0818713450292397e-05, "loss": 0.1057, "step": 1525 }, { "epoch": 13.39, "learning_rate": 1.0526315789473684e-05, "loss": 0.0953, "step": 1530 }, { "epoch": 13.44, "learning_rate": 1.023391812865497e-05, "loss": 0.085, "step": 1535 }, { "epoch": 13.48, "learning_rate": 9.941520467836257e-06, "loss": 0.043, "step": 1540 }, { "epoch": 13.52, "learning_rate": 9.649122807017545e-06, "loss": 0.0555, "step": 1545 }, { "epoch": 13.57, "learning_rate": 9.35672514619883e-06, "loss": 0.0452, "step": 1550 }, { "epoch": 13.61, "learning_rate": 9.064327485380117e-06, "loss": 0.0489, "step": 1555 }, { "epoch": 13.65, "learning_rate": 8.771929824561403e-06, "loss": 0.1405, "step": 1560 }, { "epoch": 13.7, "learning_rate": 8.47953216374269e-06, "loss": 0.0661, "step": 1565 }, { "epoch": 13.74, "learning_rate": 8.187134502923977e-06, "loss": 0.1128, "step": 1570 }, { "epoch": 13.79, "learning_rate": 7.894736842105263e-06, "loss": 0.0803, "step": 1575 }, { "epoch": 13.83, "learning_rate": 7.602339181286549e-06, "loss": 0.0843, "step": 1580 }, { "epoch": 13.87, "learning_rate": 7.3099415204678366e-06, "loss": 0.0902, "step": 1585 }, { "epoch": 13.92, "learning_rate": 7.017543859649123e-06, "loss": 0.094, "step": 1590 }, { "epoch": 13.96, "learning_rate": 6.725146198830409e-06, "loss": 0.1308, "step": 1595 }, { "epoch": 14.0, "eval_accuracy": 0.9991789819376026, "eval_loss": 0.0011125708697363734, "eval_runtime": 54.0922, "eval_samples_per_second": 22.517, "eval_steps_per_second": 2.829, "step": 1599 }, { "epoch": 14.0, "learning_rate": 6.432748538011696e-06, "loss": 0.0785, "step": 1600 }, { "epoch": 14.05, "learning_rate": 6.140350877192982e-06, "loss": 0.079, "step": 1605 }, { "epoch": 14.09, "learning_rate": 5.8479532163742686e-06, "loss": 0.1075, "step": 1610 }, { "epoch": 14.14, "learning_rate": 5.555555555555556e-06, "loss": 0.0769, "step": 1615 }, { "epoch": 14.18, "learning_rate": 5.263157894736842e-06, "loss": 0.0705, "step": 1620 }, { "epoch": 14.22, "learning_rate": 4.970760233918129e-06, "loss": 0.0779, "step": 1625 }, { "epoch": 14.27, "learning_rate": 4.678362573099415e-06, "loss": 0.0319, "step": 1630 }, { "epoch": 14.31, "learning_rate": 4.3859649122807014e-06, "loss": 0.0495, "step": 1635 }, { "epoch": 14.35, "learning_rate": 4.093567251461989e-06, "loss": 0.0851, "step": 1640 }, { "epoch": 14.4, "learning_rate": 3.8011695906432747e-06, "loss": 0.1291, "step": 1645 }, { "epoch": 14.44, "learning_rate": 3.5087719298245615e-06, "loss": 0.0778, "step": 1650 }, { "epoch": 14.49, "learning_rate": 3.216374269005848e-06, "loss": 0.0826, "step": 1655 }, { "epoch": 14.53, "learning_rate": 2.9239766081871343e-06, "loss": 0.0836, "step": 1660 }, { "epoch": 14.57, "learning_rate": 2.631578947368421e-06, "loss": 0.0562, "step": 1665 }, { "epoch": 14.62, "learning_rate": 2.3391812865497075e-06, "loss": 0.065, "step": 1670 }, { "epoch": 14.66, "learning_rate": 2.0467836257309943e-06, "loss": 0.0644, "step": 1675 }, { "epoch": 14.7, "learning_rate": 1.7543859649122807e-06, "loss": 0.056, "step": 1680 }, { "epoch": 14.75, "learning_rate": 1.4619883040935671e-06, "loss": 0.1165, "step": 1685 }, { "epoch": 14.79, "learning_rate": 1.1695906432748538e-06, "loss": 0.0744, "step": 1690 }, { "epoch": 14.84, "learning_rate": 8.771929824561404e-07, "loss": 0.0856, "step": 1695 }, { "epoch": 14.88, "learning_rate": 5.847953216374269e-07, "loss": 0.0425, "step": 1700 }, { "epoch": 14.92, "learning_rate": 2.9239766081871344e-07, "loss": 0.0536, "step": 1705 }, { "epoch": 14.97, "learning_rate": 0.0, "loss": 0.1557, "step": 1710 }, { "epoch": 14.97, "eval_accuracy": 1.0, "eval_loss": 0.0007466585957445204, "eval_runtime": 54.1734, "eval_samples_per_second": 22.483, "eval_steps_per_second": 2.824, "step": 1710 }, { "epoch": 14.97, "step": 1710, "total_flos": 1.2584990004785971e+19, "train_loss": 0.38356898541164675, "train_runtime": 11197.9622, "train_samples_per_second": 4.891, "train_steps_per_second": 0.153 } ], "max_steps": 1710, "num_train_epochs": 15, "total_flos": 1.2584990004785971e+19, "trial_name": null, "trial_params": null }