|
{ |
|
"best_metric": 0.9894837476099426, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-icpr/checkpoint-2554", |
|
"epoch": 99.23664122137404, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.4616, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 1.4319, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 1.4003, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 1.3194, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 1.2456, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.615384615384616e-06, |
|
"loss": 1.1149, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.762906309751434, |
|
"eval_loss": 0.9589953422546387, |
|
"eval_runtime": 10.6478, |
|
"eval_samples_per_second": 98.236, |
|
"eval_steps_per_second": 6.198, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.3846153846153855e-06, |
|
"loss": 0.9474, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 0.767, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.923076923076923e-06, |
|
"loss": 0.5454, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.3751, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.461538461538462e-06, |
|
"loss": 0.3008, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 0.281, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2653, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9531548757170172, |
|
"eval_loss": 0.16476869583129883, |
|
"eval_runtime": 10.0714, |
|
"eval_samples_per_second": 103.858, |
|
"eval_steps_per_second": 6.553, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.0769230769230771e-05, |
|
"loss": 0.2232, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 0.2024, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 0.1796, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.3076923076923078e-05, |
|
"loss": 0.2004, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 0.1493, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.4615384615384617e-05, |
|
"loss": 0.1984, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.97131931166348, |
|
"eval_loss": 0.08937183022499084, |
|
"eval_runtime": 10.7983, |
|
"eval_samples_per_second": 96.867, |
|
"eval_steps_per_second": 6.112, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.1647, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.6153846153846154e-05, |
|
"loss": 0.1539, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.6923076923076924e-05, |
|
"loss": 0.1643, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.7692307692307694e-05, |
|
"loss": 0.1078, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 0.127, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.1269, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1719, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9684512428298279, |
|
"eval_loss": 0.08631974458694458, |
|
"eval_runtime": 9.6084, |
|
"eval_samples_per_second": 108.863, |
|
"eval_steps_per_second": 6.869, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.0769230769230772e-05, |
|
"loss": 0.1383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.1538461538461542e-05, |
|
"loss": 0.1304, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.230769230769231e-05, |
|
"loss": 0.1392, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 0.1269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.384615384615385e-05, |
|
"loss": 0.1296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.461538461538462e-05, |
|
"loss": 0.1537, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.08103872090578079, |
|
"eval_runtime": 10.0768, |
|
"eval_samples_per_second": 103.802, |
|
"eval_steps_per_second": 6.55, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5384615384615383e-05, |
|
"loss": 0.1774, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 2.6153846153846157e-05, |
|
"loss": 0.1191, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.6923076923076923e-05, |
|
"loss": 0.1717, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.7692307692307694e-05, |
|
"loss": 0.1622, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.846153846153846e-05, |
|
"loss": 0.1145, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 2.9230769230769234e-05, |
|
"loss": 0.0814, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1162, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9770554493307839, |
|
"eval_loss": 0.07851049304008484, |
|
"eval_runtime": 10.7167, |
|
"eval_samples_per_second": 97.605, |
|
"eval_steps_per_second": 6.159, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.0778, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 3.153846153846154e-05, |
|
"loss": 0.0952, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 3.230769230769231e-05, |
|
"loss": 0.1505, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.307692307692308e-05, |
|
"loss": 0.1087, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 3.384615384615385e-05, |
|
"loss": 0.1069, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 3.461538461538462e-05, |
|
"loss": 0.1063, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9722753346080306, |
|
"eval_loss": 0.08351072669029236, |
|
"eval_runtime": 9.7782, |
|
"eval_samples_per_second": 106.973, |
|
"eval_steps_per_second": 6.75, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.538461538461539e-05, |
|
"loss": 0.0976, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 3.615384615384615e-05, |
|
"loss": 0.0788, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 3.692307692307693e-05, |
|
"loss": 0.1104, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 3.769230769230769e-05, |
|
"loss": 0.1054, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.1131, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 3.923076923076923e-05, |
|
"loss": 0.1597, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1392, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.06736110895872116, |
|
"eval_runtime": 9.5406, |
|
"eval_samples_per_second": 109.637, |
|
"eval_steps_per_second": 6.918, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.0769230769230773e-05, |
|
"loss": 0.0986, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 4.1538461538461544e-05, |
|
"loss": 0.1179, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.230769230769231e-05, |
|
"loss": 0.0881, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 4.3076923076923084e-05, |
|
"loss": 0.0884, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.384615384615385e-05, |
|
"loss": 0.0972, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 4.461538461538462e-05, |
|
"loss": 0.1286, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.07882168889045715, |
|
"eval_runtime": 10.4396, |
|
"eval_samples_per_second": 100.195, |
|
"eval_steps_per_second": 6.322, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.538461538461539e-05, |
|
"loss": 0.1296, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 0.0937, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 4.692307692307693e-05, |
|
"loss": 0.1032, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.76923076923077e-05, |
|
"loss": 0.1336, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 4.846153846153846e-05, |
|
"loss": 0.0927, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 4.923076923076924e-05, |
|
"loss": 0.1015, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1294, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.06581155955791473, |
|
"eval_runtime": 10.4314, |
|
"eval_samples_per_second": 100.274, |
|
"eval_steps_per_second": 6.327, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 4.991452991452992e-05, |
|
"loss": 0.1018, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 4.982905982905983e-05, |
|
"loss": 0.0903, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 4.9743589743589746e-05, |
|
"loss": 0.1085, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 4.965811965811966e-05, |
|
"loss": 0.0893, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 4.9572649572649575e-05, |
|
"loss": 0.1019, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 4.948717948717949e-05, |
|
"loss": 0.0631, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 4.94017094017094e-05, |
|
"loss": 0.0843, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.9732313575525813, |
|
"eval_loss": 0.07347707450389862, |
|
"eval_runtime": 10.5361, |
|
"eval_samples_per_second": 99.278, |
|
"eval_steps_per_second": 6.264, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 4.931623931623932e-05, |
|
"loss": 0.0675, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.923076923076924e-05, |
|
"loss": 0.1206, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.0543, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.905982905982906e-05, |
|
"loss": 0.0973, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 4.8974358974358975e-05, |
|
"loss": 0.1046, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.074, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.06362002342939377, |
|
"eval_runtime": 10.045, |
|
"eval_samples_per_second": 104.131, |
|
"eval_steps_per_second": 6.57, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 4.8803418803418804e-05, |
|
"loss": 0.0947, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 4.871794871794872e-05, |
|
"loss": 0.079, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 4.863247863247863e-05, |
|
"loss": 0.1109, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 4.854700854700855e-05, |
|
"loss": 0.1039, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 4.846153846153846e-05, |
|
"loss": 0.0797, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 4.8376068376068376e-05, |
|
"loss": 0.0743, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 4.829059829059829e-05, |
|
"loss": 0.0734, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.9751434034416826, |
|
"eval_loss": 0.1042996272444725, |
|
"eval_runtime": 9.3639, |
|
"eval_samples_per_second": 111.706, |
|
"eval_steps_per_second": 7.048, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 4.8205128205128205e-05, |
|
"loss": 0.1188, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 4.8119658119658126e-05, |
|
"loss": 0.0659, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 4.803418803418804e-05, |
|
"loss": 0.092, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 4.7948717948717955e-05, |
|
"loss": 0.0565, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 4.786324786324787e-05, |
|
"loss": 0.0603, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.0774, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9722753346080306, |
|
"eval_loss": 0.08982475847005844, |
|
"eval_runtime": 10.6797, |
|
"eval_samples_per_second": 97.943, |
|
"eval_steps_per_second": 6.18, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 4.76923076923077e-05, |
|
"loss": 0.0848, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 4.7606837606837606e-05, |
|
"loss": 0.0684, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 4.752136752136752e-05, |
|
"loss": 0.0563, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 4.7435897435897435e-05, |
|
"loss": 0.0684, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 4.735042735042735e-05, |
|
"loss": 0.0607, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 4.7264957264957264e-05, |
|
"loss": 0.0853, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 4.717948717948718e-05, |
|
"loss": 0.068, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.07194650918245316, |
|
"eval_runtime": 10.533, |
|
"eval_samples_per_second": 99.307, |
|
"eval_steps_per_second": 6.266, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 4.709401709401709e-05, |
|
"loss": 0.0867, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.0606, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 4.692307692307693e-05, |
|
"loss": 0.0921, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 4.683760683760684e-05, |
|
"loss": 0.1273, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 4.675213675213676e-05, |
|
"loss": 0.0708, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.0821, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9741873804971319, |
|
"eval_loss": 0.09563681483268738, |
|
"eval_runtime": 9.5529, |
|
"eval_samples_per_second": 109.495, |
|
"eval_steps_per_second": 6.909, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 4.6581196581196586e-05, |
|
"loss": 0.1147, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 4.64957264957265e-05, |
|
"loss": 0.1188, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 4.6410256410256415e-05, |
|
"loss": 0.0562, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 4.632478632478633e-05, |
|
"loss": 0.0575, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 4.6239316239316244e-05, |
|
"loss": 0.0674, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 0.0855, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.6068376068376066e-05, |
|
"loss": 0.0576, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.97131931166348, |
|
"eval_loss": 0.07245714962482452, |
|
"eval_runtime": 9.8421, |
|
"eval_samples_per_second": 106.279, |
|
"eval_steps_per_second": 6.706, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 4.598290598290598e-05, |
|
"loss": 0.089, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 4.5897435897435895e-05, |
|
"loss": 0.0607, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 4.581196581196581e-05, |
|
"loss": 0.0875, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 4.572649572649573e-05, |
|
"loss": 0.1147, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 4.5641025641025645e-05, |
|
"loss": 0.0751, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.0652, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9751434034416826, |
|
"eval_loss": 0.09574375301599503, |
|
"eval_runtime": 10.6204, |
|
"eval_samples_per_second": 98.489, |
|
"eval_steps_per_second": 6.214, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 4.5470085470085474e-05, |
|
"loss": 0.0591, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 4.538461538461539e-05, |
|
"loss": 0.0642, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 4.52991452991453e-05, |
|
"loss": 0.0819, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 4.521367521367522e-05, |
|
"loss": 0.0766, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 4.512820512820513e-05, |
|
"loss": 0.0906, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 4.5042735042735046e-05, |
|
"loss": 0.0694, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 4.495726495726496e-05, |
|
"loss": 0.0712, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.08092983067035675, |
|
"eval_runtime": 10.1615, |
|
"eval_samples_per_second": 102.937, |
|
"eval_steps_per_second": 6.495, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.056, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 4.478632478632479e-05, |
|
"loss": 0.0751, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 4.47008547008547e-05, |
|
"loss": 0.0592, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 4.461538461538462e-05, |
|
"loss": 0.0627, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 4.452991452991453e-05, |
|
"loss": 0.0692, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0801, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.435897435897436e-05, |
|
"loss": 0.075, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9674952198852772, |
|
"eval_loss": 0.1283179670572281, |
|
"eval_runtime": 9.22, |
|
"eval_samples_per_second": 113.449, |
|
"eval_steps_per_second": 7.158, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"learning_rate": 4.4273504273504275e-05, |
|
"loss": 0.0597, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"learning_rate": 4.418803418803419e-05, |
|
"loss": 0.0835, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 20.46, |
|
"learning_rate": 4.4102564102564104e-05, |
|
"loss": 0.0788, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"learning_rate": 4.401709401709402e-05, |
|
"loss": 0.0612, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 20.76, |
|
"learning_rate": 4.393162393162393e-05, |
|
"loss": 0.0565, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 4.384615384615385e-05, |
|
"loss": 0.0988, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.9741873804971319, |
|
"eval_loss": 0.09660971164703369, |
|
"eval_runtime": 10.5136, |
|
"eval_samples_per_second": 99.49, |
|
"eval_steps_per_second": 6.278, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"learning_rate": 4.376068376068376e-05, |
|
"loss": 0.0496, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 4.3675213675213676e-05, |
|
"loss": 0.0651, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 4.358974358974359e-05, |
|
"loss": 0.0545, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.53, |
|
"learning_rate": 4.3504273504273505e-05, |
|
"loss": 0.0774, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 4.341880341880342e-05, |
|
"loss": 0.0632, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.0754, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 21.98, |
|
"learning_rate": 4.324786324786325e-05, |
|
"loss": 0.0538, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.11253877729177475, |
|
"eval_runtime": 10.5371, |
|
"eval_samples_per_second": 99.269, |
|
"eval_steps_per_second": 6.264, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 22.14, |
|
"learning_rate": 4.316239316239317e-05, |
|
"loss": 0.0742, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 4.3076923076923084e-05, |
|
"loss": 0.0734, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 4.2991452991453e-05, |
|
"loss": 0.0792, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 4.2905982905982906e-05, |
|
"loss": 0.078, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 4.282051282051282e-05, |
|
"loss": 0.0802, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.0578, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.06480014324188232, |
|
"eval_runtime": 9.3635, |
|
"eval_samples_per_second": 111.71, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 23.05, |
|
"learning_rate": 4.264957264957265e-05, |
|
"loss": 0.0634, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 23.21, |
|
"learning_rate": 4.2564102564102564e-05, |
|
"loss": 0.0717, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 4.247863247863248e-05, |
|
"loss": 0.0661, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 4.239316239316239e-05, |
|
"loss": 0.0679, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 23.66, |
|
"learning_rate": 4.230769230769231e-05, |
|
"loss": 0.0646, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.0605, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 4.2136752136752136e-05, |
|
"loss": 0.0675, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.09923950582742691, |
|
"eval_runtime": 10.6598, |
|
"eval_samples_per_second": 98.126, |
|
"eval_steps_per_second": 6.192, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"learning_rate": 4.205128205128206e-05, |
|
"loss": 0.0458, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 24.27, |
|
"learning_rate": 4.196581196581197e-05, |
|
"loss": 0.0425, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 4.1880341880341886e-05, |
|
"loss": 0.0637, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 4.17948717948718e-05, |
|
"loss": 0.0593, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"learning_rate": 4.1709401709401715e-05, |
|
"loss": 0.0567, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 4.162393162393163e-05, |
|
"loss": 0.0611, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.06819227337837219, |
|
"eval_runtime": 10.2586, |
|
"eval_samples_per_second": 101.963, |
|
"eval_steps_per_second": 6.434, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 25.04, |
|
"learning_rate": 4.1538461538461544e-05, |
|
"loss": 0.0694, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 4.145299145299146e-05, |
|
"loss": 0.0484, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 25.34, |
|
"learning_rate": 4.1367521367521366e-05, |
|
"loss": 0.0558, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"learning_rate": 4.128205128205128e-05, |
|
"loss": 0.0498, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 25.65, |
|
"learning_rate": 4.1196581196581195e-05, |
|
"loss": 0.0521, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.0939, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 4.1025641025641023e-05, |
|
"loss": 0.0434, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.0718933716416359, |
|
"eval_runtime": 12.7961, |
|
"eval_samples_per_second": 81.744, |
|
"eval_steps_per_second": 5.158, |
|
"step": 1703 |
|
}, |
|
{ |
|
"epoch": 26.11, |
|
"learning_rate": 4.094017094017094e-05, |
|
"loss": 0.0384, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 26.26, |
|
"learning_rate": 4.085470085470086e-05, |
|
"loss": 0.0533, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 4.0769230769230773e-05, |
|
"loss": 0.0483, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 26.56, |
|
"learning_rate": 4.068376068376069e-05, |
|
"loss": 0.0627, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.071, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"learning_rate": 4.051282051282052e-05, |
|
"loss": 0.0339, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.9780114722753346, |
|
"eval_loss": 0.09302157908678055, |
|
"eval_runtime": 10.3175, |
|
"eval_samples_per_second": 101.381, |
|
"eval_steps_per_second": 6.397, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 4.042735042735043e-05, |
|
"loss": 0.0597, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 27.18, |
|
"learning_rate": 4.0341880341880346e-05, |
|
"loss": 0.0443, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 4.025641025641026e-05, |
|
"loss": 0.0623, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 27.48, |
|
"learning_rate": 4.0170940170940174e-05, |
|
"loss": 0.0757, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 27.63, |
|
"learning_rate": 4.008547008547009e-05, |
|
"loss": 0.0294, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"learning_rate": 4e-05, |
|
"loss": 0.063, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 27.94, |
|
"learning_rate": 3.991452991452992e-05, |
|
"loss": 0.0346, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.0902632623910904, |
|
"eval_runtime": 9.3456, |
|
"eval_samples_per_second": 111.924, |
|
"eval_steps_per_second": 7.062, |
|
"step": 1834 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 3.9829059829059825e-05, |
|
"loss": 0.047, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 3.974358974358974e-05, |
|
"loss": 0.0743, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 3.965811965811966e-05, |
|
"loss": 0.0471, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 28.55, |
|
"learning_rate": 3.9572649572649575e-05, |
|
"loss": 0.0453, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 3.948717948717949e-05, |
|
"loss": 0.0784, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 3.9401709401709404e-05, |
|
"loss": 0.0806, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.09031251072883606, |
|
"eval_runtime": 10.3537, |
|
"eval_samples_per_second": 101.026, |
|
"eval_steps_per_second": 6.375, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 3.931623931623932e-05, |
|
"loss": 0.0711, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 3.923076923076923e-05, |
|
"loss": 0.0381, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 29.31, |
|
"learning_rate": 3.914529914529915e-05, |
|
"loss": 0.0437, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"learning_rate": 3.905982905982906e-05, |
|
"loss": 0.0597, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"learning_rate": 3.8974358974358976e-05, |
|
"loss": 0.0347, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0482, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 3.8803418803418805e-05, |
|
"loss": 0.0518, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.09824305772781372, |
|
"eval_runtime": 10.4776, |
|
"eval_samples_per_second": 99.832, |
|
"eval_steps_per_second": 6.299, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 30.08, |
|
"learning_rate": 3.871794871794872e-05, |
|
"loss": 0.0362, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 30.23, |
|
"learning_rate": 3.8632478632478634e-05, |
|
"loss": 0.0363, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 30.38, |
|
"learning_rate": 3.854700854700855e-05, |
|
"loss": 0.0474, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.0475, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.69, |
|
"learning_rate": 3.837606837606838e-05, |
|
"loss": 0.0605, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 30.84, |
|
"learning_rate": 3.82905982905983e-05, |
|
"loss": 0.0635, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 3.8205128205128206e-05, |
|
"loss": 0.0407, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.07017896324396133, |
|
"eval_runtime": 9.0926, |
|
"eval_samples_per_second": 115.038, |
|
"eval_steps_per_second": 7.259, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 3.811965811965812e-05, |
|
"loss": 0.0449, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"learning_rate": 3.8034188034188035e-05, |
|
"loss": 0.0321, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 31.45, |
|
"learning_rate": 3.794871794871795e-05, |
|
"loss": 0.0595, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 3.7863247863247864e-05, |
|
"loss": 0.0452, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 31.76, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.0822, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 31.91, |
|
"learning_rate": 3.769230769230769e-05, |
|
"loss": 0.0528, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9760994263862333, |
|
"eval_loss": 0.08967561274766922, |
|
"eval_runtime": 10.4871, |
|
"eval_samples_per_second": 99.741, |
|
"eval_steps_per_second": 6.293, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 32.06, |
|
"learning_rate": 3.760683760683761e-05, |
|
"loss": 0.0808, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 3.752136752136752e-05, |
|
"loss": 0.0616, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 32.37, |
|
"learning_rate": 3.7435897435897436e-05, |
|
"loss": 0.0581, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"learning_rate": 3.735042735042735e-05, |
|
"loss": 0.0522, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"learning_rate": 3.7264957264957265e-05, |
|
"loss": 0.0624, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 32.82, |
|
"learning_rate": 3.717948717948718e-05, |
|
"loss": 0.0542, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 32.98, |
|
"learning_rate": 3.70940170940171e-05, |
|
"loss": 0.0774, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.06260856240987778, |
|
"eval_runtime": 10.4508, |
|
"eval_samples_per_second": 100.088, |
|
"eval_steps_per_second": 6.315, |
|
"step": 2161 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"learning_rate": 3.7008547008547015e-05, |
|
"loss": 0.0516, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"learning_rate": 3.692307692307693e-05, |
|
"loss": 0.0368, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 3.6837606837606844e-05, |
|
"loss": 0.0509, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 33.59, |
|
"learning_rate": 3.675213675213676e-05, |
|
"loss": 0.0784, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 33.74, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0732, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 33.89, |
|
"learning_rate": 3.658119658119658e-05, |
|
"loss": 0.053, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.05762642249464989, |
|
"eval_runtime": 9.1985, |
|
"eval_samples_per_second": 113.714, |
|
"eval_steps_per_second": 7.175, |
|
"step": 2227 |
|
}, |
|
{ |
|
"epoch": 34.05, |
|
"learning_rate": 3.6495726495726495e-05, |
|
"loss": 0.0403, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"learning_rate": 3.641025641025641e-05, |
|
"loss": 0.0429, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 34.35, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.0629, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 34.5, |
|
"learning_rate": 3.623931623931624e-05, |
|
"loss": 0.0538, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 34.66, |
|
"learning_rate": 3.615384615384615e-05, |
|
"loss": 0.0995, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 34.81, |
|
"learning_rate": 3.606837606837607e-05, |
|
"loss": 0.0479, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"learning_rate": 3.598290598290598e-05, |
|
"loss": 0.0512, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.9847036328871893, |
|
"eval_loss": 0.07072658836841583, |
|
"eval_runtime": 10.4603, |
|
"eval_samples_per_second": 99.997, |
|
"eval_steps_per_second": 6.31, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 3.58974358974359e-05, |
|
"loss": 0.0408, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 35.27, |
|
"learning_rate": 3.581196581196582e-05, |
|
"loss": 0.0599, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 35.42, |
|
"learning_rate": 3.572649572649573e-05, |
|
"loss": 0.0467, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"learning_rate": 3.5641025641025646e-05, |
|
"loss": 0.0415, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.0507, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 35.88, |
|
"learning_rate": 3.5470085470085474e-05, |
|
"loss": 0.0388, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.10397801548242569, |
|
"eval_runtime": 9.1827, |
|
"eval_samples_per_second": 113.91, |
|
"eval_steps_per_second": 7.187, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 36.03, |
|
"learning_rate": 3.538461538461539e-05, |
|
"loss": 0.044, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 36.18, |
|
"learning_rate": 3.52991452991453e-05, |
|
"loss": 0.0477, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 36.34, |
|
"learning_rate": 3.521367521367522e-05, |
|
"loss": 0.0627, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"learning_rate": 3.5128205128205125e-05, |
|
"loss": 0.0486, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 3.504273504273504e-05, |
|
"loss": 0.0513, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 3.4957264957264954e-05, |
|
"loss": 0.0507, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"learning_rate": 3.487179487179487e-05, |
|
"loss": 0.06, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.08400359004735947, |
|
"eval_runtime": 10.3767, |
|
"eval_samples_per_second": 100.803, |
|
"eval_steps_per_second": 6.36, |
|
"step": 2423 |
|
}, |
|
{ |
|
"epoch": 37.1, |
|
"learning_rate": 3.478632478632479e-05, |
|
"loss": 0.0414, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"learning_rate": 3.4700854700854704e-05, |
|
"loss": 0.0514, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"learning_rate": 3.461538461538462e-05, |
|
"loss": 0.0591, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"learning_rate": 3.452991452991453e-05, |
|
"loss": 0.0562, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 37.71, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.0224, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 37.86, |
|
"learning_rate": 3.435897435897436e-05, |
|
"loss": 0.0477, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.98565965583174, |
|
"eval_loss": 0.06589007377624512, |
|
"eval_runtime": 10.3493, |
|
"eval_samples_per_second": 101.07, |
|
"eval_steps_per_second": 6.377, |
|
"step": 2489 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"learning_rate": 3.4273504273504276e-05, |
|
"loss": 0.0414, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 38.17, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.0531, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 3.4102564102564105e-05, |
|
"loss": 0.0407, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 38.47, |
|
"learning_rate": 3.401709401709402e-05, |
|
"loss": 0.0605, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 38.63, |
|
"learning_rate": 3.3931623931623934e-05, |
|
"loss": 0.0504, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 38.78, |
|
"learning_rate": 3.384615384615385e-05, |
|
"loss": 0.0273, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 3.376068376068376e-05, |
|
"loss": 0.0482, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.9894837476099426, |
|
"eval_loss": 0.04790885001420975, |
|
"eval_runtime": 9.1599, |
|
"eval_samples_per_second": 114.194, |
|
"eval_steps_per_second": 7.205, |
|
"step": 2554 |
|
}, |
|
{ |
|
"epoch": 39.08, |
|
"learning_rate": 3.367521367521368e-05, |
|
"loss": 0.0457, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"learning_rate": 3.358974358974359e-05, |
|
"loss": 0.0483, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 39.39, |
|
"learning_rate": 3.3504273504273506e-05, |
|
"loss": 0.0345, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 39.54, |
|
"learning_rate": 3.341880341880342e-05, |
|
"loss": 0.0418, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 39.69, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0738, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 39.85, |
|
"learning_rate": 3.324786324786325e-05, |
|
"loss": 0.0781, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.3162393162393164e-05, |
|
"loss": 0.0292, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.06987924128770828, |
|
"eval_runtime": 10.3998, |
|
"eval_samples_per_second": 100.579, |
|
"eval_steps_per_second": 6.346, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 40.15, |
|
"learning_rate": 3.307692307692308e-05, |
|
"loss": 0.0377, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 40.31, |
|
"learning_rate": 3.299145299145299e-05, |
|
"loss": 0.0298, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 40.46, |
|
"learning_rate": 3.290598290598291e-05, |
|
"loss": 0.0221, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 40.61, |
|
"learning_rate": 3.282051282051282e-05, |
|
"loss": 0.0246, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"learning_rate": 3.2735042735042736e-05, |
|
"loss": 0.0517, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 40.92, |
|
"learning_rate": 3.264957264957265e-05, |
|
"loss": 0.0386, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.10300048440694809, |
|
"eval_runtime": 10.0411, |
|
"eval_samples_per_second": 104.171, |
|
"eval_steps_per_second": 6.573, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 3.2564102564102565e-05, |
|
"loss": 0.0775, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 41.22, |
|
"learning_rate": 3.247863247863248e-05, |
|
"loss": 0.0777, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 41.37, |
|
"learning_rate": 3.2393162393162394e-05, |
|
"loss": 0.0516, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 41.53, |
|
"learning_rate": 3.230769230769231e-05, |
|
"loss": 0.0373, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 41.68, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.0505, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 41.83, |
|
"learning_rate": 3.2136752136752144e-05, |
|
"loss": 0.0494, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.0441, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.08014865964651108, |
|
"eval_runtime": 9.3785, |
|
"eval_samples_per_second": 111.532, |
|
"eval_steps_per_second": 7.037, |
|
"step": 2751 |
|
}, |
|
{ |
|
"epoch": 42.14, |
|
"learning_rate": 3.1965811965811966e-05, |
|
"loss": 0.0428, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 3.188034188034188e-05, |
|
"loss": 0.044, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"learning_rate": 3.1794871794871795e-05, |
|
"loss": 0.0535, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 3.170940170940171e-05, |
|
"loss": 0.0467, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"learning_rate": 3.162393162393162e-05, |
|
"loss": 0.0393, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 42.9, |
|
"learning_rate": 3.153846153846154e-05, |
|
"loss": 0.0269, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.10365654528141022, |
|
"eval_runtime": 10.553, |
|
"eval_samples_per_second": 99.119, |
|
"eval_steps_per_second": 6.254, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 43.05, |
|
"learning_rate": 3.145299145299145e-05, |
|
"loss": 0.048, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 43.21, |
|
"learning_rate": 3.136752136752137e-05, |
|
"loss": 0.0646, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 3.128205128205128e-05, |
|
"loss": 0.0448, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 43.51, |
|
"learning_rate": 3.1196581196581195e-05, |
|
"loss": 0.047, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 43.66, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0191, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 43.82, |
|
"learning_rate": 3.102564102564103e-05, |
|
"loss": 0.0452, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 43.97, |
|
"learning_rate": 3.0940170940170946e-05, |
|
"loss": 0.0385, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.086981400847435, |
|
"eval_runtime": 10.1033, |
|
"eval_samples_per_second": 103.53, |
|
"eval_steps_per_second": 6.533, |
|
"step": 2882 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"learning_rate": 3.085470085470086e-05, |
|
"loss": 0.0587, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.0558, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 3.068376068376069e-05, |
|
"loss": 0.0461, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"learning_rate": 3.05982905982906e-05, |
|
"loss": 0.0416, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 44.73, |
|
"learning_rate": 3.0512820512820518e-05, |
|
"loss": 0.0405, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 44.89, |
|
"learning_rate": 3.0427350427350425e-05, |
|
"loss": 0.0502, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.9770554493307839, |
|
"eval_loss": 0.13666602969169617, |
|
"eval_runtime": 9.3328, |
|
"eval_samples_per_second": 112.078, |
|
"eval_steps_per_second": 7.072, |
|
"step": 2947 |
|
}, |
|
{ |
|
"epoch": 45.04, |
|
"learning_rate": 3.034188034188034e-05, |
|
"loss": 0.0508, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 45.19, |
|
"learning_rate": 3.0256410256410257e-05, |
|
"loss": 0.0438, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 45.34, |
|
"learning_rate": 3.0170940170940172e-05, |
|
"loss": 0.0156, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 45.5, |
|
"learning_rate": 3.0085470085470086e-05, |
|
"loss": 0.0662, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0606, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.052, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"learning_rate": 2.982905982905983e-05, |
|
"loss": 0.0389, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9770554493307839, |
|
"eval_loss": 0.10930032283067703, |
|
"eval_runtime": 10.4976, |
|
"eval_samples_per_second": 99.642, |
|
"eval_steps_per_second": 6.287, |
|
"step": 3013 |
|
}, |
|
{ |
|
"epoch": 46.11, |
|
"learning_rate": 2.9743589743589744e-05, |
|
"loss": 0.0499, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 46.26, |
|
"learning_rate": 2.965811965811966e-05, |
|
"loss": 0.0365, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 46.41, |
|
"learning_rate": 2.9572649572649573e-05, |
|
"loss": 0.0411, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 46.56, |
|
"learning_rate": 2.948717948717949e-05, |
|
"loss": 0.0315, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"learning_rate": 2.9401709401709405e-05, |
|
"loss": 0.0598, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 46.87, |
|
"learning_rate": 2.931623931623932e-05, |
|
"loss": 0.0209, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.0954117476940155, |
|
"eval_runtime": 10.251, |
|
"eval_samples_per_second": 102.039, |
|
"eval_steps_per_second": 6.438, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 2.9230769230769234e-05, |
|
"loss": 0.0785, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 47.18, |
|
"learning_rate": 2.914529914529915e-05, |
|
"loss": 0.0432, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"learning_rate": 2.9059829059829063e-05, |
|
"loss": 0.0427, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 47.48, |
|
"learning_rate": 2.8974358974358977e-05, |
|
"loss": 0.0421, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 47.63, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.035, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 47.79, |
|
"learning_rate": 2.8803418803418803e-05, |
|
"loss": 0.0275, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 2.8717948717948717e-05, |
|
"loss": 0.0327, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.98565965583174, |
|
"eval_loss": 0.08863285183906555, |
|
"eval_runtime": 9.2312, |
|
"eval_samples_per_second": 113.312, |
|
"eval_steps_per_second": 7.15, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 48.09, |
|
"learning_rate": 2.863247863247863e-05, |
|
"loss": 0.0332, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 48.24, |
|
"learning_rate": 2.8547008547008546e-05, |
|
"loss": 0.0532, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"learning_rate": 2.846153846153846e-05, |
|
"loss": 0.0212, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 48.55, |
|
"learning_rate": 2.8376068376068378e-05, |
|
"loss": 0.0406, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 2.8290598290598293e-05, |
|
"loss": 0.0594, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 48.85, |
|
"learning_rate": 2.8205128205128207e-05, |
|
"loss": 0.0269, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.0766867846250534, |
|
"eval_runtime": 10.7207, |
|
"eval_samples_per_second": 97.568, |
|
"eval_steps_per_second": 6.156, |
|
"step": 3209 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 2.811965811965812e-05, |
|
"loss": 0.0588, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"learning_rate": 2.8034188034188036e-05, |
|
"loss": 0.0418, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 49.31, |
|
"learning_rate": 2.794871794871795e-05, |
|
"loss": 0.0389, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 49.47, |
|
"learning_rate": 2.7863247863247865e-05, |
|
"loss": 0.0342, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 49.62, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.046, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 49.77, |
|
"learning_rate": 2.7692307692307694e-05, |
|
"loss": 0.0606, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 49.92, |
|
"learning_rate": 2.760683760683761e-05, |
|
"loss": 0.0461, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.98565965583174, |
|
"eval_loss": 0.06614663451910019, |
|
"eval_runtime": 10.6141, |
|
"eval_samples_per_second": 98.548, |
|
"eval_steps_per_second": 6.218, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 50.08, |
|
"learning_rate": 2.7521367521367526e-05, |
|
"loss": 0.0328, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 50.23, |
|
"learning_rate": 2.743589743589744e-05, |
|
"loss": 0.044, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 50.38, |
|
"learning_rate": 2.7350427350427355e-05, |
|
"loss": 0.0752, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"learning_rate": 2.7264957264957262e-05, |
|
"loss": 0.0543, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 50.69, |
|
"learning_rate": 2.717948717948718e-05, |
|
"loss": 0.0331, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 50.84, |
|
"learning_rate": 2.7094017094017094e-05, |
|
"loss": 0.0436, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"learning_rate": 2.700854700854701e-05, |
|
"loss": 0.0226, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.07688331604003906, |
|
"eval_runtime": 9.3318, |
|
"eval_samples_per_second": 112.09, |
|
"eval_steps_per_second": 7.073, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 51.15, |
|
"learning_rate": 2.6923076923076923e-05, |
|
"loss": 0.0394, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 51.3, |
|
"learning_rate": 2.6837606837606838e-05, |
|
"loss": 0.0444, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 51.45, |
|
"learning_rate": 2.6752136752136752e-05, |
|
"loss": 0.0424, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0507, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 51.76, |
|
"learning_rate": 2.658119658119658e-05, |
|
"loss": 0.033, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"learning_rate": 2.64957264957265e-05, |
|
"loss": 0.0304, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.084078848361969, |
|
"eval_runtime": 10.4168, |
|
"eval_samples_per_second": 100.415, |
|
"eval_steps_per_second": 6.336, |
|
"step": 3406 |
|
}, |
|
{ |
|
"epoch": 52.06, |
|
"learning_rate": 2.6410256410256413e-05, |
|
"loss": 0.0369, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 52.21, |
|
"learning_rate": 2.6324786324786328e-05, |
|
"loss": 0.0325, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 52.37, |
|
"learning_rate": 2.6239316239316242e-05, |
|
"loss": 0.0374, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 52.52, |
|
"learning_rate": 2.6153846153846157e-05, |
|
"loss": 0.0527, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 52.67, |
|
"learning_rate": 2.606837606837607e-05, |
|
"loss": 0.0321, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 52.82, |
|
"learning_rate": 2.5982905982905985e-05, |
|
"loss": 0.0725, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"learning_rate": 2.58974358974359e-05, |
|
"loss": 0.0326, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.10022158920764923, |
|
"eval_runtime": 10.2475, |
|
"eval_samples_per_second": 102.074, |
|
"eval_steps_per_second": 6.441, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 53.13, |
|
"learning_rate": 2.5811965811965814e-05, |
|
"loss": 0.0557, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 2.5726495726495725e-05, |
|
"loss": 0.081, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 53.44, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0479, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 53.59, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.0266, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 53.74, |
|
"learning_rate": 2.547008547008547e-05, |
|
"loss": 0.0559, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 53.89, |
|
"learning_rate": 2.5384615384615383e-05, |
|
"loss": 0.0593, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9847036328871893, |
|
"eval_loss": 0.06341515481472015, |
|
"eval_runtime": 9.7205, |
|
"eval_samples_per_second": 107.608, |
|
"eval_steps_per_second": 6.79, |
|
"step": 3537 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"learning_rate": 2.52991452991453e-05, |
|
"loss": 0.0338, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 54.2, |
|
"learning_rate": 2.5213675213675215e-05, |
|
"loss": 0.0373, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 54.35, |
|
"learning_rate": 2.512820512820513e-05, |
|
"loss": 0.0439, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 54.5, |
|
"learning_rate": 2.5042735042735044e-05, |
|
"loss": 0.0455, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 54.66, |
|
"learning_rate": 2.495726495726496e-05, |
|
"loss": 0.0393, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 54.81, |
|
"learning_rate": 2.4871794871794873e-05, |
|
"loss": 0.0427, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"learning_rate": 2.4786324786324787e-05, |
|
"loss": 0.0489, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.070249542593956, |
|
"eval_runtime": 10.5454, |
|
"eval_samples_per_second": 99.191, |
|
"eval_steps_per_second": 6.259, |
|
"step": 3602 |
|
}, |
|
{ |
|
"epoch": 55.11, |
|
"learning_rate": 2.47008547008547e-05, |
|
"loss": 0.0264, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 55.27, |
|
"learning_rate": 2.461538461538462e-05, |
|
"loss": 0.0635, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 55.42, |
|
"learning_rate": 2.452991452991453e-05, |
|
"loss": 0.0655, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 55.57, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.0429, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 55.73, |
|
"learning_rate": 2.435897435897436e-05, |
|
"loss": 0.0381, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 55.88, |
|
"learning_rate": 2.4273504273504274e-05, |
|
"loss": 0.0495, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.10598118603229523, |
|
"eval_runtime": 10.001, |
|
"eval_samples_per_second": 104.589, |
|
"eval_steps_per_second": 6.599, |
|
"step": 3668 |
|
}, |
|
{ |
|
"epoch": 56.03, |
|
"learning_rate": 2.4188034188034188e-05, |
|
"loss": 0.0579, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 56.18, |
|
"learning_rate": 2.4102564102564103e-05, |
|
"loss": 0.0539, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 56.34, |
|
"learning_rate": 2.401709401709402e-05, |
|
"loss": 0.0331, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 56.49, |
|
"learning_rate": 2.3931623931623935e-05, |
|
"loss": 0.0358, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 56.64, |
|
"learning_rate": 2.384615384615385e-05, |
|
"loss": 0.038, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 56.79, |
|
"learning_rate": 2.376068376068376e-05, |
|
"loss": 0.0515, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 56.95, |
|
"learning_rate": 2.3675213675213675e-05, |
|
"loss": 0.0457, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_accuracy": 0.9866156787762906, |
|
"eval_loss": 0.07145655900239944, |
|
"eval_runtime": 9.7095, |
|
"eval_samples_per_second": 107.729, |
|
"eval_steps_per_second": 6.797, |
|
"step": 3733 |
|
}, |
|
{ |
|
"epoch": 57.1, |
|
"learning_rate": 2.358974358974359e-05, |
|
"loss": 0.0409, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 57.25, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.0397, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"learning_rate": 2.341880341880342e-05, |
|
"loss": 0.0365, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 57.56, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0507, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 57.71, |
|
"learning_rate": 2.324786324786325e-05, |
|
"loss": 0.0337, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 57.86, |
|
"learning_rate": 2.3162393162393165e-05, |
|
"loss": 0.0487, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09061739593744278, |
|
"eval_runtime": 10.6263, |
|
"eval_samples_per_second": 98.435, |
|
"eval_steps_per_second": 6.211, |
|
"step": 3799 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 0.0427, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 58.17, |
|
"learning_rate": 2.299145299145299e-05, |
|
"loss": 0.0403, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 58.32, |
|
"learning_rate": 2.2905982905982905e-05, |
|
"loss": 0.0229, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 58.47, |
|
"learning_rate": 2.2820512820512822e-05, |
|
"loss": 0.0724, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 58.63, |
|
"learning_rate": 2.2735042735042737e-05, |
|
"loss": 0.0215, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 58.78, |
|
"learning_rate": 2.264957264957265e-05, |
|
"loss": 0.0358, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 58.93, |
|
"learning_rate": 2.2564102564102566e-05, |
|
"loss": 0.0416, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.0973171517252922, |
|
"eval_runtime": 9.7778, |
|
"eval_samples_per_second": 106.977, |
|
"eval_steps_per_second": 6.75, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 59.08, |
|
"learning_rate": 2.247863247863248e-05, |
|
"loss": 0.0285, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 59.24, |
|
"learning_rate": 2.2393162393162394e-05, |
|
"loss": 0.0363, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 59.39, |
|
"learning_rate": 2.230769230769231e-05, |
|
"loss": 0.0434, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 59.54, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0364, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 59.69, |
|
"learning_rate": 2.2136752136752138e-05, |
|
"loss": 0.0308, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 59.85, |
|
"learning_rate": 2.2051282051282052e-05, |
|
"loss": 0.0392, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 2.1965811965811967e-05, |
|
"loss": 0.0358, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.98565965583174, |
|
"eval_loss": 0.08869770169258118, |
|
"eval_runtime": 9.8493, |
|
"eval_samples_per_second": 106.2, |
|
"eval_steps_per_second": 6.701, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 60.15, |
|
"learning_rate": 2.188034188034188e-05, |
|
"loss": 0.0416, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 60.31, |
|
"learning_rate": 2.1794871794871795e-05, |
|
"loss": 0.0491, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 60.46, |
|
"learning_rate": 2.170940170940171e-05, |
|
"loss": 0.0342, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 60.61, |
|
"learning_rate": 2.1623931623931624e-05, |
|
"loss": 0.0427, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 60.76, |
|
"learning_rate": 2.1538461538461542e-05, |
|
"loss": 0.026, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"learning_rate": 2.1452991452991453e-05, |
|
"loss": 0.0503, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.09588623046875, |
|
"eval_runtime": 10.6142, |
|
"eval_samples_per_second": 98.548, |
|
"eval_steps_per_second": 6.218, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 61.07, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.0453, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 61.22, |
|
"learning_rate": 2.1282051282051282e-05, |
|
"loss": 0.0431, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 61.37, |
|
"learning_rate": 2.1196581196581196e-05, |
|
"loss": 0.0349, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 61.53, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.0243, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 61.68, |
|
"learning_rate": 2.102564102564103e-05, |
|
"loss": 0.0447, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 61.83, |
|
"learning_rate": 2.0940170940170943e-05, |
|
"loss": 0.03, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 61.98, |
|
"learning_rate": 2.0854700854700857e-05, |
|
"loss": 0.0555, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.9780114722753346, |
|
"eval_loss": 0.10571310669183731, |
|
"eval_runtime": 9.8843, |
|
"eval_samples_per_second": 105.825, |
|
"eval_steps_per_second": 6.677, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 62.14, |
|
"learning_rate": 2.0769230769230772e-05, |
|
"loss": 0.0497, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 62.29, |
|
"learning_rate": 2.0683760683760683e-05, |
|
"loss": 0.0216, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 62.44, |
|
"learning_rate": 2.0598290598290597e-05, |
|
"loss": 0.053, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"learning_rate": 2.0512820512820512e-05, |
|
"loss": 0.0525, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 62.75, |
|
"learning_rate": 2.042735042735043e-05, |
|
"loss": 0.0426, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 62.9, |
|
"learning_rate": 2.0341880341880344e-05, |
|
"loss": 0.0288, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.09712828695774078, |
|
"eval_runtime": 10.2313, |
|
"eval_samples_per_second": 102.235, |
|
"eval_steps_per_second": 6.451, |
|
"step": 4126 |
|
}, |
|
{ |
|
"epoch": 63.05, |
|
"learning_rate": 2.025641025641026e-05, |
|
"loss": 0.0289, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 2.0170940170940173e-05, |
|
"loss": 0.0196, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 63.36, |
|
"learning_rate": 2.0085470085470087e-05, |
|
"loss": 0.034, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 63.51, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0398, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 63.66, |
|
"learning_rate": 1.9914529914529913e-05, |
|
"loss": 0.0195, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 63.82, |
|
"learning_rate": 1.982905982905983e-05, |
|
"loss": 0.0401, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"learning_rate": 1.9743589743589745e-05, |
|
"loss": 0.0514, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9847036328871893, |
|
"eval_loss": 0.0754217728972435, |
|
"eval_runtime": 10.6351, |
|
"eval_samples_per_second": 98.353, |
|
"eval_steps_per_second": 6.206, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 64.12, |
|
"learning_rate": 1.965811965811966e-05, |
|
"loss": 0.0345, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 64.27, |
|
"learning_rate": 1.9572649572649574e-05, |
|
"loss": 0.0399, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 64.43, |
|
"learning_rate": 1.9487179487179488e-05, |
|
"loss": 0.0563, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 64.58, |
|
"learning_rate": 1.9401709401709403e-05, |
|
"loss": 0.0366, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 64.73, |
|
"learning_rate": 1.9316239316239317e-05, |
|
"loss": 0.0268, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 64.89, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.0602, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.07885503023862839, |
|
"eval_runtime": 9.6953, |
|
"eval_samples_per_second": 107.888, |
|
"eval_steps_per_second": 6.807, |
|
"step": 4257 |
|
}, |
|
{ |
|
"epoch": 65.04, |
|
"learning_rate": 1.914529914529915e-05, |
|
"loss": 0.0301, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 65.19, |
|
"learning_rate": 1.905982905982906e-05, |
|
"loss": 0.0662, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 65.34, |
|
"learning_rate": 1.8974358974358975e-05, |
|
"loss": 0.0315, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 65.5, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.0182, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 65.65, |
|
"learning_rate": 1.8803418803418804e-05, |
|
"loss": 0.0177, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"learning_rate": 1.8717948717948718e-05, |
|
"loss": 0.019, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 65.95, |
|
"learning_rate": 1.8632478632478632e-05, |
|
"loss": 0.0209, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.10050670802593231, |
|
"eval_runtime": 10.2235, |
|
"eval_samples_per_second": 102.314, |
|
"eval_steps_per_second": 6.456, |
|
"step": 4323 |
|
}, |
|
{ |
|
"epoch": 66.11, |
|
"learning_rate": 1.854700854700855e-05, |
|
"loss": 0.0344, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 66.26, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 0.0188, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 66.41, |
|
"learning_rate": 1.837606837606838e-05, |
|
"loss": 0.0495, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 66.56, |
|
"learning_rate": 1.829059829059829e-05, |
|
"loss": 0.0359, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"learning_rate": 1.8205128205128204e-05, |
|
"loss": 0.0196, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 66.87, |
|
"learning_rate": 1.811965811965812e-05, |
|
"loss": 0.0366, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.10703601688146591, |
|
"eval_runtime": 10.6582, |
|
"eval_samples_per_second": 98.14, |
|
"eval_steps_per_second": 6.192, |
|
"step": 4388 |
|
}, |
|
{ |
|
"epoch": 67.02, |
|
"learning_rate": 1.8034188034188033e-05, |
|
"loss": 0.0323, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 67.18, |
|
"learning_rate": 1.794871794871795e-05, |
|
"loss": 0.0326, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 67.33, |
|
"learning_rate": 1.7863247863247866e-05, |
|
"loss": 0.0348, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 67.48, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.031, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 67.63, |
|
"learning_rate": 1.7692307692307694e-05, |
|
"loss": 0.0283, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 67.79, |
|
"learning_rate": 1.760683760683761e-05, |
|
"loss": 0.0171, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 67.94, |
|
"learning_rate": 1.752136752136752e-05, |
|
"loss": 0.031, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.1018110066652298, |
|
"eval_runtime": 9.5096, |
|
"eval_samples_per_second": 109.994, |
|
"eval_steps_per_second": 6.94, |
|
"step": 4454 |
|
}, |
|
{ |
|
"epoch": 68.09, |
|
"learning_rate": 1.7435897435897434e-05, |
|
"loss": 0.0254, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 68.24, |
|
"learning_rate": 1.7350427350427352e-05, |
|
"loss": 0.0367, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"learning_rate": 1.7264957264957267e-05, |
|
"loss": 0.0227, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 68.55, |
|
"learning_rate": 1.717948717948718e-05, |
|
"loss": 0.0143, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 68.7, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.0288, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 68.85, |
|
"learning_rate": 1.700854700854701e-05, |
|
"loss": 0.043, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.10203871130943298, |
|
"eval_runtime": 10.358, |
|
"eval_samples_per_second": 100.984, |
|
"eval_steps_per_second": 6.372, |
|
"step": 4519 |
|
}, |
|
{ |
|
"epoch": 69.01, |
|
"learning_rate": 1.6923076923076924e-05, |
|
"loss": 0.0302, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 69.16, |
|
"learning_rate": 1.683760683760684e-05, |
|
"loss": 0.0235, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 69.31, |
|
"learning_rate": 1.6752136752136753e-05, |
|
"loss": 0.0463, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 69.47, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0232, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 69.62, |
|
"learning_rate": 1.6581196581196582e-05, |
|
"loss": 0.0559, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 69.77, |
|
"learning_rate": 1.6495726495726496e-05, |
|
"loss": 0.039, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 69.92, |
|
"learning_rate": 1.641025641025641e-05, |
|
"loss": 0.0262, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.08955594152212143, |
|
"eval_runtime": 10.4955, |
|
"eval_samples_per_second": 99.662, |
|
"eval_steps_per_second": 6.288, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 70.08, |
|
"learning_rate": 1.6324786324786325e-05, |
|
"loss": 0.0373, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"learning_rate": 1.623931623931624e-05, |
|
"loss": 0.0138, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 70.38, |
|
"learning_rate": 1.6153846153846154e-05, |
|
"loss": 0.0264, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 70.53, |
|
"learning_rate": 1.6068376068376072e-05, |
|
"loss": 0.0185, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 70.69, |
|
"learning_rate": 1.5982905982905983e-05, |
|
"loss": 0.0496, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 70.84, |
|
"learning_rate": 1.5897435897435897e-05, |
|
"loss": 0.0351, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"learning_rate": 1.581196581196581e-05, |
|
"loss": 0.0299, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.9837476099426387, |
|
"eval_loss": 0.09126070141792297, |
|
"eval_runtime": 9.1248, |
|
"eval_samples_per_second": 114.633, |
|
"eval_steps_per_second": 7.233, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 71.15, |
|
"learning_rate": 1.5726495726495726e-05, |
|
"loss": 0.0372, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 71.3, |
|
"learning_rate": 1.564102564102564e-05, |
|
"loss": 0.0291, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 71.45, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.0301, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"learning_rate": 1.5470085470085473e-05, |
|
"loss": 0.0573, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 71.76, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.0186, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 71.91, |
|
"learning_rate": 1.52991452991453e-05, |
|
"loss": 0.0211, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.98565965583174, |
|
"eval_loss": 0.09574086219072342, |
|
"eval_runtime": 10.4838, |
|
"eval_samples_per_second": 99.773, |
|
"eval_steps_per_second": 6.295, |
|
"step": 4716 |
|
}, |
|
{ |
|
"epoch": 72.06, |
|
"learning_rate": 1.5213675213675213e-05, |
|
"loss": 0.0405, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 72.21, |
|
"learning_rate": 1.5128205128205129e-05, |
|
"loss": 0.025, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 72.37, |
|
"learning_rate": 1.5042735042735043e-05, |
|
"loss": 0.0257, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 72.52, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.0293, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 72.67, |
|
"learning_rate": 1.4871794871794872e-05, |
|
"loss": 0.0511, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 72.82, |
|
"learning_rate": 1.4786324786324786e-05, |
|
"loss": 0.0653, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 72.98, |
|
"learning_rate": 1.4700854700854703e-05, |
|
"loss": 0.0351, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.11798752844333649, |
|
"eval_runtime": 10.3353, |
|
"eval_samples_per_second": 101.207, |
|
"eval_steps_per_second": 6.386, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 73.13, |
|
"learning_rate": 1.4615384615384617e-05, |
|
"loss": 0.0298, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"learning_rate": 1.4529914529914531e-05, |
|
"loss": 0.0382, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 73.44, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.0291, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 73.59, |
|
"learning_rate": 1.4358974358974359e-05, |
|
"loss": 0.0241, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 73.74, |
|
"learning_rate": 1.4273504273504273e-05, |
|
"loss": 0.0217, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 73.89, |
|
"learning_rate": 1.4188034188034189e-05, |
|
"loss": 0.0498, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.10561424493789673, |
|
"eval_runtime": 9.5301, |
|
"eval_samples_per_second": 109.758, |
|
"eval_steps_per_second": 6.925, |
|
"step": 4847 |
|
}, |
|
{ |
|
"epoch": 74.05, |
|
"learning_rate": 1.4102564102564104e-05, |
|
"loss": 0.0305, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"learning_rate": 1.4017094017094018e-05, |
|
"loss": 0.0295, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 74.35, |
|
"learning_rate": 1.3931623931623932e-05, |
|
"loss": 0.0415, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 74.5, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 0.0531, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 74.66, |
|
"learning_rate": 1.3760683760683763e-05, |
|
"loss": 0.0283, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 74.81, |
|
"learning_rate": 1.3675213675213677e-05, |
|
"loss": 0.0293, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"learning_rate": 1.358974358974359e-05, |
|
"loss": 0.0174, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.10319594293832779, |
|
"eval_runtime": 10.5444, |
|
"eval_samples_per_second": 99.2, |
|
"eval_steps_per_second": 6.259, |
|
"step": 4912 |
|
}, |
|
{ |
|
"epoch": 75.11, |
|
"learning_rate": 1.3504273504273504e-05, |
|
"loss": 0.0216, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 75.27, |
|
"learning_rate": 1.3418803418803419e-05, |
|
"loss": 0.0304, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 75.42, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0347, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 75.57, |
|
"learning_rate": 1.324786324786325e-05, |
|
"loss": 0.0208, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 75.73, |
|
"learning_rate": 1.3162393162393164e-05, |
|
"loss": 0.0192, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 75.88, |
|
"learning_rate": 1.3076923076923078e-05, |
|
"loss": 0.0368, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.10714620351791382, |
|
"eval_runtime": 10.1776, |
|
"eval_samples_per_second": 102.775, |
|
"eval_steps_per_second": 6.485, |
|
"step": 4978 |
|
}, |
|
{ |
|
"epoch": 76.03, |
|
"learning_rate": 1.2991452991452993e-05, |
|
"loss": 0.0549, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 76.18, |
|
"learning_rate": 1.2905982905982907e-05, |
|
"loss": 0.0363, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 76.34, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.0377, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 76.49, |
|
"learning_rate": 1.2735042735042734e-05, |
|
"loss": 0.0272, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 1.264957264957265e-05, |
|
"loss": 0.0384, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 76.79, |
|
"learning_rate": 1.2564102564102565e-05, |
|
"loss": 0.0344, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 76.95, |
|
"learning_rate": 1.247863247863248e-05, |
|
"loss": 0.0367, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.09872549772262573, |
|
"eval_runtime": 10.524, |
|
"eval_samples_per_second": 99.392, |
|
"eval_steps_per_second": 6.271, |
|
"step": 5043 |
|
}, |
|
{ |
|
"epoch": 77.1, |
|
"learning_rate": 1.2393162393162394e-05, |
|
"loss": 0.0206, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 77.25, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 0.0368, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 77.4, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.0193, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 77.56, |
|
"learning_rate": 1.2136752136752137e-05, |
|
"loss": 0.0427, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 77.71, |
|
"learning_rate": 1.2051282051282051e-05, |
|
"loss": 0.0326, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 77.86, |
|
"learning_rate": 1.1965811965811967e-05, |
|
"loss": 0.027, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.10366753488779068, |
|
"eval_runtime": 10.054, |
|
"eval_samples_per_second": 104.038, |
|
"eval_steps_per_second": 6.565, |
|
"step": 5109 |
|
}, |
|
{ |
|
"epoch": 78.02, |
|
"learning_rate": 1.188034188034188e-05, |
|
"loss": 0.0416, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 78.17, |
|
"learning_rate": 1.1794871794871795e-05, |
|
"loss": 0.0193, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 78.32, |
|
"learning_rate": 1.170940170940171e-05, |
|
"loss": 0.0362, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 78.47, |
|
"learning_rate": 1.1623931623931625e-05, |
|
"loss": 0.0348, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 78.63, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 0.0424, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 78.78, |
|
"learning_rate": 1.1452991452991452e-05, |
|
"loss": 0.0275, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 78.93, |
|
"learning_rate": 1.1367521367521368e-05, |
|
"loss": 0.0225, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.11285591125488281, |
|
"eval_runtime": 9.5703, |
|
"eval_samples_per_second": 109.296, |
|
"eval_steps_per_second": 6.896, |
|
"step": 5174 |
|
}, |
|
{ |
|
"epoch": 79.08, |
|
"learning_rate": 1.1282051282051283e-05, |
|
"loss": 0.0243, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 79.24, |
|
"learning_rate": 1.1196581196581197e-05, |
|
"loss": 0.0362, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 79.39, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.027, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 79.54, |
|
"learning_rate": 1.1025641025641026e-05, |
|
"loss": 0.0462, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 79.69, |
|
"learning_rate": 1.094017094017094e-05, |
|
"loss": 0.0341, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 79.85, |
|
"learning_rate": 1.0854700854700855e-05, |
|
"loss": 0.0302, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.0769230769230771e-05, |
|
"loss": 0.0241, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.12019278109073639, |
|
"eval_runtime": 10.7086, |
|
"eval_samples_per_second": 97.679, |
|
"eval_steps_per_second": 6.163, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 80.15, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.0322, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 80.31, |
|
"learning_rate": 1.0598290598290598e-05, |
|
"loss": 0.0205, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 80.46, |
|
"learning_rate": 1.0512820512820514e-05, |
|
"loss": 0.0606, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 80.61, |
|
"learning_rate": 1.0427350427350429e-05, |
|
"loss": 0.0468, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 80.76, |
|
"learning_rate": 1.0341880341880341e-05, |
|
"loss": 0.0439, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 80.92, |
|
"learning_rate": 1.0256410256410256e-05, |
|
"loss": 0.026, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.12190443277359009, |
|
"eval_runtime": 9.89, |
|
"eval_samples_per_second": 105.763, |
|
"eval_steps_per_second": 6.673, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 81.07, |
|
"learning_rate": 1.0170940170940172e-05, |
|
"loss": 0.0184, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 81.22, |
|
"learning_rate": 1.0085470085470086e-05, |
|
"loss": 0.0346, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 81.37, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0346, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 81.53, |
|
"learning_rate": 9.914529914529915e-06, |
|
"loss": 0.0237, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 81.68, |
|
"learning_rate": 9.82905982905983e-06, |
|
"loss": 0.031, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 81.83, |
|
"learning_rate": 9.743589743589744e-06, |
|
"loss": 0.0101, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 81.98, |
|
"learning_rate": 9.658119658119659e-06, |
|
"loss": 0.0223, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.11935597658157349, |
|
"eval_runtime": 9.9357, |
|
"eval_samples_per_second": 105.277, |
|
"eval_steps_per_second": 6.643, |
|
"step": 5371 |
|
}, |
|
{ |
|
"epoch": 82.14, |
|
"learning_rate": 9.572649572649575e-06, |
|
"loss": 0.0398, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 82.29, |
|
"learning_rate": 9.487179487179487e-06, |
|
"loss": 0.0337, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 82.44, |
|
"learning_rate": 9.401709401709402e-06, |
|
"loss": 0.046, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 82.6, |
|
"learning_rate": 9.316239316239316e-06, |
|
"loss": 0.0425, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 82.75, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 0.0144, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 82.9, |
|
"learning_rate": 9.145299145299145e-06, |
|
"loss": 0.0454, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.11481741070747375, |
|
"eval_runtime": 10.9253, |
|
"eval_samples_per_second": 95.741, |
|
"eval_steps_per_second": 6.041, |
|
"step": 5436 |
|
}, |
|
{ |
|
"epoch": 83.05, |
|
"learning_rate": 9.05982905982906e-06, |
|
"loss": 0.0217, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 83.21, |
|
"learning_rate": 8.974358974358976e-06, |
|
"loss": 0.024, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 83.36, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0312, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 83.51, |
|
"learning_rate": 8.803418803418804e-06, |
|
"loss": 0.0318, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 83.66, |
|
"learning_rate": 8.717948717948717e-06, |
|
"loss": 0.0374, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 83.82, |
|
"learning_rate": 8.632478632478633e-06, |
|
"loss": 0.0118, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.019, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.11676544696092606, |
|
"eval_runtime": 9.8983, |
|
"eval_samples_per_second": 105.674, |
|
"eval_steps_per_second": 6.668, |
|
"step": 5502 |
|
}, |
|
{ |
|
"epoch": 84.12, |
|
"learning_rate": 8.461538461538462e-06, |
|
"loss": 0.032, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"learning_rate": 8.376068376068377e-06, |
|
"loss": 0.0268, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 84.43, |
|
"learning_rate": 8.290598290598291e-06, |
|
"loss": 0.0589, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 84.58, |
|
"learning_rate": 8.205128205128205e-06, |
|
"loss": 0.0402, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 84.73, |
|
"learning_rate": 8.11965811965812e-06, |
|
"loss": 0.0301, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 84.89, |
|
"learning_rate": 8.034188034188036e-06, |
|
"loss": 0.0269, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.12455571442842484, |
|
"eval_runtime": 10.278, |
|
"eval_samples_per_second": 101.771, |
|
"eval_steps_per_second": 6.421, |
|
"step": 5567 |
|
}, |
|
{ |
|
"epoch": 85.04, |
|
"learning_rate": 7.948717948717949e-06, |
|
"loss": 0.0279, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 85.19, |
|
"learning_rate": 7.863247863247863e-06, |
|
"loss": 0.02, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 85.34, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.0235, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 85.5, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.027, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 85.65, |
|
"learning_rate": 7.606837606837606e-06, |
|
"loss": 0.0421, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 85.8, |
|
"learning_rate": 7.521367521367522e-06, |
|
"loss": 0.0178, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 85.95, |
|
"learning_rate": 7.435897435897436e-06, |
|
"loss": 0.0403, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.13009017705917358, |
|
"eval_runtime": 10.5359, |
|
"eval_samples_per_second": 99.279, |
|
"eval_steps_per_second": 6.264, |
|
"step": 5633 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"learning_rate": 7.350427350427351e-06, |
|
"loss": 0.0359, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 86.26, |
|
"learning_rate": 7.264957264957266e-06, |
|
"loss": 0.0326, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 86.41, |
|
"learning_rate": 7.179487179487179e-06, |
|
"loss": 0.0363, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 86.56, |
|
"learning_rate": 7.0940170940170945e-06, |
|
"loss": 0.0162, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 86.72, |
|
"learning_rate": 7.008547008547009e-06, |
|
"loss": 0.0422, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 86.87, |
|
"learning_rate": 6.923076923076923e-06, |
|
"loss": 0.0294, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.12042093276977539, |
|
"eval_runtime": 9.222, |
|
"eval_samples_per_second": 113.424, |
|
"eval_steps_per_second": 7.157, |
|
"step": 5698 |
|
}, |
|
{ |
|
"epoch": 87.02, |
|
"learning_rate": 6.837606837606839e-06, |
|
"loss": 0.038, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 87.18, |
|
"learning_rate": 6.752136752136752e-06, |
|
"loss": 0.0103, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 87.33, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0627, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 87.48, |
|
"learning_rate": 6.581196581196582e-06, |
|
"loss": 0.0244, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 87.63, |
|
"learning_rate": 6.495726495726496e-06, |
|
"loss": 0.0235, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 87.79, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.0233, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 87.94, |
|
"learning_rate": 6.324786324786325e-06, |
|
"loss": 0.0501, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9789674952198852, |
|
"eval_loss": 0.11684638261795044, |
|
"eval_runtime": 10.4843, |
|
"eval_samples_per_second": 99.768, |
|
"eval_steps_per_second": 6.295, |
|
"step": 5764 |
|
}, |
|
{ |
|
"epoch": 88.09, |
|
"learning_rate": 6.23931623931624e-06, |
|
"loss": 0.0321, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 88.24, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 0.0223, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 88.4, |
|
"learning_rate": 6.0683760683760684e-06, |
|
"loss": 0.0497, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 88.55, |
|
"learning_rate": 5.982905982905984e-06, |
|
"loss": 0.0319, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 88.7, |
|
"learning_rate": 5.897435897435897e-06, |
|
"loss": 0.0444, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 88.85, |
|
"learning_rate": 5.8119658119658126e-06, |
|
"loss": 0.0361, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.11426308006048203, |
|
"eval_runtime": 10.4122, |
|
"eval_samples_per_second": 100.459, |
|
"eval_steps_per_second": 6.339, |
|
"step": 5829 |
|
}, |
|
{ |
|
"epoch": 89.01, |
|
"learning_rate": 5.726495726495726e-06, |
|
"loss": 0.0254, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 89.16, |
|
"learning_rate": 5.641025641025641e-06, |
|
"loss": 0.0327, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 89.31, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0289, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 89.47, |
|
"learning_rate": 5.47008547008547e-06, |
|
"loss": 0.0174, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 5.3846153846153855e-06, |
|
"loss": 0.0278, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 89.77, |
|
"learning_rate": 5.299145299145299e-06, |
|
"loss": 0.031, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 89.92, |
|
"learning_rate": 5.213675213675214e-06, |
|
"loss": 0.0278, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9799235181644359, |
|
"eval_loss": 0.10286556929349899, |
|
"eval_runtime": 9.3657, |
|
"eval_samples_per_second": 111.684, |
|
"eval_steps_per_second": 7.047, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 90.08, |
|
"learning_rate": 5.128205128205128e-06, |
|
"loss": 0.0184, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 90.23, |
|
"learning_rate": 5.042735042735043e-06, |
|
"loss": 0.0323, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 90.38, |
|
"learning_rate": 4.957264957264958e-06, |
|
"loss": 0.0127, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 90.53, |
|
"learning_rate": 4.871794871794872e-06, |
|
"loss": 0.0258, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 90.69, |
|
"learning_rate": 4.786324786324787e-06, |
|
"loss": 0.0364, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 90.84, |
|
"learning_rate": 4.700854700854701e-06, |
|
"loss": 0.0385, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"learning_rate": 4.615384615384616e-06, |
|
"loss": 0.0267, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09912797808647156, |
|
"eval_runtime": 10.4, |
|
"eval_samples_per_second": 100.577, |
|
"eval_steps_per_second": 6.346, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 91.15, |
|
"learning_rate": 4.52991452991453e-06, |
|
"loss": 0.0313, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.0327, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 91.45, |
|
"learning_rate": 4.3589743589743586e-06, |
|
"loss": 0.0239, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 91.6, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.0314, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 91.76, |
|
"learning_rate": 4.188034188034188e-06, |
|
"loss": 0.0274, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 91.91, |
|
"learning_rate": 4.102564102564103e-06, |
|
"loss": 0.0308, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.982791586998088, |
|
"eval_loss": 0.10278183221817017, |
|
"eval_runtime": 10.2154, |
|
"eval_samples_per_second": 102.395, |
|
"eval_steps_per_second": 6.461, |
|
"step": 6026 |
|
}, |
|
{ |
|
"epoch": 92.06, |
|
"learning_rate": 4.017094017094018e-06, |
|
"loss": 0.04, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 92.21, |
|
"learning_rate": 3.9316239316239315e-06, |
|
"loss": 0.0103, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 92.37, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 0.0456, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 92.52, |
|
"learning_rate": 3.760683760683761e-06, |
|
"loss": 0.0231, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 92.67, |
|
"learning_rate": 3.6752136752136756e-06, |
|
"loss": 0.0334, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 92.82, |
|
"learning_rate": 3.5897435897435896e-06, |
|
"loss": 0.0389, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 92.98, |
|
"learning_rate": 3.5042735042735045e-06, |
|
"loss": 0.0246, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.10309727489948273, |
|
"eval_runtime": 9.5539, |
|
"eval_samples_per_second": 109.484, |
|
"eval_steps_per_second": 6.908, |
|
"step": 6091 |
|
}, |
|
{ |
|
"epoch": 93.13, |
|
"learning_rate": 3.4188034188034193e-06, |
|
"loss": 0.0262, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 93.28, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0234, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 93.44, |
|
"learning_rate": 3.247863247863248e-06, |
|
"loss": 0.0211, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 93.59, |
|
"learning_rate": 3.1623931623931626e-06, |
|
"loss": 0.0293, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 93.74, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 0.0361, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 93.89, |
|
"learning_rate": 2.991452991452992e-06, |
|
"loss": 0.0283, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.10351178050041199, |
|
"eval_runtime": 10.7511, |
|
"eval_samples_per_second": 97.292, |
|
"eval_steps_per_second": 6.139, |
|
"step": 6157 |
|
}, |
|
{ |
|
"epoch": 94.05, |
|
"learning_rate": 2.9059829059829063e-06, |
|
"loss": 0.0281, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 94.2, |
|
"learning_rate": 2.8205128205128207e-06, |
|
"loss": 0.0175, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 94.35, |
|
"learning_rate": 2.735042735042735e-06, |
|
"loss": 0.019, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 94.5, |
|
"learning_rate": 2.6495726495726495e-06, |
|
"loss": 0.0248, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 94.66, |
|
"learning_rate": 2.564102564102564e-06, |
|
"loss": 0.0295, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 94.81, |
|
"learning_rate": 2.478632478632479e-06, |
|
"loss": 0.0243, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"learning_rate": 2.3931623931623937e-06, |
|
"loss": 0.0278, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09991679340600967, |
|
"eval_runtime": 10.1438, |
|
"eval_samples_per_second": 103.117, |
|
"eval_steps_per_second": 6.506, |
|
"step": 6222 |
|
}, |
|
{ |
|
"epoch": 95.11, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 0.0323, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 95.27, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0189, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 95.42, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.0194, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 95.57, |
|
"learning_rate": 2.0512820512820513e-06, |
|
"loss": 0.0318, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 95.73, |
|
"learning_rate": 1.9658119658119658e-06, |
|
"loss": 0.0363, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 95.88, |
|
"learning_rate": 1.8803418803418804e-06, |
|
"loss": 0.0221, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9808795411089866, |
|
"eval_loss": 0.1006636694073677, |
|
"eval_runtime": 10.5083, |
|
"eval_samples_per_second": 99.54, |
|
"eval_steps_per_second": 6.281, |
|
"step": 6288 |
|
}, |
|
{ |
|
"epoch": 96.03, |
|
"learning_rate": 1.7948717948717948e-06, |
|
"loss": 0.0297, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 96.18, |
|
"learning_rate": 1.7094017094017097e-06, |
|
"loss": 0.0359, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 96.34, |
|
"learning_rate": 1.623931623931624e-06, |
|
"loss": 0.0131, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 96.49, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 0.012, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 96.64, |
|
"learning_rate": 1.4529914529914531e-06, |
|
"loss": 0.0279, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 96.79, |
|
"learning_rate": 1.3675213675213676e-06, |
|
"loss": 0.0251, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 96.95, |
|
"learning_rate": 1.282051282051282e-06, |
|
"loss": 0.0197, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09894201159477234, |
|
"eval_runtime": 10.0809, |
|
"eval_samples_per_second": 103.761, |
|
"eval_steps_per_second": 6.547, |
|
"step": 6353 |
|
}, |
|
{ |
|
"epoch": 97.1, |
|
"learning_rate": 1.1965811965811968e-06, |
|
"loss": 0.0481, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 97.25, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.038, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 97.4, |
|
"learning_rate": 1.0256410256410257e-06, |
|
"loss": 0.0476, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 97.56, |
|
"learning_rate": 9.401709401709402e-07, |
|
"loss": 0.023, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 97.71, |
|
"learning_rate": 8.547008547008548e-07, |
|
"loss": 0.0267, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 97.86, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 0.0435, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09859943389892578, |
|
"eval_runtime": 9.3732, |
|
"eval_samples_per_second": 111.595, |
|
"eval_steps_per_second": 7.041, |
|
"step": 6419 |
|
}, |
|
{ |
|
"epoch": 98.02, |
|
"learning_rate": 6.837606837606838e-07, |
|
"loss": 0.0195, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 98.17, |
|
"learning_rate": 5.982905982905984e-07, |
|
"loss": 0.0445, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 98.32, |
|
"learning_rate": 5.128205128205128e-07, |
|
"loss": 0.0338, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 98.47, |
|
"learning_rate": 4.273504273504274e-07, |
|
"loss": 0.0303, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 98.63, |
|
"learning_rate": 3.418803418803419e-07, |
|
"loss": 0.0205, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 98.78, |
|
"learning_rate": 2.564102564102564e-07, |
|
"loss": 0.0288, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 98.93, |
|
"learning_rate": 1.7094017094017095e-07, |
|
"loss": 0.0266, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09866122901439667, |
|
"eval_runtime": 10.6097, |
|
"eval_samples_per_second": 98.589, |
|
"eval_steps_per_second": 6.221, |
|
"step": 6484 |
|
}, |
|
{ |
|
"epoch": 99.08, |
|
"learning_rate": 8.547008547008547e-08, |
|
"loss": 0.0251, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"learning_rate": 0.0, |
|
"loss": 0.0334, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"eval_accuracy": 0.9818355640535373, |
|
"eval_loss": 0.09865351021289825, |
|
"eval_runtime": 10.5658, |
|
"eval_samples_per_second": 98.999, |
|
"eval_steps_per_second": 6.247, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"step": 6500, |
|
"total_flos": 1.0320982228805714e+19, |
|
"train_loss": 0.06894740481560047, |
|
"train_runtime": 8315.1084, |
|
"train_samples_per_second": 50.318, |
|
"train_steps_per_second": 0.782 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6500, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.0320982228805714e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|