|
{ |
|
"best_metric": 0.8803088803088803, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-310", |
|
"epoch": 49.31506849315068, |
|
"eval_steps": 500, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 2.1952, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5984555984555985, |
|
"eval_loss": 1.5913729667663574, |
|
"eval_runtime": 64.7917, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.139, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 1.7838, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.3705, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.6872586872586872, |
|
"eval_loss": 1.2163656949996948, |
|
"eval_runtime": 2.7019, |
|
"eval_samples_per_second": 95.857, |
|
"eval_steps_per_second": 3.331, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.2071, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.026, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7374517374517374, |
|
"eval_loss": 0.9973997473716736, |
|
"eval_runtime": 2.773, |
|
"eval_samples_per_second": 93.401, |
|
"eval_steps_per_second": 3.246, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.8747, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.829, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7722007722007722, |
|
"eval_loss": 0.7666593790054321, |
|
"eval_runtime": 2.7043, |
|
"eval_samples_per_second": 95.774, |
|
"eval_steps_per_second": 3.328, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.6805, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6513, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8223938223938224, |
|
"eval_loss": 0.6674064993858337, |
|
"eval_runtime": 2.7216, |
|
"eval_samples_per_second": 95.164, |
|
"eval_steps_per_second": 3.307, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.5516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_accuracy": 0.8378378378378378, |
|
"eval_loss": 0.58104407787323, |
|
"eval_runtime": 2.7003, |
|
"eval_samples_per_second": 95.916, |
|
"eval_steps_per_second": 3.333, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.876543209876544e-05, |
|
"loss": 0.5259, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.4978, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_accuracy": 0.8262548262548263, |
|
"eval_loss": 0.5498219132423401, |
|
"eval_runtime": 2.7634, |
|
"eval_samples_per_second": 93.724, |
|
"eval_steps_per_second": 3.257, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 4.7530864197530866e-05, |
|
"loss": 0.4849, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 4.691358024691358e-05, |
|
"loss": 0.4568, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8185328185328186, |
|
"eval_loss": 0.5999107360839844, |
|
"eval_runtime": 2.6558, |
|
"eval_samples_per_second": 97.523, |
|
"eval_steps_per_second": 3.389, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4352, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 4.567901234567901e-05, |
|
"loss": 0.4047, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5210986137390137, |
|
"eval_runtime": 2.6564, |
|
"eval_samples_per_second": 97.5, |
|
"eval_steps_per_second": 3.388, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 4.506172839506173e-05, |
|
"loss": 0.4443, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3696, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.5201298594474792, |
|
"eval_runtime": 2.969, |
|
"eval_samples_per_second": 87.234, |
|
"eval_steps_per_second": 3.031, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 4.3827160493827164e-05, |
|
"loss": 0.3712, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.3479, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_accuracy": 0.8262548262548263, |
|
"eval_loss": 0.5309751033782959, |
|
"eval_runtime": 2.8895, |
|
"eval_samples_per_second": 89.636, |
|
"eval_steps_per_second": 3.115, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.329, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5438859462738037, |
|
"eval_runtime": 2.8848, |
|
"eval_samples_per_second": 89.781, |
|
"eval_steps_per_second": 3.12, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 4.197530864197531e-05, |
|
"loss": 0.3452, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.135802469135803e-05, |
|
"loss": 0.3376, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.505037784576416, |
|
"eval_runtime": 2.6792, |
|
"eval_samples_per_second": 96.671, |
|
"eval_steps_per_second": 3.359, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3155, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.2804, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_accuracy": 0.8262548262548263, |
|
"eval_loss": 0.5709359645843506, |
|
"eval_runtime": 2.6578, |
|
"eval_samples_per_second": 97.449, |
|
"eval_steps_per_second": 3.386, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 3.950617283950617e-05, |
|
"loss": 0.292, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2941, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_accuracy": 0.8146718146718147, |
|
"eval_loss": 0.6376463770866394, |
|
"eval_runtime": 3.1239, |
|
"eval_samples_per_second": 82.91, |
|
"eval_steps_per_second": 2.881, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 3.82716049382716e-05, |
|
"loss": 0.254, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 3.7654320987654326e-05, |
|
"loss": 0.3026, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5446977615356445, |
|
"eval_runtime": 2.6837, |
|
"eval_samples_per_second": 96.51, |
|
"eval_steps_per_second": 3.354, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2807, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 3.6419753086419754e-05, |
|
"loss": 0.2578, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8803088803088803, |
|
"eval_loss": 0.5055761933326721, |
|
"eval_runtime": 3.0337, |
|
"eval_samples_per_second": 85.374, |
|
"eval_steps_per_second": 2.967, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 17.53, |
|
"learning_rate": 3.580246913580247e-05, |
|
"loss": 0.219, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"eval_accuracy": 0.861003861003861, |
|
"eval_loss": 0.5619993209838867, |
|
"eval_runtime": 2.7058, |
|
"eval_samples_per_second": 95.72, |
|
"eval_steps_per_second": 3.326, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.2377, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 3.45679012345679e-05, |
|
"loss": 0.2403, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_accuracy": 0.8455598455598455, |
|
"eval_loss": 0.5581746101379395, |
|
"eval_runtime": 2.7021, |
|
"eval_samples_per_second": 95.85, |
|
"eval_steps_per_second": 3.331, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.203, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2258, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5458388328552246, |
|
"eval_runtime": 3.0873, |
|
"eval_samples_per_second": 83.891, |
|
"eval_steps_per_second": 2.915, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 3.271604938271605e-05, |
|
"loss": 0.211, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"learning_rate": 3.209876543209876e-05, |
|
"loss": 0.2265, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.541065514087677, |
|
"eval_runtime": 2.7072, |
|
"eval_samples_per_second": 95.67, |
|
"eval_steps_per_second": 3.324, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.1848, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.1893, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5476610064506531, |
|
"eval_runtime": 2.9316, |
|
"eval_samples_per_second": 88.348, |
|
"eval_steps_per_second": 3.07, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 22.47, |
|
"learning_rate": 3.0246913580246916e-05, |
|
"loss": 0.1896, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5125249028205872, |
|
"eval_runtime": 2.7238, |
|
"eval_samples_per_second": 95.087, |
|
"eval_steps_per_second": 3.304, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.2007, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 2.9012345679012347e-05, |
|
"loss": 0.1976, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.833976833976834, |
|
"eval_loss": 0.5671693086624146, |
|
"eval_runtime": 3.015, |
|
"eval_samples_per_second": 85.904, |
|
"eval_steps_per_second": 2.985, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 24.11, |
|
"learning_rate": 2.839506172839506e-05, |
|
"loss": 0.1781, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 24.66, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1725, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.8455598455598455, |
|
"eval_loss": 0.5581468343734741, |
|
"eval_runtime": 3.0099, |
|
"eval_samples_per_second": 86.048, |
|
"eval_steps_per_second": 2.99, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 2.7160493827160493e-05, |
|
"loss": 0.1542, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"learning_rate": 2.654320987654321e-05, |
|
"loss": 0.168, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_accuracy": 0.8455598455598455, |
|
"eval_loss": 0.5964789390563965, |
|
"eval_runtime": 2.7357, |
|
"eval_samples_per_second": 94.676, |
|
"eval_steps_per_second": 3.29, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 26.3, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1806, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 26.85, |
|
"learning_rate": 2.5308641975308646e-05, |
|
"loss": 0.1821, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_accuracy": 0.861003861003861, |
|
"eval_loss": 0.5566571950912476, |
|
"eval_runtime": 3.0269, |
|
"eval_samples_per_second": 85.566, |
|
"eval_steps_per_second": 2.973, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.1622, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.1805, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.5998114943504333, |
|
"eval_runtime": 2.7276, |
|
"eval_samples_per_second": 94.954, |
|
"eval_steps_per_second": 3.3, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 28.49, |
|
"learning_rate": 2.345679012345679e-05, |
|
"loss": 0.1616, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.5450794696807861, |
|
"eval_runtime": 2.7182, |
|
"eval_samples_per_second": 95.285, |
|
"eval_steps_per_second": 3.311, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 29.04, |
|
"learning_rate": 2.2839506172839506e-05, |
|
"loss": 0.1757, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1467, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5574297308921814, |
|
"eval_runtime": 2.825, |
|
"eval_samples_per_second": 91.683, |
|
"eval_steps_per_second": 3.186, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 30.14, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.1363, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 30.68, |
|
"learning_rate": 2.0987654320987655e-05, |
|
"loss": 0.1439, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.5706761479377747, |
|
"eval_runtime": 2.7105, |
|
"eval_samples_per_second": 95.556, |
|
"eval_steps_per_second": 3.32, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 31.23, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.1329, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"learning_rate": 1.9753086419753087e-05, |
|
"loss": 0.13, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8378378378378378, |
|
"eval_loss": 0.6018989086151123, |
|
"eval_runtime": 3.1531, |
|
"eval_samples_per_second": 82.14, |
|
"eval_steps_per_second": 2.854, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"learning_rate": 1.91358024691358e-05, |
|
"loss": 0.1193, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 32.88, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1353, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.861003861003861, |
|
"eval_loss": 0.5952116847038269, |
|
"eval_runtime": 2.7134, |
|
"eval_samples_per_second": 95.452, |
|
"eval_steps_per_second": 3.317, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 33.42, |
|
"learning_rate": 1.7901234567901236e-05, |
|
"loss": 0.1312, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 1.728395061728395e-05, |
|
"loss": 0.1329, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"eval_accuracy": 0.8378378378378378, |
|
"eval_loss": 0.6262175440788269, |
|
"eval_runtime": 2.6635, |
|
"eval_samples_per_second": 97.242, |
|
"eval_steps_per_second": 3.379, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 34.52, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1258, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"eval_accuracy": 0.8455598455598455, |
|
"eval_loss": 0.6313734650611877, |
|
"eval_runtime": 3.0086, |
|
"eval_samples_per_second": 86.087, |
|
"eval_steps_per_second": 2.991, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 35.07, |
|
"learning_rate": 1.604938271604938e-05, |
|
"loss": 0.1403, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 35.62, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.1408, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5761238932609558, |
|
"eval_runtime": 2.696, |
|
"eval_samples_per_second": 96.069, |
|
"eval_steps_per_second": 3.338, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1398, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 1.419753086419753e-05, |
|
"loss": 0.1197, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.861003861003861, |
|
"eval_loss": 0.5702797174453735, |
|
"eval_runtime": 2.6925, |
|
"eval_samples_per_second": 96.193, |
|
"eval_steps_per_second": 3.343, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 37.26, |
|
"learning_rate": 1.3580246913580247e-05, |
|
"loss": 0.1258, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.1208, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_accuracy": 0.8455598455598455, |
|
"eval_loss": 0.624657392501831, |
|
"eval_runtime": 2.9681, |
|
"eval_samples_per_second": 87.263, |
|
"eval_steps_per_second": 3.032, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 38.36, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.1356, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 38.9, |
|
"learning_rate": 1.1728395061728396e-05, |
|
"loss": 0.1197, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.6025857329368591, |
|
"eval_runtime": 2.7949, |
|
"eval_samples_per_second": 92.669, |
|
"eval_steps_per_second": 3.22, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 39.45, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1045, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.0493827160493827e-05, |
|
"loss": 0.1271, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.5952697396278381, |
|
"eval_runtime": 2.6623, |
|
"eval_samples_per_second": 97.285, |
|
"eval_steps_per_second": 3.381, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 40.55, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.1053, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.6070245504379272, |
|
"eval_runtime": 3.0263, |
|
"eval_samples_per_second": 85.583, |
|
"eval_steps_per_second": 2.974, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 41.1, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1095, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 41.64, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.0846, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_accuracy": 0.861003861003861, |
|
"eval_loss": 0.6094004511833191, |
|
"eval_runtime": 2.6949, |
|
"eval_samples_per_second": 96.107, |
|
"eval_steps_per_second": 3.34, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 42.19, |
|
"learning_rate": 8.02469135802469e-06, |
|
"loss": 0.1012, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 42.74, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1206, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5911765098571777, |
|
"eval_runtime": 2.7122, |
|
"eval_samples_per_second": 95.495, |
|
"eval_steps_per_second": 3.318, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 6.790123456790123e-06, |
|
"loss": 0.0978, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 43.84, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.1225, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.607361376285553, |
|
"eval_runtime": 4.6667, |
|
"eval_samples_per_second": 55.5, |
|
"eval_steps_per_second": 1.929, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 44.38, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1033, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 44.93, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.1184, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.5943210124969482, |
|
"eval_runtime": 2.7226, |
|
"eval_samples_per_second": 95.13, |
|
"eval_steps_per_second": 3.306, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 45.48, |
|
"learning_rate": 4.3209876543209875e-06, |
|
"loss": 0.1027, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.6083826422691345, |
|
"eval_runtime": 3.0894, |
|
"eval_samples_per_second": 83.836, |
|
"eval_steps_per_second": 2.913, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 46.03, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1305, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 46.58, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.1113, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.6033775210380554, |
|
"eval_runtime": 2.9035, |
|
"eval_samples_per_second": 89.202, |
|
"eval_steps_per_second": 3.1, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 47.12, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.1018, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 47.67, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0945, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8494208494208494, |
|
"eval_loss": 0.610550582408905, |
|
"eval_runtime": 2.6737, |
|
"eval_samples_per_second": 96.869, |
|
"eval_steps_per_second": 3.366, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 48.22, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0992, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 48.77, |
|
"learning_rate": 6.17283950617284e-07, |
|
"loss": 0.1159, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.6142740845680237, |
|
"eval_runtime": 3.3099, |
|
"eval_samples_per_second": 78.25, |
|
"eval_steps_per_second": 2.719, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"learning_rate": 0.0, |
|
"loss": 0.0963, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"eval_accuracy": 0.8532818532818532, |
|
"eval_loss": 0.6143748760223389, |
|
"eval_runtime": 3.568, |
|
"eval_samples_per_second": 72.59, |
|
"eval_steps_per_second": 2.522, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"step": 900, |
|
"total_flos": 2.8579589371326874e+18, |
|
"train_loss": 0.3051255483759774, |
|
"train_runtime": 3022.9937, |
|
"train_samples_per_second": 38.554, |
|
"train_steps_per_second": 0.298 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.8579589371326874e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|