{ "best_metric": 0.49814796447753906, "best_model_checkpoint": "./vit-kimchi/checkpoint-1300", "epoch": 4.0, "eval_steps": 100, "global_step": 1928, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019896265560165978, "loss": 2.2446, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.00019792531120331952, "loss": 1.8517, "step": 20 }, { "epoch": 0.06, "learning_rate": 0.00019688796680497928, "loss": 1.5648, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.000195850622406639, "loss": 1.2418, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00019481327800829876, "loss": 1.1769, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.00019377593360995853, "loss": 1.273, "step": 60 }, { "epoch": 0.15, "learning_rate": 0.00019273858921161827, "loss": 0.972, "step": 70 }, { "epoch": 0.17, "learning_rate": 0.00019170124481327803, "loss": 1.0242, "step": 80 }, { "epoch": 0.19, "learning_rate": 0.00019066390041493777, "loss": 0.9587, "step": 90 }, { "epoch": 0.21, "learning_rate": 0.0001896265560165975, "loss": 0.8988, "step": 100 }, { "epoch": 0.21, "eval_accuracy": 0.7145454545454546, "eval_loss": 0.9406929016113281, "eval_runtime": 49.3896, "eval_samples_per_second": 22.272, "eval_steps_per_second": 2.794, "step": 100 }, { "epoch": 0.23, "learning_rate": 0.00018858921161825728, "loss": 0.925, "step": 110 }, { "epoch": 0.25, "learning_rate": 0.00018755186721991702, "loss": 0.8407, "step": 120 }, { "epoch": 0.27, "learning_rate": 0.00018651452282157679, "loss": 0.7866, "step": 130 }, { "epoch": 0.29, "learning_rate": 0.00018547717842323653, "loss": 0.7472, "step": 140 }, { "epoch": 0.31, "learning_rate": 0.0001844398340248963, "loss": 0.8416, "step": 150 }, { "epoch": 0.33, "learning_rate": 0.000183402489626556, "loss": 0.8749, "step": 160 }, { "epoch": 0.35, "learning_rate": 0.00018236514522821577, "loss": 0.8141, "step": 170 }, { "epoch": 0.37, "learning_rate": 0.00018132780082987554, "loss": 0.7578, "step": 180 }, { "epoch": 0.39, "learning_rate": 0.00018029045643153528, "loss": 0.8922, "step": 190 }, { "epoch": 0.41, "learning_rate": 0.00017925311203319504, "loss": 0.6914, "step": 200 }, { "epoch": 0.41, "eval_accuracy": 0.8081818181818182, "eval_loss": 0.6929934024810791, "eval_runtime": 53.9344, "eval_samples_per_second": 20.395, "eval_steps_per_second": 2.559, "step": 200 }, { "epoch": 0.44, "learning_rate": 0.00017821576763485478, "loss": 0.6568, "step": 210 }, { "epoch": 0.46, "learning_rate": 0.00017717842323651452, "loss": 0.7086, "step": 220 }, { "epoch": 0.48, "learning_rate": 0.0001761410788381743, "loss": 0.8155, "step": 230 }, { "epoch": 0.5, "learning_rate": 0.00017510373443983403, "loss": 0.7773, "step": 240 }, { "epoch": 0.52, "learning_rate": 0.0001740663900414938, "loss": 0.6456, "step": 250 }, { "epoch": 0.54, "learning_rate": 0.00017302904564315354, "loss": 0.59, "step": 260 }, { "epoch": 0.56, "learning_rate": 0.0001719917012448133, "loss": 0.6845, "step": 270 }, { "epoch": 0.58, "learning_rate": 0.00017095435684647304, "loss": 0.8958, "step": 280 }, { "epoch": 0.6, "learning_rate": 0.00016991701244813278, "loss": 0.61, "step": 290 }, { "epoch": 0.62, "learning_rate": 0.00016887966804979255, "loss": 0.5689, "step": 300 }, { "epoch": 0.62, "eval_accuracy": 0.7827272727272727, "eval_loss": 0.691547691822052, "eval_runtime": 52.4462, "eval_samples_per_second": 20.974, "eval_steps_per_second": 2.631, "step": 300 }, { "epoch": 0.64, "learning_rate": 0.0001678423236514523, "loss": 0.5978, "step": 310 }, { "epoch": 0.66, "learning_rate": 0.00016680497925311205, "loss": 0.6593, "step": 320 }, { "epoch": 0.68, "learning_rate": 0.0001657676348547718, "loss": 0.6557, "step": 330 }, { "epoch": 0.71, "learning_rate": 0.00016473029045643153, "loss": 0.6392, "step": 340 }, { "epoch": 0.73, "learning_rate": 0.0001636929460580913, "loss": 0.7019, "step": 350 }, { "epoch": 0.75, "learning_rate": 0.00016265560165975104, "loss": 0.713, "step": 360 }, { "epoch": 0.77, "learning_rate": 0.0001616182572614108, "loss": 0.7038, "step": 370 }, { "epoch": 0.79, "learning_rate": 0.00016058091286307055, "loss": 0.8199, "step": 380 }, { "epoch": 0.81, "learning_rate": 0.0001595435684647303, "loss": 0.7091, "step": 390 }, { "epoch": 0.83, "learning_rate": 0.00015850622406639005, "loss": 0.6755, "step": 400 }, { "epoch": 0.83, "eval_accuracy": 0.7627272727272727, "eval_loss": 0.7460294961929321, "eval_runtime": 48.0299, "eval_samples_per_second": 22.902, "eval_steps_per_second": 2.873, "step": 400 }, { "epoch": 0.85, "learning_rate": 0.0001574688796680498, "loss": 0.7875, "step": 410 }, { "epoch": 0.87, "learning_rate": 0.00015643153526970956, "loss": 0.6854, "step": 420 }, { "epoch": 0.89, "learning_rate": 0.0001553941908713693, "loss": 0.5474, "step": 430 }, { "epoch": 0.91, "learning_rate": 0.00015435684647302906, "loss": 0.7665, "step": 440 }, { "epoch": 0.93, "learning_rate": 0.0001533195020746888, "loss": 0.6185, "step": 450 }, { "epoch": 0.95, "learning_rate": 0.00015228215767634854, "loss": 0.8083, "step": 460 }, { "epoch": 0.98, "learning_rate": 0.0001512448132780083, "loss": 0.5685, "step": 470 }, { "epoch": 1.0, "learning_rate": 0.00015020746887966805, "loss": 0.5072, "step": 480 }, { "epoch": 1.02, "learning_rate": 0.00014917012448132782, "loss": 0.3516, "step": 490 }, { "epoch": 1.04, "learning_rate": 0.00014813278008298756, "loss": 0.3364, "step": 500 }, { "epoch": 1.04, "eval_accuracy": 0.8227272727272728, "eval_loss": 0.5715581774711609, "eval_runtime": 52.5205, "eval_samples_per_second": 20.944, "eval_steps_per_second": 2.628, "step": 500 }, { "epoch": 1.06, "learning_rate": 0.00014709543568464732, "loss": 0.3399, "step": 510 }, { "epoch": 1.08, "learning_rate": 0.00014605809128630706, "loss": 0.3942, "step": 520 }, { "epoch": 1.1, "learning_rate": 0.0001450207468879668, "loss": 0.3344, "step": 530 }, { "epoch": 1.12, "learning_rate": 0.00014398340248962657, "loss": 0.3416, "step": 540 }, { "epoch": 1.14, "learning_rate": 0.0001429460580912863, "loss": 0.3703, "step": 550 }, { "epoch": 1.16, "learning_rate": 0.00014190871369294607, "loss": 0.3983, "step": 560 }, { "epoch": 1.18, "learning_rate": 0.00014087136929460581, "loss": 0.3765, "step": 570 }, { "epoch": 1.2, "learning_rate": 0.00013983402489626555, "loss": 0.4639, "step": 580 }, { "epoch": 1.22, "learning_rate": 0.00013879668049792532, "loss": 0.3725, "step": 590 }, { "epoch": 1.24, "learning_rate": 0.00013775933609958506, "loss": 0.3872, "step": 600 }, { "epoch": 1.24, "eval_accuracy": 0.8009090909090909, "eval_loss": 0.6399267315864563, "eval_runtime": 48.4264, "eval_samples_per_second": 22.715, "eval_steps_per_second": 2.85, "step": 600 }, { "epoch": 1.27, "learning_rate": 0.00013672199170124483, "loss": 0.3308, "step": 610 }, { "epoch": 1.29, "learning_rate": 0.00013568464730290457, "loss": 0.4045, "step": 620 }, { "epoch": 1.31, "learning_rate": 0.00013464730290456433, "loss": 0.4431, "step": 630 }, { "epoch": 1.33, "learning_rate": 0.00013360995850622407, "loss": 0.4729, "step": 640 }, { "epoch": 1.35, "learning_rate": 0.0001325726141078838, "loss": 0.3641, "step": 650 }, { "epoch": 1.37, "learning_rate": 0.00013153526970954358, "loss": 0.2814, "step": 660 }, { "epoch": 1.39, "learning_rate": 0.00013049792531120332, "loss": 0.4076, "step": 670 }, { "epoch": 1.41, "learning_rate": 0.00012946058091286308, "loss": 0.5521, "step": 680 }, { "epoch": 1.43, "learning_rate": 0.00012842323651452285, "loss": 0.4752, "step": 690 }, { "epoch": 1.45, "learning_rate": 0.00012738589211618256, "loss": 0.4035, "step": 700 }, { "epoch": 1.45, "eval_accuracy": 0.7927272727272727, "eval_loss": 0.6669933795928955, "eval_runtime": 232.0646, "eval_samples_per_second": 4.74, "eval_steps_per_second": 0.595, "step": 700 }, { "epoch": 1.47, "learning_rate": 0.00012634854771784233, "loss": 0.356, "step": 710 }, { "epoch": 1.49, "learning_rate": 0.00012531120331950207, "loss": 0.3443, "step": 720 }, { "epoch": 1.51, "learning_rate": 0.00012427385892116184, "loss": 0.3427, "step": 730 }, { "epoch": 1.54, "learning_rate": 0.00012323651452282158, "loss": 0.4563, "step": 740 }, { "epoch": 1.56, "learning_rate": 0.00012219917012448134, "loss": 0.4225, "step": 750 }, { "epoch": 1.58, "learning_rate": 0.00012116182572614108, "loss": 0.2857, "step": 760 }, { "epoch": 1.6, "learning_rate": 0.00012012448132780084, "loss": 0.3558, "step": 770 }, { "epoch": 1.62, "learning_rate": 0.00011908713692946059, "loss": 0.3012, "step": 780 }, { "epoch": 1.64, "learning_rate": 0.00011804979253112034, "loss": 0.2585, "step": 790 }, { "epoch": 1.66, "learning_rate": 0.0001170124481327801, "loss": 0.2907, "step": 800 }, { "epoch": 1.66, "eval_accuracy": 0.8018181818181818, "eval_loss": 0.619886040687561, "eval_runtime": 182.6836, "eval_samples_per_second": 6.021, "eval_steps_per_second": 0.755, "step": 800 }, { "epoch": 1.68, "learning_rate": 0.00011597510373443985, "loss": 0.2441, "step": 810 }, { "epoch": 1.7, "learning_rate": 0.00011493775933609959, "loss": 0.3021, "step": 820 }, { "epoch": 1.72, "learning_rate": 0.00011390041493775934, "loss": 0.4212, "step": 830 }, { "epoch": 1.74, "learning_rate": 0.0001128630705394191, "loss": 0.3419, "step": 840 }, { "epoch": 1.76, "learning_rate": 0.00011182572614107885, "loss": 0.4709, "step": 850 }, { "epoch": 1.78, "learning_rate": 0.0001107883817427386, "loss": 0.3137, "step": 860 }, { "epoch": 1.8, "learning_rate": 0.00010975103734439835, "loss": 0.2889, "step": 870 }, { "epoch": 1.83, "learning_rate": 0.00010871369294605809, "loss": 0.3452, "step": 880 }, { "epoch": 1.85, "learning_rate": 0.00010767634854771785, "loss": 0.2411, "step": 890 }, { "epoch": 1.87, "learning_rate": 0.0001066390041493776, "loss": 0.2934, "step": 900 }, { "epoch": 1.87, "eval_accuracy": 0.8109090909090909, "eval_loss": 0.5889019966125488, "eval_runtime": 74.2537, "eval_samples_per_second": 14.814, "eval_steps_per_second": 1.858, "step": 900 }, { "epoch": 1.89, "learning_rate": 0.00010560165975103735, "loss": 0.3304, "step": 910 }, { "epoch": 1.91, "learning_rate": 0.0001045643153526971, "loss": 0.2853, "step": 920 }, { "epoch": 1.93, "learning_rate": 0.00010352697095435686, "loss": 0.3694, "step": 930 }, { "epoch": 1.95, "learning_rate": 0.0001024896265560166, "loss": 0.2806, "step": 940 }, { "epoch": 1.97, "learning_rate": 0.00010145228215767635, "loss": 0.3782, "step": 950 }, { "epoch": 1.99, "learning_rate": 0.0001004149377593361, "loss": 0.2552, "step": 960 }, { "epoch": 2.01, "learning_rate": 9.937759336099586e-05, "loss": 0.3536, "step": 970 }, { "epoch": 2.03, "learning_rate": 9.83402489626556e-05, "loss": 0.0899, "step": 980 }, { "epoch": 2.05, "learning_rate": 9.730290456431535e-05, "loss": 0.1806, "step": 990 }, { "epoch": 2.07, "learning_rate": 9.626556016597512e-05, "loss": 0.1166, "step": 1000 }, { "epoch": 2.07, "eval_accuracy": 0.8390909090909091, "eval_loss": 0.5368185043334961, "eval_runtime": 70.8059, "eval_samples_per_second": 15.535, "eval_steps_per_second": 1.949, "step": 1000 }, { "epoch": 2.1, "learning_rate": 9.522821576763486e-05, "loss": 0.1473, "step": 1010 }, { "epoch": 2.12, "learning_rate": 9.419087136929461e-05, "loss": 0.1777, "step": 1020 }, { "epoch": 2.14, "learning_rate": 9.315352697095436e-05, "loss": 0.2064, "step": 1030 }, { "epoch": 2.16, "learning_rate": 9.21161825726141e-05, "loss": 0.1514, "step": 1040 }, { "epoch": 2.18, "learning_rate": 9.107883817427387e-05, "loss": 0.2532, "step": 1050 }, { "epoch": 2.2, "learning_rate": 9.004149377593362e-05, "loss": 0.1578, "step": 1060 }, { "epoch": 2.22, "learning_rate": 8.900414937759336e-05, "loss": 0.1755, "step": 1070 }, { "epoch": 2.24, "learning_rate": 8.796680497925311e-05, "loss": 0.1563, "step": 1080 }, { "epoch": 2.26, "learning_rate": 8.692946058091287e-05, "loss": 0.133, "step": 1090 }, { "epoch": 2.28, "learning_rate": 8.58921161825726e-05, "loss": 0.2452, "step": 1100 }, { "epoch": 2.28, "eval_accuracy": 0.83, "eval_loss": 0.5816710591316223, "eval_runtime": 82.9166, "eval_samples_per_second": 13.266, "eval_steps_per_second": 1.664, "step": 1100 }, { "epoch": 2.3, "learning_rate": 8.485477178423237e-05, "loss": 0.155, "step": 1110 }, { "epoch": 2.32, "learning_rate": 8.381742738589213e-05, "loss": 0.1068, "step": 1120 }, { "epoch": 2.34, "learning_rate": 8.278008298755187e-05, "loss": 0.0867, "step": 1130 }, { "epoch": 2.37, "learning_rate": 8.174273858921162e-05, "loss": 0.193, "step": 1140 }, { "epoch": 2.39, "learning_rate": 8.070539419087137e-05, "loss": 0.1487, "step": 1150 }, { "epoch": 2.41, "learning_rate": 7.966804979253112e-05, "loss": 0.1718, "step": 1160 }, { "epoch": 2.43, "learning_rate": 7.863070539419088e-05, "loss": 0.2072, "step": 1170 }, { "epoch": 2.45, "learning_rate": 7.759336099585063e-05, "loss": 0.1637, "step": 1180 }, { "epoch": 2.47, "learning_rate": 7.655601659751037e-05, "loss": 0.0945, "step": 1190 }, { "epoch": 2.49, "learning_rate": 7.551867219917012e-05, "loss": 0.1838, "step": 1200 }, { "epoch": 2.49, "eval_accuracy": 0.8336363636363636, "eval_loss": 0.5901106595993042, "eval_runtime": 65.6982, "eval_samples_per_second": 16.743, "eval_steps_per_second": 2.101, "step": 1200 }, { "epoch": 2.51, "learning_rate": 7.448132780082988e-05, "loss": 0.2265, "step": 1210 }, { "epoch": 2.53, "learning_rate": 7.344398340248963e-05, "loss": 0.1791, "step": 1220 }, { "epoch": 2.55, "learning_rate": 7.240663900414938e-05, "loss": 0.1296, "step": 1230 }, { "epoch": 2.57, "learning_rate": 7.136929460580914e-05, "loss": 0.1491, "step": 1240 }, { "epoch": 2.59, "learning_rate": 7.033195020746889e-05, "loss": 0.2888, "step": 1250 }, { "epoch": 2.61, "learning_rate": 6.929460580912863e-05, "loss": 0.1386, "step": 1260 }, { "epoch": 2.63, "learning_rate": 6.825726141078838e-05, "loss": 0.0571, "step": 1270 }, { "epoch": 2.66, "learning_rate": 6.721991701244813e-05, "loss": 0.1371, "step": 1280 }, { "epoch": 2.68, "learning_rate": 6.618257261410789e-05, "loss": 0.1443, "step": 1290 }, { "epoch": 2.7, "learning_rate": 6.514522821576764e-05, "loss": 0.168, "step": 1300 }, { "epoch": 2.7, "eval_accuracy": 0.86, "eval_loss": 0.49814796447753906, "eval_runtime": 74.5165, "eval_samples_per_second": 14.762, "eval_steps_per_second": 1.852, "step": 1300 }, { "epoch": 2.72, "learning_rate": 6.41078838174274e-05, "loss": 0.1147, "step": 1310 }, { "epoch": 2.74, "learning_rate": 6.307053941908713e-05, "loss": 0.1334, "step": 1320 }, { "epoch": 2.76, "learning_rate": 6.203319502074689e-05, "loss": 0.0889, "step": 1330 }, { "epoch": 2.78, "learning_rate": 6.099585062240665e-05, "loss": 0.1234, "step": 1340 }, { "epoch": 2.8, "learning_rate": 5.995850622406639e-05, "loss": 0.0907, "step": 1350 }, { "epoch": 2.82, "learning_rate": 5.8921161825726146e-05, "loss": 0.1191, "step": 1360 }, { "epoch": 2.84, "learning_rate": 5.78838174273859e-05, "loss": 0.0852, "step": 1370 }, { "epoch": 2.86, "learning_rate": 5.6846473029045646e-05, "loss": 0.1637, "step": 1380 }, { "epoch": 2.88, "learning_rate": 5.58091286307054e-05, "loss": 0.0901, "step": 1390 }, { "epoch": 2.9, "learning_rate": 5.477178423236515e-05, "loss": 0.1578, "step": 1400 }, { "epoch": 2.9, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.5170807242393494, "eval_runtime": 44.8814, "eval_samples_per_second": 24.509, "eval_steps_per_second": 3.075, "step": 1400 }, { "epoch": 2.93, "learning_rate": 5.37344398340249e-05, "loss": 0.2121, "step": 1410 }, { "epoch": 2.95, "learning_rate": 5.269709543568465e-05, "loss": 0.1638, "step": 1420 }, { "epoch": 2.97, "learning_rate": 5.1659751037344404e-05, "loss": 0.1259, "step": 1430 }, { "epoch": 2.99, "learning_rate": 5.062240663900415e-05, "loss": 0.2338, "step": 1440 }, { "epoch": 3.01, "learning_rate": 4.9585062240663904e-05, "loss": 0.0752, "step": 1450 }, { "epoch": 3.03, "learning_rate": 4.854771784232366e-05, "loss": 0.032, "step": 1460 }, { "epoch": 3.05, "learning_rate": 4.75103734439834e-05, "loss": 0.0438, "step": 1470 }, { "epoch": 3.07, "learning_rate": 4.6473029045643156e-05, "loss": 0.02, "step": 1480 }, { "epoch": 3.09, "learning_rate": 4.543568464730291e-05, "loss": 0.0427, "step": 1490 }, { "epoch": 3.11, "learning_rate": 4.4398340248962656e-05, "loss": 0.0172, "step": 1500 }, { "epoch": 3.11, "eval_accuracy": 0.8536363636363636, "eval_loss": 0.5450488924980164, "eval_runtime": 48.9818, "eval_samples_per_second": 22.457, "eval_steps_per_second": 2.817, "step": 1500 }, { "epoch": 3.13, "learning_rate": 4.336099585062241e-05, "loss": 0.0196, "step": 1510 }, { "epoch": 3.15, "learning_rate": 4.232365145228216e-05, "loss": 0.0644, "step": 1520 }, { "epoch": 3.17, "learning_rate": 4.128630705394191e-05, "loss": 0.0208, "step": 1530 }, { "epoch": 3.2, "learning_rate": 4.024896265560166e-05, "loss": 0.0379, "step": 1540 }, { "epoch": 3.22, "learning_rate": 3.9211618257261414e-05, "loss": 0.0425, "step": 1550 }, { "epoch": 3.24, "learning_rate": 3.817427385892116e-05, "loss": 0.0271, "step": 1560 }, { "epoch": 3.26, "learning_rate": 3.7136929460580914e-05, "loss": 0.0711, "step": 1570 }, { "epoch": 3.28, "learning_rate": 3.609958506224067e-05, "loss": 0.0198, "step": 1580 }, { "epoch": 3.3, "learning_rate": 3.506224066390041e-05, "loss": 0.0543, "step": 1590 }, { "epoch": 3.32, "learning_rate": 3.4024896265560166e-05, "loss": 0.052, "step": 1600 }, { "epoch": 3.32, "eval_accuracy": 0.8463636363636363, "eval_loss": 0.560087263584137, "eval_runtime": 52.4163, "eval_samples_per_second": 20.986, "eval_steps_per_second": 2.633, "step": 1600 }, { "epoch": 3.34, "learning_rate": 3.298755186721992e-05, "loss": 0.021, "step": 1610 }, { "epoch": 3.36, "learning_rate": 3.1950207468879666e-05, "loss": 0.044, "step": 1620 }, { "epoch": 3.38, "learning_rate": 3.091286307053942e-05, "loss": 0.0595, "step": 1630 }, { "epoch": 3.4, "learning_rate": 2.9875518672199172e-05, "loss": 0.0108, "step": 1640 }, { "epoch": 3.42, "learning_rate": 2.883817427385892e-05, "loss": 0.0932, "step": 1650 }, { "epoch": 3.44, "learning_rate": 2.7800829875518675e-05, "loss": 0.0853, "step": 1660 }, { "epoch": 3.46, "learning_rate": 2.6763485477178424e-05, "loss": 0.021, "step": 1670 }, { "epoch": 3.49, "learning_rate": 2.5726141078838174e-05, "loss": 0.038, "step": 1680 }, { "epoch": 3.51, "learning_rate": 2.4688796680497927e-05, "loss": 0.039, "step": 1690 }, { "epoch": 3.53, "learning_rate": 2.3651452282157677e-05, "loss": 0.0263, "step": 1700 }, { "epoch": 3.53, "eval_accuracy": 0.8690909090909091, "eval_loss": 0.5324931740760803, "eval_runtime": 56.072, "eval_samples_per_second": 19.618, "eval_steps_per_second": 2.461, "step": 1700 }, { "epoch": 3.55, "learning_rate": 2.261410788381743e-05, "loss": 0.024, "step": 1710 }, { "epoch": 3.57, "learning_rate": 2.157676348547718e-05, "loss": 0.0129, "step": 1720 }, { "epoch": 3.59, "learning_rate": 2.053941908713693e-05, "loss": 0.0889, "step": 1730 }, { "epoch": 3.61, "learning_rate": 1.9502074688796682e-05, "loss": 0.1226, "step": 1740 }, { "epoch": 3.63, "learning_rate": 1.8464730290456432e-05, "loss": 0.0127, "step": 1750 }, { "epoch": 3.65, "learning_rate": 1.7427385892116182e-05, "loss": 0.0624, "step": 1760 }, { "epoch": 3.67, "learning_rate": 1.6390041493775935e-05, "loss": 0.0191, "step": 1770 }, { "epoch": 3.69, "learning_rate": 1.5352697095435685e-05, "loss": 0.0122, "step": 1780 }, { "epoch": 3.71, "learning_rate": 1.4315352697095436e-05, "loss": 0.0154, "step": 1790 }, { "epoch": 3.73, "learning_rate": 1.3278008298755187e-05, "loss": 0.0113, "step": 1800 }, { "epoch": 3.73, "eval_accuracy": 0.8663636363636363, "eval_loss": 0.5351160764694214, "eval_runtime": 55.1313, "eval_samples_per_second": 19.952, "eval_steps_per_second": 2.503, "step": 1800 }, { "epoch": 3.76, "learning_rate": 1.2240663900414937e-05, "loss": 0.089, "step": 1810 }, { "epoch": 3.78, "learning_rate": 1.120331950207469e-05, "loss": 0.0482, "step": 1820 }, { "epoch": 3.8, "learning_rate": 1.016597510373444e-05, "loss": 0.089, "step": 1830 }, { "epoch": 3.82, "learning_rate": 9.128630705394191e-06, "loss": 0.0537, "step": 1840 }, { "epoch": 3.84, "learning_rate": 8.091286307053943e-06, "loss": 0.023, "step": 1850 }, { "epoch": 3.86, "learning_rate": 7.053941908713693e-06, "loss": 0.0528, "step": 1860 }, { "epoch": 3.88, "learning_rate": 6.016597510373445e-06, "loss": 0.1387, "step": 1870 }, { "epoch": 3.9, "learning_rate": 4.979253112033195e-06, "loss": 0.0683, "step": 1880 }, { "epoch": 3.92, "learning_rate": 3.941908713692946e-06, "loss": 0.0383, "step": 1890 }, { "epoch": 3.94, "learning_rate": 2.9045643153526973e-06, "loss": 0.0254, "step": 1900 }, { "epoch": 3.94, "eval_accuracy": 0.8690909090909091, "eval_loss": 0.5347076654434204, "eval_runtime": 52.6092, "eval_samples_per_second": 20.909, "eval_steps_per_second": 2.623, "step": 1900 }, { "epoch": 3.96, "learning_rate": 1.8672199170124482e-06, "loss": 0.037, "step": 1910 }, { "epoch": 3.98, "learning_rate": 8.298755186721992e-07, "loss": 0.0492, "step": 1920 }, { "epoch": 4.0, "step": 1928, "total_flos": 2.386945805561856e+18, "train_loss": 0.3497509906586275, "train_runtime": 9933.302, "train_samples_per_second": 3.101, "train_steps_per_second": 0.194 } ], "logging_steps": 10, "max_steps": 1928, "num_train_epochs": 4, "save_steps": 100, "total_flos": 2.386945805561856e+18, "trial_name": null, "trial_params": null }