{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2000, "global_step": 21811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 5.9866, "step": 10 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 5.7802, "step": 20 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 5.2475, "step": 30 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 4.6122, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 4.2112, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.00012, "loss": 3.7987, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.00014, "loss": 3.5017, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00016, "loss": 3.3313, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.00018, "loss": 3.2171, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 3.1014, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.00019999989530883824, "loss": 3.0089, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.00019999958123557212, "loss": 2.9289, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.0001999990577808593, "loss": 2.9056, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.00019999832494579575, "loss": 2.8754, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00019999738273191592, "loss": 2.838, "step": 150 }, { "epoch": 0.01, "learning_rate": 0.0001999962311411927, "loss": 2.8294, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.0001999948701760372, "loss": 2.7845, "step": 170 }, { "epoch": 0.01, "learning_rate": 0.00019999329983929912, "loss": 2.7783, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00019999152013426644, "loss": 2.7443, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.00019998953106466558, "loss": 2.7478, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.00019998733263466124, "loss": 2.7408, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.00019998492484885663, "loss": 2.7232, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00019998230771229314, "loss": 2.6969, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.00019997948123045065, "loss": 2.6979, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.00019997644540924728, "loss": 2.6859, "step": 250 }, { "epoch": 0.01, "learning_rate": 0.0001999732002550395, "loss": 2.6947, "step": 260 }, { "epoch": 0.01, "learning_rate": 0.00019996974577462213, "loss": 2.6549, "step": 270 }, { "epoch": 0.01, "learning_rate": 0.00019996608197522822, "loss": 2.6624, "step": 280 }, { "epoch": 0.01, "learning_rate": 0.00019996220886452911, "loss": 2.6462, "step": 290 }, { "epoch": 0.01, "learning_rate": 0.0001999581264506344, "loss": 2.6612, "step": 300 }, { "epoch": 0.01, "learning_rate": 0.000199953834742092, "loss": 2.6211, "step": 310 }, { "epoch": 0.01, "learning_rate": 0.00019994933374788788, "loss": 2.642, "step": 320 }, { "epoch": 0.02, "learning_rate": 0.00019994462347744645, "loss": 2.6373, "step": 330 }, { "epoch": 0.02, "learning_rate": 0.0001999397039406301, "loss": 2.6165, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.0001999345751477395, "loss": 2.6179, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.00019992923710951342, "loss": 2.6159, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.0001999236898371288, "loss": 2.606, "step": 370 }, { "epoch": 0.02, "learning_rate": 0.00019991793334220065, "loss": 2.604, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.000199911967636782, "loss": 2.6028, "step": 390 }, { "epoch": 0.02, "learning_rate": 0.00019990579273336398, "loss": 2.6024, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.00019989940864487585, "loss": 2.5923, "step": 410 }, { "epoch": 0.02, "learning_rate": 0.00019989281538468465, "loss": 2.6114, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.00019988601296659555, "loss": 2.5789, "step": 430 }, { "epoch": 0.02, "learning_rate": 0.00019987900140485163, "loss": 2.6063, "step": 440 }, { "epoch": 0.02, "learning_rate": 0.0001998717807141338, "loss": 2.5787, "step": 450 }, { "epoch": 0.02, "learning_rate": 0.00019986435090956097, "loss": 2.555, "step": 460 }, { "epoch": 0.02, "learning_rate": 0.00019985671200668982, "loss": 2.5754, "step": 470 }, { "epoch": 0.02, "learning_rate": 0.00019984886402151482, "loss": 2.5737, "step": 480 }, { "epoch": 0.02, "learning_rate": 0.00019984080697046835, "loss": 2.5746, "step": 490 }, { "epoch": 0.02, "learning_rate": 0.00019983254087042036, "loss": 2.5604, "step": 500 }, { "epoch": 0.02, "learning_rate": 0.00019982406573867864, "loss": 2.564, "step": 510 }, { "epoch": 0.02, "learning_rate": 0.00019981538159298868, "loss": 2.5971, "step": 520 }, { "epoch": 0.02, "learning_rate": 0.00019980648845153342, "loss": 2.5353, "step": 530 }, { "epoch": 0.02, "learning_rate": 0.00019979738633293362, "loss": 2.5716, "step": 540 }, { "epoch": 0.03, "learning_rate": 0.0001997880752562475, "loss": 2.5572, "step": 550 }, { "epoch": 0.03, "learning_rate": 0.00019977855524097075, "loss": 2.5381, "step": 560 }, { "epoch": 0.03, "learning_rate": 0.00019976882630703667, "loss": 2.5477, "step": 570 }, { "epoch": 0.03, "learning_rate": 0.00019975888847481588, "loss": 2.5408, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.00019974874176511648, "loss": 2.5406, "step": 590 }, { "epoch": 0.03, "learning_rate": 0.00019973838619918383, "loss": 2.569, "step": 600 }, { "epoch": 0.03, "learning_rate": 0.0001997278217987007, "loss": 2.5659, "step": 610 }, { "epoch": 0.03, "learning_rate": 0.0001997170485857871, "loss": 2.5437, "step": 620 }, { "epoch": 0.03, "learning_rate": 0.00019970606658300017, "loss": 2.549, "step": 630 }, { "epoch": 0.03, "learning_rate": 0.00019969487581333436, "loss": 2.5364, "step": 640 }, { "epoch": 0.03, "learning_rate": 0.00019968347630022106, "loss": 2.5291, "step": 650 }, { "epoch": 0.03, "learning_rate": 0.00019967186806752892, "loss": 2.5358, "step": 660 }, { "epoch": 0.03, "learning_rate": 0.00019966005113956353, "loss": 2.5367, "step": 670 }, { "epoch": 0.03, "learning_rate": 0.00019964802554106736, "loss": 2.5288, "step": 680 }, { "epoch": 0.03, "learning_rate": 0.00019963579129721998, "loss": 2.5281, "step": 690 }, { "epoch": 0.03, "learning_rate": 0.0001996233484336377, "loss": 2.5068, "step": 700 }, { "epoch": 0.03, "learning_rate": 0.00019961069697637366, "loss": 2.5263, "step": 710 }, { "epoch": 0.03, "learning_rate": 0.00019959783695191783, "loss": 2.5378, "step": 720 }, { "epoch": 0.03, "learning_rate": 0.00019958476838719674, "loss": 2.5527, "step": 730 }, { "epoch": 0.03, "learning_rate": 0.00019957149130957378, "loss": 2.5342, "step": 740 }, { "epoch": 0.03, "learning_rate": 0.00019955800574684868, "loss": 2.5549, "step": 750 }, { "epoch": 0.03, "learning_rate": 0.00019954431172725787, "loss": 2.4973, "step": 760 }, { "epoch": 0.04, "learning_rate": 0.00019953040927947423, "loss": 2.5182, "step": 770 }, { "epoch": 0.04, "learning_rate": 0.00019951629843260697, "loss": 2.53, "step": 780 }, { "epoch": 0.04, "learning_rate": 0.0001995019792162018, "loss": 2.5162, "step": 790 }, { "epoch": 0.04, "learning_rate": 0.00019948745166024055, "loss": 2.4986, "step": 800 }, { "epoch": 0.04, "learning_rate": 0.00019947271579514138, "loss": 2.5029, "step": 810 }, { "epoch": 0.04, "learning_rate": 0.00019945777165175858, "loss": 2.5156, "step": 820 }, { "epoch": 0.04, "learning_rate": 0.00019944261926138258, "loss": 2.5139, "step": 830 }, { "epoch": 0.04, "learning_rate": 0.00019942725865573974, "loss": 2.5217, "step": 840 }, { "epoch": 0.04, "learning_rate": 0.0001994116898669925, "loss": 2.5289, "step": 850 }, { "epoch": 0.04, "learning_rate": 0.00019939591292773913, "loss": 2.4894, "step": 860 }, { "epoch": 0.04, "learning_rate": 0.00019937992787101382, "loss": 2.5175, "step": 870 }, { "epoch": 0.04, "learning_rate": 0.00019936373473028636, "loss": 2.4996, "step": 880 }, { "epoch": 0.04, "learning_rate": 0.00019934733353946235, "loss": 2.5087, "step": 890 }, { "epoch": 0.04, "learning_rate": 0.000199330724332883, "loss": 2.4815, "step": 900 }, { "epoch": 0.04, "learning_rate": 0.00019931390714532507, "loss": 2.4905, "step": 910 }, { "epoch": 0.04, "learning_rate": 0.00019929688201200075, "loss": 2.511, "step": 920 }, { "epoch": 0.04, "learning_rate": 0.00019927964896855767, "loss": 2.5164, "step": 930 }, { "epoch": 0.04, "learning_rate": 0.00019926220805107877, "loss": 2.4972, "step": 940 }, { "epoch": 0.04, "learning_rate": 0.00019924455929608224, "loss": 2.4919, "step": 950 }, { "epoch": 0.04, "learning_rate": 0.00019922670274052147, "loss": 2.5115, "step": 960 }, { "epoch": 0.04, "learning_rate": 0.00019920863842178493, "loss": 2.5169, "step": 970 }, { "epoch": 0.04, "learning_rate": 0.00019919036637769613, "loss": 2.4865, "step": 980 }, { "epoch": 0.05, "learning_rate": 0.00019917188664651345, "loss": 2.5039, "step": 990 }, { "epoch": 0.05, "learning_rate": 0.00019915319926693022, "loss": 2.518, "step": 1000 }, { "epoch": 0.05, "learning_rate": 0.00019913430427807448, "loss": 2.4838, "step": 1010 }, { "epoch": 0.05, "learning_rate": 0.00019911520171950902, "loss": 2.5011, "step": 1020 }, { "epoch": 0.05, "learning_rate": 0.00019909589163123124, "loss": 2.4865, "step": 1030 }, { "epoch": 0.05, "learning_rate": 0.00019907637405367298, "loss": 2.4865, "step": 1040 }, { "epoch": 0.05, "learning_rate": 0.0001990566490277007, "loss": 2.4835, "step": 1050 }, { "epoch": 0.05, "learning_rate": 0.00019903671659461504, "loss": 2.4845, "step": 1060 }, { "epoch": 0.05, "learning_rate": 0.00019901657679615098, "loss": 2.4848, "step": 1070 }, { "epoch": 0.05, "learning_rate": 0.00019899622967447776, "loss": 2.4871, "step": 1080 }, { "epoch": 0.05, "learning_rate": 0.00019897567527219862, "loss": 2.4848, "step": 1090 }, { "epoch": 0.05, "learning_rate": 0.00019895491363235084, "loss": 2.4875, "step": 1100 }, { "epoch": 0.05, "learning_rate": 0.00019893394479840565, "loss": 2.503, "step": 1110 }, { "epoch": 0.05, "learning_rate": 0.0001989127688142681, "loss": 2.4778, "step": 1120 }, { "epoch": 0.05, "learning_rate": 0.00019889138572427685, "loss": 2.4976, "step": 1130 }, { "epoch": 0.05, "learning_rate": 0.00019886979557320444, "loss": 2.4647, "step": 1140 }, { "epoch": 0.05, "learning_rate": 0.00019884799840625677, "loss": 2.4749, "step": 1150 }, { "epoch": 0.05, "learning_rate": 0.00019882599426907326, "loss": 2.4967, "step": 1160 }, { "epoch": 0.05, "learning_rate": 0.0001988037832077267, "loss": 2.4628, "step": 1170 }, { "epoch": 0.05, "learning_rate": 0.0001987813652687231, "loss": 2.4912, "step": 1180 }, { "epoch": 0.05, "learning_rate": 0.00019875874049900166, "loss": 2.4981, "step": 1190 }, { "epoch": 0.06, "learning_rate": 0.00019873590894593465, "loss": 2.4628, "step": 1200 }, { "epoch": 0.06, "learning_rate": 0.00019871287065732735, "loss": 2.493, "step": 1210 }, { "epoch": 0.06, "learning_rate": 0.00019868962568141783, "loss": 2.4585, "step": 1220 }, { "epoch": 0.06, "learning_rate": 0.00019866617406687692, "loss": 2.4467, "step": 1230 }, { "epoch": 0.06, "learning_rate": 0.00019864251586280824, "loss": 2.4778, "step": 1240 }, { "epoch": 0.06, "learning_rate": 0.00019861865111874783, "loss": 2.485, "step": 1250 }, { "epoch": 0.06, "learning_rate": 0.00019859457988466426, "loss": 2.4815, "step": 1260 }, { "epoch": 0.06, "learning_rate": 0.00019857030221095845, "loss": 2.4662, "step": 1270 }, { "epoch": 0.06, "learning_rate": 0.0001985458181484635, "loss": 2.4743, "step": 1280 }, { "epoch": 0.06, "learning_rate": 0.0001985211277484448, "loss": 2.471, "step": 1290 }, { "epoch": 0.06, "learning_rate": 0.00019849623106259966, "loss": 2.4714, "step": 1300 }, { "epoch": 0.06, "learning_rate": 0.00019847112814305726, "loss": 2.471, "step": 1310 }, { "epoch": 0.06, "learning_rate": 0.0001984458190423788, "loss": 2.4944, "step": 1320 }, { "epoch": 0.06, "learning_rate": 0.00019842030381355693, "loss": 2.4824, "step": 1330 }, { "epoch": 0.06, "learning_rate": 0.00019839458251001615, "loss": 2.4546, "step": 1340 }, { "epoch": 0.06, "learning_rate": 0.00019836865518561223, "loss": 2.4587, "step": 1350 }, { "epoch": 0.06, "learning_rate": 0.00019834252189463247, "loss": 2.4773, "step": 1360 }, { "epoch": 0.06, "learning_rate": 0.00019831618269179532, "loss": 2.4772, "step": 1370 }, { "epoch": 0.06, "learning_rate": 0.00019828963763225045, "loss": 2.4833, "step": 1380 }, { "epoch": 0.06, "learning_rate": 0.00019826288677157847, "loss": 2.4767, "step": 1390 }, { "epoch": 0.06, "learning_rate": 0.00019823593016579102, "loss": 2.4637, "step": 1400 }, { "epoch": 0.06, "learning_rate": 0.00019820876787133041, "loss": 2.4569, "step": 1410 }, { "epoch": 0.07, "learning_rate": 0.00019818139994506976, "loss": 2.467, "step": 1420 }, { "epoch": 0.07, "learning_rate": 0.00019815382644431257, "loss": 2.4592, "step": 1430 }, { "epoch": 0.07, "learning_rate": 0.00019812604742679293, "loss": 2.4793, "step": 1440 }, { "epoch": 0.07, "learning_rate": 0.00019809806295067518, "loss": 2.4497, "step": 1450 }, { "epoch": 0.07, "learning_rate": 0.00019806987307455386, "loss": 2.4673, "step": 1460 }, { "epoch": 0.07, "learning_rate": 0.00019804147785745365, "loss": 2.4531, "step": 1470 }, { "epoch": 0.07, "learning_rate": 0.00019801287735882902, "loss": 2.4702, "step": 1480 }, { "epoch": 0.07, "learning_rate": 0.00019798407163856445, "loss": 2.4723, "step": 1490 }, { "epoch": 0.07, "learning_rate": 0.00019795506075697395, "loss": 2.4641, "step": 1500 }, { "epoch": 0.07, "learning_rate": 0.00019792584477480122, "loss": 2.4383, "step": 1510 }, { "epoch": 0.07, "learning_rate": 0.00019789642375321934, "loss": 2.4435, "step": 1520 }, { "epoch": 0.07, "learning_rate": 0.00019786679775383073, "loss": 2.4815, "step": 1530 }, { "epoch": 0.07, "learning_rate": 0.00019783696683866702, "loss": 2.459, "step": 1540 }, { "epoch": 0.07, "learning_rate": 0.00019780693107018883, "loss": 2.4581, "step": 1550 }, { "epoch": 0.07, "learning_rate": 0.00019777669051128583, "loss": 2.476, "step": 1560 }, { "epoch": 0.07, "learning_rate": 0.00019774624522527634, "loss": 2.465, "step": 1570 }, { "epoch": 0.07, "learning_rate": 0.0001977155952759074, "loss": 2.4681, "step": 1580 }, { "epoch": 0.07, "learning_rate": 0.00019768474072735462, "loss": 2.4557, "step": 1590 }, { "epoch": 0.07, "learning_rate": 0.00019765368164422194, "loss": 2.4589, "step": 1600 }, { "epoch": 0.07, "learning_rate": 0.00019762241809154164, "loss": 2.4721, "step": 1610 }, { "epoch": 0.07, "learning_rate": 0.000197590950134774, "loss": 2.4358, "step": 1620 }, { "epoch": 0.07, "learning_rate": 0.00019755927783980743, "loss": 2.4473, "step": 1630 }, { "epoch": 0.08, "learning_rate": 0.00019752740127295808, "loss": 2.4597, "step": 1640 }, { "epoch": 0.08, "learning_rate": 0.00019749532050096987, "loss": 2.4635, "step": 1650 }, { "epoch": 0.08, "learning_rate": 0.00019746303559101423, "loss": 2.4613, "step": 1660 }, { "epoch": 0.08, "learning_rate": 0.00019743054661069005, "loss": 2.434, "step": 1670 }, { "epoch": 0.08, "learning_rate": 0.00019739785362802355, "loss": 2.4583, "step": 1680 }, { "epoch": 0.08, "learning_rate": 0.00019736495671146805, "loss": 2.4445, "step": 1690 }, { "epoch": 0.08, "learning_rate": 0.0001973318559299039, "loss": 2.4526, "step": 1700 }, { "epoch": 0.08, "learning_rate": 0.00019729855135263823, "loss": 2.4444, "step": 1710 }, { "epoch": 0.08, "learning_rate": 0.00019726504304940496, "loss": 2.4348, "step": 1720 }, { "epoch": 0.08, "learning_rate": 0.00019723133109036457, "loss": 2.4788, "step": 1730 }, { "epoch": 0.08, "learning_rate": 0.0001971974155461039, "loss": 2.4486, "step": 1740 }, { "epoch": 0.08, "learning_rate": 0.00019716329648763616, "loss": 2.4431, "step": 1750 }, { "epoch": 0.08, "learning_rate": 0.00019712897398640062, "loss": 2.4576, "step": 1760 }, { "epoch": 0.08, "learning_rate": 0.00019709444811426247, "loss": 2.4411, "step": 1770 }, { "epoch": 0.08, "learning_rate": 0.00019705971894351288, "loss": 2.4575, "step": 1780 }, { "epoch": 0.08, "learning_rate": 0.00019702478654686848, "loss": 2.438, "step": 1790 }, { "epoch": 0.08, "learning_rate": 0.00019698965099747165, "loss": 2.4322, "step": 1800 }, { "epoch": 0.08, "learning_rate": 0.00019695431236888992, "loss": 2.4797, "step": 1810 }, { "epoch": 0.08, "learning_rate": 0.0001969187707351162, "loss": 2.4608, "step": 1820 }, { "epoch": 0.08, "learning_rate": 0.00019688302617056836, "loss": 2.4586, "step": 1830 }, { "epoch": 0.08, "learning_rate": 0.0001968470787500892, "loss": 2.4422, "step": 1840 }, { "epoch": 0.08, "learning_rate": 0.0001968109285489463, "loss": 2.4493, "step": 1850 }, { "epoch": 0.09, "learning_rate": 0.00019677457564283172, "loss": 2.4497, "step": 1860 }, { "epoch": 0.09, "learning_rate": 0.00019673802010786208, "loss": 2.4427, "step": 1870 }, { "epoch": 0.09, "learning_rate": 0.0001967012620205782, "loss": 2.4482, "step": 1880 }, { "epoch": 0.09, "learning_rate": 0.00019666430145794496, "loss": 2.4277, "step": 1890 }, { "epoch": 0.09, "learning_rate": 0.00019662713849735134, "loss": 2.4619, "step": 1900 }, { "epoch": 0.09, "learning_rate": 0.00019658977321660993, "loss": 2.4354, "step": 1910 }, { "epoch": 0.09, "learning_rate": 0.0001965522056939571, "loss": 2.458, "step": 1920 }, { "epoch": 0.09, "learning_rate": 0.0001965144360080525, "loss": 2.458, "step": 1930 }, { "epoch": 0.09, "learning_rate": 0.00019647646423797926, "loss": 2.4167, "step": 1940 }, { "epoch": 0.09, "learning_rate": 0.00019643829046324352, "loss": 2.4658, "step": 1950 }, { "epoch": 0.09, "learning_rate": 0.00019639991476377443, "loss": 2.4474, "step": 1960 }, { "epoch": 0.09, "learning_rate": 0.00019636133721992387, "loss": 2.4429, "step": 1970 }, { "epoch": 0.09, "learning_rate": 0.00019632255791246646, "loss": 2.4336, "step": 1980 }, { "epoch": 0.09, "learning_rate": 0.00019628357692259924, "loss": 2.4487, "step": 1990 }, { "epoch": 0.09, "learning_rate": 0.00019624439433194144, "loss": 2.4497, "step": 2000 }, { "epoch": 0.09, "eval_accuracy": 0.5302039425781068, "eval_loss": 2.2889890670776367, "eval_runtime": 10.496, "eval_samples_per_second": 125.477, "eval_steps_per_second": 1.048, "step": 2000 }, { "epoch": 0.09, "learning_rate": 0.0001962050102225345, "loss": 2.4419, "step": 2010 }, { "epoch": 0.09, "learning_rate": 0.0001961654246768418, "loss": 2.4419, "step": 2020 }, { "epoch": 0.09, "learning_rate": 0.00019612563777774846, "loss": 2.4574, "step": 2030 }, { "epoch": 0.09, "learning_rate": 0.00019608564960856123, "loss": 2.4221, "step": 2040 }, { "epoch": 0.09, "learning_rate": 0.00019604546025300828, "loss": 2.4448, "step": 2050 }, { "epoch": 0.09, "learning_rate": 0.00019600506979523897, "loss": 2.425, "step": 2060 }, { "epoch": 0.09, "learning_rate": 0.0001959644783198238, "loss": 2.459, "step": 2070 }, { "epoch": 0.1, "learning_rate": 0.00019592368591175415, "loss": 2.4464, "step": 2080 }, { "epoch": 0.1, "learning_rate": 0.0001958826926564421, "loss": 2.4366, "step": 2090 }, { "epoch": 0.1, "learning_rate": 0.0001958414986397203, "loss": 2.4411, "step": 2100 }, { "epoch": 0.1, "learning_rate": 0.00019580010394784172, "loss": 2.4026, "step": 2110 }, { "epoch": 0.1, "learning_rate": 0.00019575850866747953, "loss": 2.4511, "step": 2120 }, { "epoch": 0.1, "learning_rate": 0.00019571671288572693, "loss": 2.4216, "step": 2130 }, { "epoch": 0.1, "learning_rate": 0.00019567471669009685, "loss": 2.4393, "step": 2140 }, { "epoch": 0.1, "learning_rate": 0.00019563252016852194, "loss": 2.4549, "step": 2150 }, { "epoch": 0.1, "learning_rate": 0.0001955901234093542, "loss": 2.4517, "step": 2160 }, { "epoch": 0.1, "learning_rate": 0.000195547526501365, "loss": 2.4431, "step": 2170 }, { "epoch": 0.1, "learning_rate": 0.00019550472953374474, "loss": 2.4349, "step": 2180 }, { "epoch": 0.1, "learning_rate": 0.0001954617325961027, "loss": 2.4337, "step": 2190 }, { "epoch": 0.1, "learning_rate": 0.00019541853577846686, "loss": 2.4197, "step": 2200 }, { "epoch": 0.1, "learning_rate": 0.00019537513917128367, "loss": 2.4508, "step": 2210 }, { "epoch": 0.1, "learning_rate": 0.00019533154286541801, "loss": 2.4115, "step": 2220 }, { "epoch": 0.1, "learning_rate": 0.00019528774695215287, "loss": 2.4269, "step": 2230 }, { "epoch": 0.1, "learning_rate": 0.00019524375152318909, "loss": 2.4323, "step": 2240 }, { "epoch": 0.1, "learning_rate": 0.0001951995566706453, "loss": 2.4323, "step": 2250 }, { "epoch": 0.1, "learning_rate": 0.0001951551624870578, "loss": 2.4426, "step": 2260 }, { "epoch": 0.1, "learning_rate": 0.0001951105690653801, "loss": 2.4389, "step": 2270 }, { "epoch": 0.1, "learning_rate": 0.00019506577649898294, "loss": 2.448, "step": 2280 }, { "epoch": 0.1, "learning_rate": 0.0001950207848816541, "loss": 2.4289, "step": 2290 }, { "epoch": 0.11, "learning_rate": 0.00019497559430759796, "loss": 2.4392, "step": 2300 }, { "epoch": 0.11, "learning_rate": 0.00019493020487143569, "loss": 2.437, "step": 2310 }, { "epoch": 0.11, "learning_rate": 0.0001948846166682047, "loss": 2.4371, "step": 2320 }, { "epoch": 0.11, "learning_rate": 0.00019483882979335867, "loss": 2.4171, "step": 2330 }, { "epoch": 0.11, "learning_rate": 0.0001947928443427672, "loss": 2.4147, "step": 2340 }, { "epoch": 0.11, "learning_rate": 0.00019474666041271568, "loss": 2.4345, "step": 2350 }, { "epoch": 0.11, "learning_rate": 0.00019470027809990513, "loss": 2.4404, "step": 2360 }, { "epoch": 0.11, "learning_rate": 0.00019465369750145187, "loss": 2.454, "step": 2370 }, { "epoch": 0.11, "learning_rate": 0.00019460691871488742, "loss": 2.4344, "step": 2380 }, { "epoch": 0.11, "learning_rate": 0.0001945599418381584, "loss": 2.4523, "step": 2390 }, { "epoch": 0.11, "learning_rate": 0.000194512766969626, "loss": 2.431, "step": 2400 }, { "epoch": 0.11, "learning_rate": 0.00019446539420806604, "loss": 2.4223, "step": 2410 }, { "epoch": 0.11, "learning_rate": 0.00019441782365266878, "loss": 2.4584, "step": 2420 }, { "epoch": 0.11, "learning_rate": 0.0001943700554030385, "loss": 2.4183, "step": 2430 }, { "epoch": 0.11, "learning_rate": 0.0001943220895591935, "loss": 2.4164, "step": 2440 }, { "epoch": 0.11, "learning_rate": 0.00019427392622156577, "loss": 2.4048, "step": 2450 }, { "epoch": 0.11, "learning_rate": 0.0001942255654910008, "loss": 2.4366, "step": 2460 }, { "epoch": 0.11, "learning_rate": 0.00019417700746875743, "loss": 2.4395, "step": 2470 }, { "epoch": 0.11, "learning_rate": 0.0001941282522565076, "loss": 2.4041, "step": 2480 }, { "epoch": 0.11, "learning_rate": 0.00019407929995633608, "loss": 2.4511, "step": 2490 }, { "epoch": 0.11, "learning_rate": 0.00019403015067074035, "loss": 2.3915, "step": 2500 }, { "epoch": 0.12, "learning_rate": 0.0001939808045026303, "loss": 2.4129, "step": 2510 }, { "epoch": 0.12, "learning_rate": 0.0001939312615553281, "loss": 2.4414, "step": 2520 }, { "epoch": 0.12, "learning_rate": 0.0001938815219325679, "loss": 2.4143, "step": 2530 }, { "epoch": 0.12, "learning_rate": 0.0001938315857384957, "loss": 2.4359, "step": 2540 }, { "epoch": 0.12, "learning_rate": 0.0001937814530776691, "loss": 2.4252, "step": 2550 }, { "epoch": 0.12, "learning_rate": 0.000193731124055057, "loss": 2.4249, "step": 2560 }, { "epoch": 0.12, "learning_rate": 0.00019368059877603941, "loss": 2.4223, "step": 2570 }, { "epoch": 0.12, "learning_rate": 0.00019362987734640745, "loss": 2.4116, "step": 2580 }, { "epoch": 0.12, "learning_rate": 0.00019357895987236274, "loss": 2.4108, "step": 2590 }, { "epoch": 0.12, "learning_rate": 0.00019352784646051752, "loss": 2.4274, "step": 2600 }, { "epoch": 0.12, "learning_rate": 0.0001934765372178942, "loss": 2.4025, "step": 2610 }, { "epoch": 0.12, "learning_rate": 0.0001934250322519253, "loss": 2.4278, "step": 2620 }, { "epoch": 0.12, "learning_rate": 0.00019337333167045309, "loss": 2.4119, "step": 2630 }, { "epoch": 0.12, "learning_rate": 0.00019332143558172947, "loss": 2.4189, "step": 2640 }, { "epoch": 0.12, "learning_rate": 0.00019326934409441565, "loss": 2.4447, "step": 2650 }, { "epoch": 0.12, "learning_rate": 0.00019321705731758203, "loss": 2.4089, "step": 2660 }, { "epoch": 0.12, "learning_rate": 0.00019316457536070787, "loss": 2.4294, "step": 2670 }, { "epoch": 0.12, "learning_rate": 0.00019311189833368106, "loss": 2.4435, "step": 2680 }, { "epoch": 0.12, "learning_rate": 0.00019305902634679804, "loss": 2.4375, "step": 2690 }, { "epoch": 0.12, "learning_rate": 0.00019300595951076338, "loss": 2.4381, "step": 2700 }, { "epoch": 0.12, "learning_rate": 0.0001929526979366897, "loss": 2.4145, "step": 2710 }, { "epoch": 0.12, "learning_rate": 0.00019289924173609727, "loss": 2.4238, "step": 2720 }, { "epoch": 0.13, "learning_rate": 0.0001928455910209139, "loss": 2.4317, "step": 2730 }, { "epoch": 0.13, "learning_rate": 0.0001927917459034748, "loss": 2.4323, "step": 2740 }, { "epoch": 0.13, "learning_rate": 0.00019273770649652202, "loss": 2.4067, "step": 2750 }, { "epoch": 0.13, "learning_rate": 0.00019268347291320458, "loss": 2.4032, "step": 2760 }, { "epoch": 0.13, "learning_rate": 0.00019262904526707805, "loss": 2.4418, "step": 2770 }, { "epoch": 0.13, "learning_rate": 0.00019257442367210425, "loss": 2.4239, "step": 2780 }, { "epoch": 0.13, "learning_rate": 0.00019251960824265117, "loss": 2.4211, "step": 2790 }, { "epoch": 0.13, "learning_rate": 0.00019246459909349258, "loss": 2.4137, "step": 2800 }, { "epoch": 0.13, "learning_rate": 0.00019240939633980797, "loss": 2.4102, "step": 2810 }, { "epoch": 0.13, "learning_rate": 0.00019235400009718213, "loss": 2.4359, "step": 2820 }, { "epoch": 0.13, "learning_rate": 0.000192298410481605, "loss": 2.4166, "step": 2830 }, { "epoch": 0.13, "learning_rate": 0.00019224262760947144, "loss": 2.4513, "step": 2840 }, { "epoch": 0.13, "learning_rate": 0.00019218665159758086, "loss": 2.4078, "step": 2850 }, { "epoch": 0.13, "learning_rate": 0.0001921304825631372, "loss": 2.4249, "step": 2860 }, { "epoch": 0.13, "learning_rate": 0.00019207412062374843, "loss": 2.4068, "step": 2870 }, { "epoch": 0.13, "learning_rate": 0.00019201756589742652, "loss": 2.4054, "step": 2880 }, { "epoch": 0.13, "learning_rate": 0.00019196081850258708, "loss": 2.4044, "step": 2890 }, { "epoch": 0.13, "learning_rate": 0.0001919038785580491, "loss": 2.4108, "step": 2900 }, { "epoch": 0.13, "learning_rate": 0.00019184674618303476, "loss": 2.4189, "step": 2910 }, { "epoch": 0.13, "learning_rate": 0.00019178942149716916, "loss": 2.4104, "step": 2920 }, { "epoch": 0.13, "learning_rate": 0.00019173190462048008, "loss": 2.418, "step": 2930 }, { "epoch": 0.13, "learning_rate": 0.00019167419567339767, "loss": 2.4112, "step": 2940 }, { "epoch": 0.14, "learning_rate": 0.00019161629477675428, "loss": 2.4178, "step": 2950 }, { "epoch": 0.14, "learning_rate": 0.0001915582020517841, "loss": 2.4017, "step": 2960 }, { "epoch": 0.14, "learning_rate": 0.00019149991762012312, "loss": 2.4271, "step": 2970 }, { "epoch": 0.14, "learning_rate": 0.00019144144160380857, "loss": 2.3857, "step": 2980 }, { "epoch": 0.14, "learning_rate": 0.00019138277412527889, "loss": 2.4088, "step": 2990 }, { "epoch": 0.14, "learning_rate": 0.00019132391530737343, "loss": 2.386, "step": 3000 }, { "epoch": 0.14, "learning_rate": 0.00019126486527333217, "loss": 2.4218, "step": 3010 }, { "epoch": 0.14, "learning_rate": 0.0001912056241467954, "loss": 2.4485, "step": 3020 }, { "epoch": 0.14, "learning_rate": 0.0001911461920518036, "loss": 2.4105, "step": 3030 }, { "epoch": 0.14, "learning_rate": 0.00019108656911279707, "loss": 2.4086, "step": 3040 }, { "epoch": 0.14, "learning_rate": 0.00019102675545461568, "loss": 2.411, "step": 3050 }, { "epoch": 0.14, "learning_rate": 0.00019096675120249868, "loss": 2.4159, "step": 3060 }, { "epoch": 0.14, "learning_rate": 0.00019090655648208436, "loss": 2.4106, "step": 3070 }, { "epoch": 0.14, "learning_rate": 0.0001908461714194098, "loss": 2.4177, "step": 3080 }, { "epoch": 0.14, "learning_rate": 0.0001907855961409107, "loss": 2.4199, "step": 3090 }, { "epoch": 0.14, "learning_rate": 0.00019072483077342098, "loss": 2.4028, "step": 3100 }, { "epoch": 0.14, "learning_rate": 0.00019066387544417252, "loss": 2.4306, "step": 3110 }, { "epoch": 0.14, "learning_rate": 0.00019060273028079507, "loss": 2.3988, "step": 3120 }, { "epoch": 0.14, "learning_rate": 0.00019054139541131571, "loss": 2.4424, "step": 3130 }, { "epoch": 0.14, "learning_rate": 0.0001904798709641589, "loss": 2.4114, "step": 3140 }, { "epoch": 0.14, "learning_rate": 0.00019041815706814593, "loss": 2.3974, "step": 3150 }, { "epoch": 0.14, "learning_rate": 0.0001903562538524948, "loss": 2.3989, "step": 3160 }, { "epoch": 0.15, "learning_rate": 0.0001902941614468199, "loss": 2.4268, "step": 3170 }, { "epoch": 0.15, "learning_rate": 0.0001902318799811317, "loss": 2.4028, "step": 3180 }, { "epoch": 0.15, "learning_rate": 0.00019016940958583662, "loss": 2.4208, "step": 3190 }, { "epoch": 0.15, "learning_rate": 0.00019010675039173665, "loss": 2.3983, "step": 3200 }, { "epoch": 0.15, "learning_rate": 0.000190043902530029, "loss": 2.4205, "step": 3210 }, { "epoch": 0.15, "learning_rate": 0.00018998086613230606, "loss": 2.4077, "step": 3220 }, { "epoch": 0.15, "learning_rate": 0.00018991764133055486, "loss": 2.4044, "step": 3230 }, { "epoch": 0.15, "learning_rate": 0.00018985422825715692, "loss": 2.3765, "step": 3240 }, { "epoch": 0.15, "learning_rate": 0.00018979062704488814, "loss": 2.4315, "step": 3250 }, { "epoch": 0.15, "learning_rate": 0.00018972683782691804, "loss": 2.4289, "step": 3260 }, { "epoch": 0.15, "learning_rate": 0.00018966286073681012, "loss": 2.4033, "step": 3270 }, { "epoch": 0.15, "learning_rate": 0.00018959869590852102, "loss": 2.3982, "step": 3280 }, { "epoch": 0.15, "learning_rate": 0.00018953434347640055, "loss": 2.4293, "step": 3290 }, { "epoch": 0.15, "learning_rate": 0.00018946980357519136, "loss": 2.4087, "step": 3300 }, { "epoch": 0.15, "learning_rate": 0.00018940507634002853, "loss": 2.412, "step": 3310 }, { "epoch": 0.15, "learning_rate": 0.00018934016190643954, "loss": 2.414, "step": 3320 }, { "epoch": 0.15, "learning_rate": 0.00018927506041034367, "loss": 2.4216, "step": 3330 }, { "epoch": 0.15, "learning_rate": 0.00018920977198805199, "loss": 2.4053, "step": 3340 }, { "epoch": 0.15, "learning_rate": 0.00018914429677626687, "loss": 2.4182, "step": 3350 }, { "epoch": 0.15, "learning_rate": 0.00018907863491208183, "loss": 2.4077, "step": 3360 }, { "epoch": 0.15, "learning_rate": 0.00018901278653298128, "loss": 2.4089, "step": 3370 }, { "epoch": 0.15, "learning_rate": 0.00018894675177684003, "loss": 2.4061, "step": 3380 }, { "epoch": 0.16, "learning_rate": 0.00018888053078192315, "loss": 2.4029, "step": 3390 }, { "epoch": 0.16, "learning_rate": 0.0001888141236868858, "loss": 2.4107, "step": 3400 }, { "epoch": 0.16, "learning_rate": 0.00018874753063077263, "loss": 2.4016, "step": 3410 }, { "epoch": 0.16, "learning_rate": 0.00018868075175301768, "loss": 2.4213, "step": 3420 }, { "epoch": 0.16, "learning_rate": 0.00018861378719344424, "loss": 2.401, "step": 3430 }, { "epoch": 0.16, "learning_rate": 0.00018854663709226416, "loss": 2.4078, "step": 3440 }, { "epoch": 0.16, "learning_rate": 0.0001884793015900779, "loss": 2.4128, "step": 3450 }, { "epoch": 0.16, "learning_rate": 0.00018841178082787416, "loss": 2.3948, "step": 3460 }, { "epoch": 0.16, "learning_rate": 0.00018834407494702942, "loss": 2.4056, "step": 3470 }, { "epoch": 0.16, "learning_rate": 0.00018827618408930781, "loss": 2.3651, "step": 3480 }, { "epoch": 0.16, "learning_rate": 0.00018820810839686086, "loss": 2.3947, "step": 3490 }, { "epoch": 0.16, "learning_rate": 0.000188139848012227, "loss": 2.3893, "step": 3500 }, { "epoch": 0.16, "learning_rate": 0.0001880714030783314, "loss": 2.4021, "step": 3510 }, { "epoch": 0.16, "learning_rate": 0.00018800277373848563, "loss": 2.4101, "step": 3520 }, { "epoch": 0.16, "learning_rate": 0.00018793396013638743, "loss": 2.4058, "step": 3530 }, { "epoch": 0.16, "learning_rate": 0.00018786496241612034, "loss": 2.3911, "step": 3540 }, { "epoch": 0.16, "learning_rate": 0.0001877957807221533, "loss": 2.4102, "step": 3550 }, { "epoch": 0.16, "learning_rate": 0.00018772641519934066, "loss": 2.4375, "step": 3560 }, { "epoch": 0.16, "learning_rate": 0.00018765686599292152, "loss": 2.4198, "step": 3570 }, { "epoch": 0.16, "learning_rate": 0.00018758713324851963, "loss": 2.4036, "step": 3580 }, { "epoch": 0.16, "learning_rate": 0.000187517217112143, "loss": 2.3893, "step": 3590 }, { "epoch": 0.17, "learning_rate": 0.00018744711773018366, "loss": 2.3959, "step": 3600 }, { "epoch": 0.17, "learning_rate": 0.00018737683524941735, "loss": 2.393, "step": 3610 }, { "epoch": 0.17, "learning_rate": 0.00018730636981700318, "loss": 2.3963, "step": 3620 }, { "epoch": 0.17, "learning_rate": 0.00018723572158048328, "loss": 2.4216, "step": 3630 }, { "epoch": 0.17, "learning_rate": 0.00018716489068778256, "loss": 2.4168, "step": 3640 }, { "epoch": 0.17, "learning_rate": 0.0001870938772872084, "loss": 2.4178, "step": 3650 }, { "epoch": 0.17, "learning_rate": 0.00018702268152745032, "loss": 2.4108, "step": 3660 }, { "epoch": 0.17, "learning_rate": 0.00018695130355757966, "loss": 2.3884, "step": 3670 }, { "epoch": 0.17, "learning_rate": 0.00018687974352704924, "loss": 2.3969, "step": 3680 }, { "epoch": 0.17, "learning_rate": 0.00018680800158569317, "loss": 2.393, "step": 3690 }, { "epoch": 0.17, "learning_rate": 0.00018673607788372633, "loss": 2.426, "step": 3700 }, { "epoch": 0.17, "learning_rate": 0.00018666397257174428, "loss": 2.4089, "step": 3710 }, { "epoch": 0.17, "learning_rate": 0.00018659168580072275, "loss": 2.3972, "step": 3720 }, { "epoch": 0.17, "learning_rate": 0.00018651921772201753, "loss": 2.3956, "step": 3730 }, { "epoch": 0.17, "learning_rate": 0.00018644656848736392, "loss": 2.3882, "step": 3740 }, { "epoch": 0.17, "learning_rate": 0.0001863737382488766, "loss": 2.3841, "step": 3750 }, { "epoch": 0.17, "learning_rate": 0.00018630072715904918, "loss": 2.4086, "step": 3760 }, { "epoch": 0.17, "learning_rate": 0.000186227535370754, "loss": 2.4197, "step": 3770 }, { "epoch": 0.17, "learning_rate": 0.00018615416303724178, "loss": 2.4327, "step": 3780 }, { "epoch": 0.17, "learning_rate": 0.0001860806103121411, "loss": 2.408, "step": 3790 }, { "epoch": 0.17, "learning_rate": 0.00018600687734945844, "loss": 2.3878, "step": 3800 }, { "epoch": 0.17, "learning_rate": 0.00018593296430357762, "loss": 2.3852, "step": 3810 }, { "epoch": 0.18, "learning_rate": 0.0001858588713292594, "loss": 2.3824, "step": 3820 }, { "epoch": 0.18, "learning_rate": 0.00018578459858164146, "loss": 2.3784, "step": 3830 }, { "epoch": 0.18, "learning_rate": 0.00018571014621623775, "loss": 2.3991, "step": 3840 }, { "epoch": 0.18, "learning_rate": 0.00018563551438893837, "loss": 2.409, "step": 3850 }, { "epoch": 0.18, "learning_rate": 0.00018556070325600918, "loss": 2.3838, "step": 3860 }, { "epoch": 0.18, "learning_rate": 0.00018548571297409149, "loss": 2.3909, "step": 3870 }, { "epoch": 0.18, "learning_rate": 0.00018541054370020167, "loss": 2.3895, "step": 3880 }, { "epoch": 0.18, "learning_rate": 0.00018533519559173092, "loss": 2.4281, "step": 3890 }, { "epoch": 0.18, "learning_rate": 0.00018525966880644483, "loss": 2.4061, "step": 3900 }, { "epoch": 0.18, "learning_rate": 0.00018518396350248313, "loss": 2.3847, "step": 3910 }, { "epoch": 0.18, "learning_rate": 0.00018510807983835936, "loss": 2.4062, "step": 3920 }, { "epoch": 0.18, "learning_rate": 0.0001850320179729605, "loss": 2.4095, "step": 3930 }, { "epoch": 0.18, "learning_rate": 0.0001849557780655467, "loss": 2.3892, "step": 3940 }, { "epoch": 0.18, "learning_rate": 0.00018487936027575076, "loss": 2.3906, "step": 3950 }, { "epoch": 0.18, "learning_rate": 0.00018480276476357806, "loss": 2.3867, "step": 3960 }, { "epoch": 0.18, "learning_rate": 0.0001847259916894061, "loss": 2.3978, "step": 3970 }, { "epoch": 0.18, "learning_rate": 0.00018464904121398408, "loss": 2.412, "step": 3980 }, { "epoch": 0.18, "learning_rate": 0.0001845719134984327, "loss": 2.4178, "step": 3990 }, { "epoch": 0.18, "learning_rate": 0.00018449460870424377, "loss": 2.4021, "step": 4000 }, { "epoch": 0.18, "eval_accuracy": 0.5356110892153217, "eval_loss": 2.2496390342712402, "eval_runtime": 10.5309, "eval_samples_per_second": 125.061, "eval_steps_per_second": 1.045, "step": 4000 }, { "epoch": 0.18, "learning_rate": 0.00018441712699327989, "loss": 2.3893, "step": 4010 }, { "epoch": 0.18, "learning_rate": 0.00018433946852777403, "loss": 2.373, "step": 4020 }, { "epoch": 0.18, "learning_rate": 0.0001842616334703293, "loss": 2.3985, "step": 4030 }, { "epoch": 0.19, "learning_rate": 0.00018418362198391853, "loss": 2.3982, "step": 4040 }, { "epoch": 0.19, "learning_rate": 0.00018410543423188404, "loss": 2.3929, "step": 4050 }, { "epoch": 0.19, "learning_rate": 0.0001840270703779371, "loss": 2.3961, "step": 4060 }, { "epoch": 0.19, "learning_rate": 0.00018394853058615788, "loss": 2.4061, "step": 4070 }, { "epoch": 0.19, "learning_rate": 0.00018386981502099465, "loss": 2.4216, "step": 4080 }, { "epoch": 0.19, "learning_rate": 0.000183790923847264, "loss": 2.4063, "step": 4090 }, { "epoch": 0.19, "learning_rate": 0.00018371185723015007, "loss": 2.3886, "step": 4100 }, { "epoch": 0.19, "learning_rate": 0.00018363261533520443, "loss": 2.3979, "step": 4110 }, { "epoch": 0.19, "learning_rate": 0.0001835531983283455, "loss": 2.3898, "step": 4120 }, { "epoch": 0.19, "learning_rate": 0.00018347360637585857, "loss": 2.404, "step": 4130 }, { "epoch": 0.19, "learning_rate": 0.00018339383964439504, "loss": 2.3835, "step": 4140 }, { "epoch": 0.19, "learning_rate": 0.0001833138983009724, "loss": 2.3896, "step": 4150 }, { "epoch": 0.19, "learning_rate": 0.0001832337825129736, "loss": 2.3826, "step": 4160 }, { "epoch": 0.19, "learning_rate": 0.00018315349244814707, "loss": 2.4093, "step": 4170 }, { "epoch": 0.19, "learning_rate": 0.0001830730282746059, "loss": 2.3962, "step": 4180 }, { "epoch": 0.19, "learning_rate": 0.00018299239016082792, "loss": 2.3983, "step": 4190 }, { "epoch": 0.19, "learning_rate": 0.00018291157827565505, "loss": 2.3884, "step": 4200 }, { "epoch": 0.19, "learning_rate": 0.0001828305927882931, "loss": 2.3952, "step": 4210 }, { "epoch": 0.19, "learning_rate": 0.00018274943386831135, "loss": 2.3872, "step": 4220 }, { "epoch": 0.19, "learning_rate": 0.00018266810168564223, "loss": 2.4018, "step": 4230 }, { "epoch": 0.19, "learning_rate": 0.000182586596410581, "loss": 2.3948, "step": 4240 }, { "epoch": 0.19, "learning_rate": 0.00018250491821378525, "loss": 2.3805, "step": 4250 }, { "epoch": 0.2, "learning_rate": 0.00018242306726627474, "loss": 2.3928, "step": 4260 }, { "epoch": 0.2, "learning_rate": 0.00018234104373943084, "loss": 2.3957, "step": 4270 }, { "epoch": 0.2, "learning_rate": 0.00018225884780499632, "loss": 2.4042, "step": 4280 }, { "epoch": 0.2, "learning_rate": 0.00018217647963507494, "loss": 2.3872, "step": 4290 }, { "epoch": 0.2, "learning_rate": 0.0001820939394021311, "loss": 2.3861, "step": 4300 }, { "epoch": 0.2, "learning_rate": 0.00018201122727898948, "loss": 2.4026, "step": 4310 }, { "epoch": 0.2, "learning_rate": 0.0001819283434388346, "loss": 2.3895, "step": 4320 }, { "epoch": 0.2, "learning_rate": 0.00018184528805521055, "loss": 2.3909, "step": 4330 }, { "epoch": 0.2, "learning_rate": 0.0001817620613020207, "loss": 2.3991, "step": 4340 }, { "epoch": 0.2, "learning_rate": 0.0001816786633535271, "loss": 2.3907, "step": 4350 }, { "epoch": 0.2, "learning_rate": 0.00018159509438435035, "loss": 2.3951, "step": 4360 }, { "epoch": 0.2, "learning_rate": 0.00018151135456946903, "loss": 2.3823, "step": 4370 }, { "epoch": 0.2, "learning_rate": 0.0001814274440842196, "loss": 2.4021, "step": 4380 }, { "epoch": 0.2, "learning_rate": 0.00018134336310429575, "loss": 2.3981, "step": 4390 }, { "epoch": 0.2, "learning_rate": 0.0001812591118057482, "loss": 2.4157, "step": 4400 }, { "epoch": 0.2, "learning_rate": 0.0001811746903649842, "loss": 2.3981, "step": 4410 }, { "epoch": 0.2, "learning_rate": 0.00018109009895876744, "loss": 2.3924, "step": 4420 }, { "epoch": 0.2, "learning_rate": 0.00018100533776421732, "loss": 2.3973, "step": 4430 }, { "epoch": 0.2, "learning_rate": 0.00018092040695880877, "loss": 2.3862, "step": 4440 }, { "epoch": 0.2, "learning_rate": 0.0001808353067203719, "loss": 2.3788, "step": 4450 }, { "epoch": 0.2, "learning_rate": 0.00018075003722709156, "loss": 2.3876, "step": 4460 }, { "epoch": 0.2, "learning_rate": 0.00018066459865750706, "loss": 2.3864, "step": 4470 }, { "epoch": 0.21, "learning_rate": 0.00018057899119051162, "loss": 2.381, "step": 4480 }, { "epoch": 0.21, "learning_rate": 0.00018049321500535209, "loss": 2.3756, "step": 4490 }, { "epoch": 0.21, "learning_rate": 0.00018040727028162873, "loss": 2.3846, "step": 4500 }, { "epoch": 0.21, "learning_rate": 0.0001803211571992945, "loss": 2.3926, "step": 4510 }, { "epoch": 0.21, "learning_rate": 0.00018023487593865506, "loss": 2.3968, "step": 4520 }, { "epoch": 0.21, "learning_rate": 0.00018014842668036808, "loss": 2.3883, "step": 4530 }, { "epoch": 0.21, "learning_rate": 0.00018006180960544307, "loss": 2.379, "step": 4540 }, { "epoch": 0.21, "learning_rate": 0.00017997502489524081, "loss": 2.3949, "step": 4550 }, { "epoch": 0.21, "learning_rate": 0.00017988807273147317, "loss": 2.3997, "step": 4560 }, { "epoch": 0.21, "learning_rate": 0.00017980095329620262, "loss": 2.3659, "step": 4570 }, { "epoch": 0.21, "learning_rate": 0.00017971366677184183, "loss": 2.4092, "step": 4580 }, { "epoch": 0.21, "learning_rate": 0.0001796262133411534, "loss": 2.3843, "step": 4590 }, { "epoch": 0.21, "learning_rate": 0.0001795385931872493, "loss": 2.3884, "step": 4600 }, { "epoch": 0.21, "learning_rate": 0.00017945080649359069, "loss": 2.373, "step": 4610 }, { "epoch": 0.21, "learning_rate": 0.00017936285344398732, "loss": 2.3992, "step": 4620 }, { "epoch": 0.21, "learning_rate": 0.00017927473422259743, "loss": 2.3743, "step": 4630 }, { "epoch": 0.21, "learning_rate": 0.00017918644901392705, "loss": 2.389, "step": 4640 }, { "epoch": 0.21, "learning_rate": 0.00017909799800282976, "loss": 2.3865, "step": 4650 }, { "epoch": 0.21, "learning_rate": 0.00017900938137450638, "loss": 2.3862, "step": 4660 }, { "epoch": 0.21, "learning_rate": 0.00017892059931450446, "loss": 2.4079, "step": 4670 }, { "epoch": 0.21, "learning_rate": 0.00017883165200871797, "loss": 2.4191, "step": 4680 }, { "epoch": 0.22, "learning_rate": 0.0001787425396433868, "loss": 2.3877, "step": 4690 }, { "epoch": 0.22, "learning_rate": 0.0001786532624050965, "loss": 2.4038, "step": 4700 }, { "epoch": 0.22, "learning_rate": 0.00017856382048077782, "loss": 2.4007, "step": 4710 }, { "epoch": 0.22, "learning_rate": 0.0001784742140577064, "loss": 2.3739, "step": 4720 }, { "epoch": 0.22, "learning_rate": 0.00017838444332350214, "loss": 2.3765, "step": 4730 }, { "epoch": 0.22, "learning_rate": 0.0001782945084661292, "loss": 2.3829, "step": 4740 }, { "epoch": 0.22, "learning_rate": 0.0001782044096738952, "loss": 2.3895, "step": 4750 }, { "epoch": 0.22, "learning_rate": 0.00017811414713545114, "loss": 2.3742, "step": 4760 }, { "epoch": 0.22, "learning_rate": 0.00017802372103979078, "loss": 2.3856, "step": 4770 }, { "epoch": 0.22, "learning_rate": 0.00017793313157625038, "loss": 2.3929, "step": 4780 }, { "epoch": 0.22, "learning_rate": 0.00017784237893450828, "loss": 2.3837, "step": 4790 }, { "epoch": 0.22, "learning_rate": 0.00017775146330458448, "loss": 2.3861, "step": 4800 }, { "epoch": 0.22, "learning_rate": 0.0001776603848768402, "loss": 2.387, "step": 4810 }, { "epoch": 0.22, "learning_rate": 0.00017756914384197762, "loss": 2.3694, "step": 4820 }, { "epoch": 0.22, "learning_rate": 0.00017747774039103927, "loss": 2.3548, "step": 4830 }, { "epoch": 0.22, "learning_rate": 0.00017738617471540788, "loss": 2.366, "step": 4840 }, { "epoch": 0.22, "learning_rate": 0.00017729444700680577, "loss": 2.4099, "step": 4850 }, { "epoch": 0.22, "learning_rate": 0.00017720255745729457, "loss": 2.3567, "step": 4860 }, { "epoch": 0.22, "learning_rate": 0.0001771105062592747, "loss": 2.4122, "step": 4870 }, { "epoch": 0.22, "learning_rate": 0.00017701829360548517, "loss": 2.3988, "step": 4880 }, { "epoch": 0.22, "learning_rate": 0.00017692591968900288, "loss": 2.3775, "step": 4890 }, { "epoch": 0.22, "learning_rate": 0.0001768333847032426, "loss": 2.3915, "step": 4900 }, { "epoch": 0.23, "learning_rate": 0.00017674068884195612, "loss": 2.4085, "step": 4910 }, { "epoch": 0.23, "learning_rate": 0.00017664783229923226, "loss": 2.3651, "step": 4920 }, { "epoch": 0.23, "learning_rate": 0.00017655481526949616, "loss": 2.3697, "step": 4930 }, { "epoch": 0.23, "learning_rate": 0.0001764616379475091, "loss": 2.3695, "step": 4940 }, { "epoch": 0.23, "learning_rate": 0.0001763683005283679, "loss": 2.3927, "step": 4950 }, { "epoch": 0.23, "learning_rate": 0.00017627480320750455, "loss": 2.4027, "step": 4960 }, { "epoch": 0.23, "learning_rate": 0.00017618114618068596, "loss": 2.3786, "step": 4970 }, { "epoch": 0.23, "learning_rate": 0.00017608732964401343, "loss": 2.3923, "step": 4980 }, { "epoch": 0.23, "learning_rate": 0.00017599335379392219, "loss": 2.3938, "step": 4990 }, { "epoch": 0.23, "learning_rate": 0.00017589921882718102, "loss": 2.3768, "step": 5000 }, { "epoch": 0.23, "learning_rate": 0.00017580492494089192, "loss": 2.3899, "step": 5010 }, { "epoch": 0.23, "learning_rate": 0.00017571047233248962, "loss": 2.4152, "step": 5020 }, { "epoch": 0.23, "learning_rate": 0.00017561586119974116, "loss": 2.4003, "step": 5030 }, { "epoch": 0.23, "learning_rate": 0.00017552109174074555, "loss": 2.3835, "step": 5040 }, { "epoch": 0.23, "learning_rate": 0.00017542616415393332, "loss": 2.3748, "step": 5050 }, { "epoch": 0.23, "learning_rate": 0.000175331078638066, "loss": 2.402, "step": 5060 }, { "epoch": 0.23, "learning_rate": 0.00017523583539223587, "loss": 2.3572, "step": 5070 }, { "epoch": 0.23, "learning_rate": 0.0001751404346158655, "loss": 2.3714, "step": 5080 }, { "epoch": 0.23, "learning_rate": 0.0001750448765087072, "loss": 2.3893, "step": 5090 }, { "epoch": 0.23, "learning_rate": 0.00017494916127084274, "loss": 2.3862, "step": 5100 }, { "epoch": 0.23, "learning_rate": 0.00017485328910268292, "loss": 2.3715, "step": 5110 }, { "epoch": 0.23, "learning_rate": 0.00017475726020496719, "loss": 2.4044, "step": 5120 }, { "epoch": 0.24, "learning_rate": 0.00017466107477876296, "loss": 2.3896, "step": 5130 }, { "epoch": 0.24, "learning_rate": 0.00017456473302546562, "loss": 2.3748, "step": 5140 }, { "epoch": 0.24, "learning_rate": 0.0001744682351467977, "loss": 2.3865, "step": 5150 }, { "epoch": 0.24, "learning_rate": 0.00017437158134480876, "loss": 2.3701, "step": 5160 }, { "epoch": 0.24, "learning_rate": 0.00017427477182187473, "loss": 2.365, "step": 5170 }, { "epoch": 0.24, "learning_rate": 0.00017417780678069764, "loss": 2.3659, "step": 5180 }, { "epoch": 0.24, "learning_rate": 0.00017408068642430516, "loss": 2.3822, "step": 5190 }, { "epoch": 0.24, "learning_rate": 0.00017398341095605017, "loss": 2.3841, "step": 5200 }, { "epoch": 0.24, "learning_rate": 0.00017388598057961025, "loss": 2.3834, "step": 5210 }, { "epoch": 0.24, "learning_rate": 0.00017378839549898745, "loss": 2.4141, "step": 5220 }, { "epoch": 0.24, "learning_rate": 0.00017369065591850763, "loss": 2.3674, "step": 5230 }, { "epoch": 0.24, "learning_rate": 0.00017359276204282023, "loss": 2.3714, "step": 5240 }, { "epoch": 0.24, "learning_rate": 0.0001734947140768977, "loss": 2.379, "step": 5250 }, { "epoch": 0.24, "learning_rate": 0.0001733965122260351, "loss": 2.3895, "step": 5260 }, { "epoch": 0.24, "learning_rate": 0.0001732981566958499, "loss": 2.373, "step": 5270 }, { "epoch": 0.24, "learning_rate": 0.00017319964769228104, "loss": 2.372, "step": 5280 }, { "epoch": 0.24, "learning_rate": 0.00017310098542158903, "loss": 2.3674, "step": 5290 }, { "epoch": 0.24, "learning_rate": 0.0001730021700903552, "loss": 2.3651, "step": 5300 }, { "epoch": 0.24, "learning_rate": 0.0001729032019054814, "loss": 2.3757, "step": 5310 }, { "epoch": 0.24, "learning_rate": 0.0001728040810741895, "loss": 2.379, "step": 5320 }, { "epoch": 0.24, "learning_rate": 0.00017270480780402103, "loss": 2.3587, "step": 5330 }, { "epoch": 0.24, "learning_rate": 0.00017260538230283668, "loss": 2.3783, "step": 5340 }, { "epoch": 0.25, "learning_rate": 0.0001725058047788158, "loss": 2.3608, "step": 5350 }, { "epoch": 0.25, "learning_rate": 0.00017240607544045618, "loss": 2.3824, "step": 5360 }, { "epoch": 0.25, "learning_rate": 0.00017230619449657341, "loss": 2.4014, "step": 5370 }, { "epoch": 0.25, "learning_rate": 0.00017220616215630056, "loss": 2.3696, "step": 5380 }, { "epoch": 0.25, "learning_rate": 0.00017210597862908763, "loss": 2.3645, "step": 5390 }, { "epoch": 0.25, "learning_rate": 0.00017200564412470123, "loss": 2.3684, "step": 5400 }, { "epoch": 0.25, "learning_rate": 0.0001719051588532241, "loss": 2.3615, "step": 5410 }, { "epoch": 0.25, "learning_rate": 0.00017180452302505462, "loss": 2.3866, "step": 5420 }, { "epoch": 0.25, "learning_rate": 0.00017170373685090637, "loss": 2.3883, "step": 5430 }, { "epoch": 0.25, "learning_rate": 0.0001716028005418079, "loss": 2.3795, "step": 5440 }, { "epoch": 0.25, "learning_rate": 0.00017150171430910187, "loss": 2.3643, "step": 5450 }, { "epoch": 0.25, "learning_rate": 0.00017140047836444508, "loss": 2.377, "step": 5460 }, { "epoch": 0.25, "learning_rate": 0.00017129909291980767, "loss": 2.3633, "step": 5470 }, { "epoch": 0.25, "learning_rate": 0.00017119755818747283, "loss": 2.3656, "step": 5480 }, { "epoch": 0.25, "learning_rate": 0.00017109587438003638, "loss": 2.4058, "step": 5490 }, { "epoch": 0.25, "learning_rate": 0.00017099404171040618, "loss": 2.3744, "step": 5500 }, { "epoch": 0.25, "learning_rate": 0.00017089206039180187, "loss": 2.3962, "step": 5510 }, { "epoch": 0.25, "learning_rate": 0.00017078993063775433, "loss": 2.3937, "step": 5520 }, { "epoch": 0.25, "learning_rate": 0.00017068765266210515, "loss": 2.3879, "step": 5530 }, { "epoch": 0.25, "learning_rate": 0.00017058522667900638, "loss": 2.3537, "step": 5540 }, { "epoch": 0.25, "learning_rate": 0.00017048265290291992, "loss": 2.3575, "step": 5550 }, { "epoch": 0.25, "learning_rate": 0.00017037993154861712, "loss": 2.3659, "step": 5560 }, { "epoch": 0.26, "learning_rate": 0.0001702770628311783, "loss": 2.3788, "step": 5570 }, { "epoch": 0.26, "learning_rate": 0.00017017404696599244, "loss": 2.3753, "step": 5580 }, { "epoch": 0.26, "learning_rate": 0.00017007088416875652, "loss": 2.3699, "step": 5590 }, { "epoch": 0.26, "learning_rate": 0.00016996757465547518, "loss": 2.3619, "step": 5600 }, { "epoch": 0.26, "learning_rate": 0.0001698641186424603, "loss": 2.3943, "step": 5610 }, { "epoch": 0.26, "learning_rate": 0.0001697605163463305, "loss": 2.3801, "step": 5620 }, { "epoch": 0.26, "learning_rate": 0.00016965676798401066, "loss": 2.3857, "step": 5630 }, { "epoch": 0.26, "learning_rate": 0.0001695528737727315, "loss": 2.3686, "step": 5640 }, { "epoch": 0.26, "learning_rate": 0.00016944883393002914, "loss": 2.3527, "step": 5650 }, { "epoch": 0.26, "learning_rate": 0.0001693446486737446, "loss": 2.3712, "step": 5660 }, { "epoch": 0.26, "learning_rate": 0.00016924031822202347, "loss": 2.3771, "step": 5670 }, { "epoch": 0.26, "learning_rate": 0.00016913584279331517, "loss": 2.3701, "step": 5680 }, { "epoch": 0.26, "learning_rate": 0.00016903122260637288, "loss": 2.4011, "step": 5690 }, { "epoch": 0.26, "learning_rate": 0.00016892645788025266, "loss": 2.3826, "step": 5700 }, { "epoch": 0.26, "learning_rate": 0.00016882154883431346, "loss": 2.3886, "step": 5710 }, { "epoch": 0.26, "learning_rate": 0.00016871649568821616, "loss": 2.3818, "step": 5720 }, { "epoch": 0.26, "learning_rate": 0.00016861129866192355, "loss": 2.3776, "step": 5730 }, { "epoch": 0.26, "learning_rate": 0.00016850595797569958, "loss": 2.3581, "step": 5740 }, { "epoch": 0.26, "learning_rate": 0.00016840047385010905, "loss": 2.3507, "step": 5750 }, { "epoch": 0.26, "learning_rate": 0.00016829484650601707, "loss": 2.3819, "step": 5760 }, { "epoch": 0.26, "learning_rate": 0.0001681890761645886, "loss": 2.383, "step": 5770 }, { "epoch": 0.27, "learning_rate": 0.00016808316304728804, "loss": 2.3882, "step": 5780 }, { "epoch": 0.27, "learning_rate": 0.00016797710737587879, "loss": 2.3791, "step": 5790 }, { "epoch": 0.27, "learning_rate": 0.00016787090937242258, "loss": 2.3823, "step": 5800 }, { "epoch": 0.27, "learning_rate": 0.00016776456925927935, "loss": 2.3603, "step": 5810 }, { "epoch": 0.27, "learning_rate": 0.00016765808725910646, "loss": 2.3664, "step": 5820 }, { "epoch": 0.27, "learning_rate": 0.00016755146359485838, "loss": 2.3645, "step": 5830 }, { "epoch": 0.27, "learning_rate": 0.0001674446984897863, "loss": 2.3769, "step": 5840 }, { "epoch": 0.27, "learning_rate": 0.00016733779216743737, "loss": 2.3579, "step": 5850 }, { "epoch": 0.27, "learning_rate": 0.00016723074485165457, "loss": 2.3575, "step": 5860 }, { "epoch": 0.27, "learning_rate": 0.0001671235567665761, "loss": 2.3735, "step": 5870 }, { "epoch": 0.27, "learning_rate": 0.00016701622813663477, "loss": 2.3754, "step": 5880 }, { "epoch": 0.27, "learning_rate": 0.00016690875918655787, "loss": 2.3559, "step": 5890 }, { "epoch": 0.27, "learning_rate": 0.0001668011501413663, "loss": 2.3643, "step": 5900 }, { "epoch": 0.27, "learning_rate": 0.00016669340122637443, "loss": 2.3802, "step": 5910 }, { "epoch": 0.27, "learning_rate": 0.0001665855126671894, "loss": 2.3672, "step": 5920 }, { "epoch": 0.27, "learning_rate": 0.0001664774846897108, "loss": 2.3804, "step": 5930 }, { "epoch": 0.27, "learning_rate": 0.00016636931752013018, "loss": 2.3821, "step": 5940 }, { "epoch": 0.27, "learning_rate": 0.00016626101138493036, "loss": 2.3484, "step": 5950 }, { "epoch": 0.27, "learning_rate": 0.0001661525665108853, "loss": 2.395, "step": 5960 }, { "epoch": 0.27, "learning_rate": 0.0001660439831250594, "loss": 2.3502, "step": 5970 }, { "epoch": 0.27, "learning_rate": 0.00016593526145480708, "loss": 2.3561, "step": 5980 }, { "epoch": 0.27, "learning_rate": 0.00016582640172777233, "loss": 2.3821, "step": 5990 }, { "epoch": 0.28, "learning_rate": 0.0001657174041718881, "loss": 2.3883, "step": 6000 }, { "epoch": 0.28, "eval_accuracy": 0.5390075343782449, "eval_loss": 2.2250747680664062, "eval_runtime": 10.5596, "eval_samples_per_second": 124.721, "eval_steps_per_second": 1.042, "step": 6000 }, { "epoch": 0.28, "learning_rate": 0.00016560826901537606, "loss": 2.3956, "step": 6010 }, { "epoch": 0.28, "learning_rate": 0.00016549899648674588, "loss": 2.3826, "step": 6020 }, { "epoch": 0.28, "learning_rate": 0.000165389586814795, "loss": 2.3777, "step": 6030 }, { "epoch": 0.28, "learning_rate": 0.00016528004022860787, "loss": 2.3843, "step": 6040 }, { "epoch": 0.28, "learning_rate": 0.00016517035695755568, "loss": 2.3767, "step": 6050 }, { "epoch": 0.28, "learning_rate": 0.00016506053723129588, "loss": 2.355, "step": 6060 }, { "epoch": 0.28, "learning_rate": 0.0001649505812797715, "loss": 2.3792, "step": 6070 }, { "epoch": 0.28, "learning_rate": 0.00016484048933321086, "loss": 2.3639, "step": 6080 }, { "epoch": 0.28, "learning_rate": 0.00016473026162212707, "loss": 2.3617, "step": 6090 }, { "epoch": 0.28, "learning_rate": 0.00016461989837731746, "loss": 2.3728, "step": 6100 }, { "epoch": 0.28, "learning_rate": 0.00016450939982986314, "loss": 2.3831, "step": 6110 }, { "epoch": 0.28, "learning_rate": 0.0001643987662111286, "loss": 2.3744, "step": 6120 }, { "epoch": 0.28, "learning_rate": 0.00016428799775276097, "loss": 2.3583, "step": 6130 }, { "epoch": 0.28, "learning_rate": 0.00016417709468668994, "loss": 2.3703, "step": 6140 }, { "epoch": 0.28, "learning_rate": 0.00016406605724512684, "loss": 2.3688, "step": 6150 }, { "epoch": 0.28, "learning_rate": 0.00016395488566056448, "loss": 2.3695, "step": 6160 }, { "epoch": 0.28, "learning_rate": 0.0001638435801657765, "loss": 2.381, "step": 6170 }, { "epoch": 0.28, "learning_rate": 0.00016373214099381695, "loss": 2.3648, "step": 6180 }, { "epoch": 0.28, "learning_rate": 0.00016362056837801973, "loss": 2.3556, "step": 6190 }, { "epoch": 0.28, "learning_rate": 0.00016350886255199823, "loss": 2.3627, "step": 6200 }, { "epoch": 0.28, "learning_rate": 0.0001633970237496446, "loss": 2.3685, "step": 6210 }, { "epoch": 0.29, "learning_rate": 0.00016328505220512964, "loss": 2.3835, "step": 6220 }, { "epoch": 0.29, "learning_rate": 0.0001631729481529019, "loss": 2.3625, "step": 6230 }, { "epoch": 0.29, "learning_rate": 0.00016306071182768745, "loss": 2.3655, "step": 6240 }, { "epoch": 0.29, "learning_rate": 0.00016294834346448935, "loss": 2.3675, "step": 6250 }, { "epoch": 0.29, "learning_rate": 0.00016283584329858708, "loss": 2.3776, "step": 6260 }, { "epoch": 0.29, "learning_rate": 0.00016272321156553604, "loss": 2.3632, "step": 6270 }, { "epoch": 0.29, "learning_rate": 0.0001626104485011673, "loss": 2.3765, "step": 6280 }, { "epoch": 0.29, "learning_rate": 0.00016249755434158663, "loss": 2.3878, "step": 6290 }, { "epoch": 0.29, "learning_rate": 0.00016238452932317458, "loss": 2.3661, "step": 6300 }, { "epoch": 0.29, "learning_rate": 0.00016227137368258546, "loss": 2.3566, "step": 6310 }, { "epoch": 0.29, "learning_rate": 0.00016215808765674726, "loss": 2.3554, "step": 6320 }, { "epoch": 0.29, "learning_rate": 0.00016204467148286082, "loss": 2.3598, "step": 6330 }, { "epoch": 0.29, "learning_rate": 0.00016193112539839962, "loss": 2.3763, "step": 6340 }, { "epoch": 0.29, "learning_rate": 0.00016181744964110904, "loss": 2.3623, "step": 6350 }, { "epoch": 0.29, "learning_rate": 0.0001617036444490061, "loss": 2.3628, "step": 6360 }, { "epoch": 0.29, "learning_rate": 0.00016158971006037865, "loss": 2.3678, "step": 6370 }, { "epoch": 0.29, "learning_rate": 0.0001614756467137852, "loss": 2.3427, "step": 6380 }, { "epoch": 0.29, "learning_rate": 0.0001613614546480543, "loss": 2.3687, "step": 6390 }, { "epoch": 0.29, "learning_rate": 0.00016124713410228388, "loss": 2.3534, "step": 6400 }, { "epoch": 0.29, "learning_rate": 0.00016113268531584094, "loss": 2.3498, "step": 6410 }, { "epoch": 0.29, "learning_rate": 0.00016101810852836104, "loss": 2.3594, "step": 6420 }, { "epoch": 0.29, "learning_rate": 0.00016090340397974777, "loss": 2.3492, "step": 6430 }, { "epoch": 0.3, "learning_rate": 0.00016078857191017209, "loss": 2.3659, "step": 6440 }, { "epoch": 0.3, "learning_rate": 0.00016067361256007212, "loss": 2.3693, "step": 6450 }, { "epoch": 0.3, "learning_rate": 0.0001605585261701524, "loss": 2.3825, "step": 6460 }, { "epoch": 0.3, "learning_rate": 0.00016044331298138345, "loss": 2.3372, "step": 6470 }, { "epoch": 0.3, "learning_rate": 0.00016032797323500136, "loss": 2.3665, "step": 6480 }, { "epoch": 0.3, "learning_rate": 0.0001602125071725072, "loss": 2.3535, "step": 6490 }, { "epoch": 0.3, "learning_rate": 0.0001600969150356664, "loss": 2.3783, "step": 6500 }, { "epoch": 0.3, "learning_rate": 0.00015998119706650855, "loss": 2.3785, "step": 6510 }, { "epoch": 0.3, "learning_rate": 0.0001598653535073266, "loss": 2.3739, "step": 6520 }, { "epoch": 0.3, "learning_rate": 0.00015974938460067647, "loss": 2.3809, "step": 6530 }, { "epoch": 0.3, "learning_rate": 0.00015963329058937657, "loss": 2.3572, "step": 6540 }, { "epoch": 0.3, "learning_rate": 0.00015951707171650721, "loss": 2.3462, "step": 6550 }, { "epoch": 0.3, "learning_rate": 0.00015940072822541023, "loss": 2.3719, "step": 6560 }, { "epoch": 0.3, "learning_rate": 0.00015928426035968825, "loss": 2.3473, "step": 6570 }, { "epoch": 0.3, "learning_rate": 0.00015916766836320445, "loss": 2.4003, "step": 6580 }, { "epoch": 0.3, "learning_rate": 0.00015905095248008183, "loss": 2.367, "step": 6590 }, { "epoch": 0.3, "learning_rate": 0.00015893411295470284, "loss": 2.373, "step": 6600 }, { "epoch": 0.3, "learning_rate": 0.00015881715003170877, "loss": 2.3572, "step": 6610 }, { "epoch": 0.3, "learning_rate": 0.00015870006395599933, "loss": 2.3404, "step": 6620 }, { "epoch": 0.3, "learning_rate": 0.00015858285497273206, "loss": 2.3701, "step": 6630 }, { "epoch": 0.3, "learning_rate": 0.00015846552332732187, "loss": 2.3648, "step": 6640 }, { "epoch": 0.3, "learning_rate": 0.00015834806926544044, "loss": 2.3631, "step": 6650 }, { "epoch": 0.31, "learning_rate": 0.00015823049303301584, "loss": 2.3427, "step": 6660 }, { "epoch": 0.31, "learning_rate": 0.00015811279487623194, "loss": 2.3632, "step": 6670 }, { "epoch": 0.31, "learning_rate": 0.00015799497504152786, "loss": 2.3597, "step": 6680 }, { "epoch": 0.31, "learning_rate": 0.00015787703377559745, "loss": 2.3485, "step": 6690 }, { "epoch": 0.31, "learning_rate": 0.00015775897132538894, "loss": 2.3606, "step": 6700 }, { "epoch": 0.31, "learning_rate": 0.00015764078793810424, "loss": 2.3752, "step": 6710 }, { "epoch": 0.31, "learning_rate": 0.00015752248386119842, "loss": 2.3457, "step": 6720 }, { "epoch": 0.31, "learning_rate": 0.00015740405934237933, "loss": 2.3712, "step": 6730 }, { "epoch": 0.31, "learning_rate": 0.000157285514629607, "loss": 2.3453, "step": 6740 }, { "epoch": 0.31, "learning_rate": 0.00015716684997109308, "loss": 2.3674, "step": 6750 }, { "epoch": 0.31, "learning_rate": 0.00015704806561530038, "loss": 2.3543, "step": 6760 }, { "epoch": 0.31, "learning_rate": 0.00015692916181094237, "loss": 2.3909, "step": 6770 }, { "epoch": 0.31, "learning_rate": 0.00015681013880698258, "loss": 2.3788, "step": 6780 }, { "epoch": 0.31, "learning_rate": 0.00015669099685263415, "loss": 2.3528, "step": 6790 }, { "epoch": 0.31, "learning_rate": 0.00015657173619735928, "loss": 2.3626, "step": 6800 }, { "epoch": 0.31, "learning_rate": 0.0001564523570908687, "loss": 2.3552, "step": 6810 }, { "epoch": 0.31, "learning_rate": 0.0001563328597831211, "loss": 2.3638, "step": 6820 }, { "epoch": 0.31, "learning_rate": 0.00015621324452432282, "loss": 2.3565, "step": 6830 }, { "epoch": 0.31, "learning_rate": 0.00015609351156492697, "loss": 2.3564, "step": 6840 }, { "epoch": 0.31, "learning_rate": 0.0001559736611556333, "loss": 2.347, "step": 6850 }, { "epoch": 0.31, "learning_rate": 0.0001558536935473873, "loss": 2.3698, "step": 6860 }, { "epoch": 0.31, "learning_rate": 0.00015573360899138, "loss": 2.3543, "step": 6870 }, { "epoch": 0.32, "learning_rate": 0.00015561340773904715, "loss": 2.356, "step": 6880 }, { "epoch": 0.32, "learning_rate": 0.00015549309004206897, "loss": 2.366, "step": 6890 }, { "epoch": 0.32, "learning_rate": 0.0001553726561523695, "loss": 2.3477, "step": 6900 }, { "epoch": 0.32, "learning_rate": 0.00015525210632211594, "loss": 2.3915, "step": 6910 }, { "epoch": 0.32, "learning_rate": 0.00015513144080371834, "loss": 2.3613, "step": 6920 }, { "epoch": 0.32, "learning_rate": 0.00015501065984982901, "loss": 2.3708, "step": 6930 }, { "epoch": 0.32, "learning_rate": 0.0001548897637133419, "loss": 2.3831, "step": 6940 }, { "epoch": 0.32, "learning_rate": 0.00015476875264739212, "loss": 2.3723, "step": 6950 }, { "epoch": 0.32, "learning_rate": 0.00015464762690535548, "loss": 2.374, "step": 6960 }, { "epoch": 0.32, "learning_rate": 0.00015452638674084786, "loss": 2.3572, "step": 6970 }, { "epoch": 0.32, "learning_rate": 0.00015440503240772473, "loss": 2.3559, "step": 6980 }, { "epoch": 0.32, "learning_rate": 0.00015428356416008062, "loss": 2.3551, "step": 6990 }, { "epoch": 0.32, "learning_rate": 0.00015416198225224855, "loss": 2.3836, "step": 7000 }, { "epoch": 0.32, "learning_rate": 0.0001540402869387996, "loss": 2.3528, "step": 7010 }, { "epoch": 0.32, "learning_rate": 0.00015391847847454223, "loss": 2.3661, "step": 7020 }, { "epoch": 0.32, "learning_rate": 0.00015379655711452177, "loss": 2.3499, "step": 7030 }, { "epoch": 0.32, "learning_rate": 0.00015367452311402003, "loss": 2.3635, "step": 7040 }, { "epoch": 0.32, "learning_rate": 0.00015355237672855468, "loss": 2.3635, "step": 7050 }, { "epoch": 0.32, "learning_rate": 0.0001534301182138786, "loss": 2.3526, "step": 7060 }, { "epoch": 0.32, "learning_rate": 0.00015330774782597954, "loss": 2.3353, "step": 7070 }, { "epoch": 0.32, "learning_rate": 0.00015318526582107944, "loss": 2.336, "step": 7080 }, { "epoch": 0.33, "learning_rate": 0.00015306267245563396, "loss": 2.3942, "step": 7090 }, { "epoch": 0.33, "learning_rate": 0.000152939967986332, "loss": 2.3624, "step": 7100 }, { "epoch": 0.33, "learning_rate": 0.000152817152670095, "loss": 2.3426, "step": 7110 }, { "epoch": 0.33, "learning_rate": 0.00015269422676407647, "loss": 2.3585, "step": 7120 }, { "epoch": 0.33, "learning_rate": 0.00015257119052566157, "loss": 2.346, "step": 7130 }, { "epoch": 0.33, "learning_rate": 0.00015244804421246647, "loss": 2.376, "step": 7140 }, { "epoch": 0.33, "learning_rate": 0.0001523247880823377, "loss": 2.3538, "step": 7150 }, { "epoch": 0.33, "learning_rate": 0.00015220142239335188, "loss": 2.3514, "step": 7160 }, { "epoch": 0.33, "learning_rate": 0.0001520779474038149, "loss": 2.3742, "step": 7170 }, { "epoch": 0.33, "learning_rate": 0.00015195436337226163, "loss": 2.3508, "step": 7180 }, { "epoch": 0.33, "learning_rate": 0.00015183067055745512, "loss": 2.3486, "step": 7190 }, { "epoch": 0.33, "learning_rate": 0.00015170686921838633, "loss": 2.3559, "step": 7200 }, { "epoch": 0.33, "learning_rate": 0.0001515829596142733, "loss": 2.3388, "step": 7210 }, { "epoch": 0.33, "learning_rate": 0.0001514589420045609, "loss": 2.3687, "step": 7220 }, { "epoch": 0.33, "learning_rate": 0.00015133481664892005, "loss": 2.3462, "step": 7230 }, { "epoch": 0.33, "learning_rate": 0.0001512105838072473, "loss": 2.3717, "step": 7240 }, { "epoch": 0.33, "learning_rate": 0.00015108624373966429, "loss": 2.3588, "step": 7250 }, { "epoch": 0.33, "learning_rate": 0.00015096179670651713, "loss": 2.3789, "step": 7260 }, { "epoch": 0.33, "learning_rate": 0.00015083724296837588, "loss": 2.3564, "step": 7270 }, { "epoch": 0.33, "learning_rate": 0.00015071258278603412, "loss": 2.3616, "step": 7280 }, { "epoch": 0.33, "learning_rate": 0.00015058781642050817, "loss": 2.3401, "step": 7290 }, { "epoch": 0.33, "learning_rate": 0.00015046294413303678, "loss": 2.3366, "step": 7300 }, { "epoch": 0.34, "learning_rate": 0.00015033796618508044, "loss": 2.378, "step": 7310 }, { "epoch": 0.34, "learning_rate": 0.00015021288283832083, "loss": 2.3506, "step": 7320 }, { "epoch": 0.34, "learning_rate": 0.00015008769435466047, "loss": 2.376, "step": 7330 }, { "epoch": 0.34, "learning_rate": 0.0001499624009962218, "loss": 2.3973, "step": 7340 }, { "epoch": 0.34, "learning_rate": 0.00014983700302534709, "loss": 2.3523, "step": 7350 }, { "epoch": 0.34, "learning_rate": 0.00014971150070459743, "loss": 2.3608, "step": 7360 }, { "epoch": 0.34, "learning_rate": 0.00014958589429675255, "loss": 2.3578, "step": 7370 }, { "epoch": 0.34, "learning_rate": 0.00014946018406481002, "loss": 2.3421, "step": 7380 }, { "epoch": 0.34, "learning_rate": 0.00014933437027198486, "loss": 2.3525, "step": 7390 }, { "epoch": 0.34, "learning_rate": 0.00014920845318170894, "loss": 2.354, "step": 7400 }, { "epoch": 0.34, "learning_rate": 0.00014908243305763034, "loss": 2.3506, "step": 7410 }, { "epoch": 0.34, "learning_rate": 0.00014895631016361298, "loss": 2.3437, "step": 7420 }, { "epoch": 0.34, "learning_rate": 0.00014883008476373585, "loss": 2.3487, "step": 7430 }, { "epoch": 0.34, "learning_rate": 0.0001487037571222927, "loss": 2.3503, "step": 7440 }, { "epoch": 0.34, "learning_rate": 0.0001485773275037912, "loss": 2.337, "step": 7450 }, { "epoch": 0.34, "learning_rate": 0.00014845079617295268, "loss": 2.3461, "step": 7460 }, { "epoch": 0.34, "learning_rate": 0.00014832416339471133, "loss": 2.3407, "step": 7470 }, { "epoch": 0.34, "learning_rate": 0.00014819742943421383, "loss": 2.3621, "step": 7480 }, { "epoch": 0.34, "learning_rate": 0.00014807059455681872, "loss": 2.3459, "step": 7490 }, { "epoch": 0.34, "learning_rate": 0.00014794365902809578, "loss": 2.3394, "step": 7500 }, { "epoch": 0.34, "learning_rate": 0.00014781662311382556, "loss": 2.3649, "step": 7510 }, { "epoch": 0.34, "learning_rate": 0.0001476894870799988, "loss": 2.37, "step": 7520 }, { "epoch": 0.35, "learning_rate": 0.00014756225119281596, "loss": 2.3516, "step": 7530 }, { "epoch": 0.35, "learning_rate": 0.00014743491571868642, "loss": 2.3615, "step": 7540 }, { "epoch": 0.35, "learning_rate": 0.00014730748092422818, "loss": 2.3513, "step": 7550 }, { "epoch": 0.35, "learning_rate": 0.00014717994707626718, "loss": 2.3582, "step": 7560 }, { "epoch": 0.35, "learning_rate": 0.00014705231444183672, "loss": 2.3274, "step": 7570 }, { "epoch": 0.35, "learning_rate": 0.000146924583288177, "loss": 2.3512, "step": 7580 }, { "epoch": 0.35, "learning_rate": 0.0001467967538827345, "loss": 2.341, "step": 7590 }, { "epoch": 0.35, "learning_rate": 0.00014666882649316137, "loss": 2.3581, "step": 7600 }, { "epoch": 0.35, "learning_rate": 0.00014654080138731495, "loss": 2.3414, "step": 7610 }, { "epoch": 0.35, "learning_rate": 0.0001464126788332572, "loss": 2.3381, "step": 7620 }, { "epoch": 0.35, "learning_rate": 0.00014628445909925408, "loss": 2.3557, "step": 7630 }, { "epoch": 0.35, "learning_rate": 0.00014615614245377508, "loss": 2.3574, "step": 7640 }, { "epoch": 0.35, "learning_rate": 0.00014602772916549254, "loss": 2.3636, "step": 7650 }, { "epoch": 0.35, "learning_rate": 0.0001458992195032812, "loss": 2.3606, "step": 7660 }, { "epoch": 0.35, "learning_rate": 0.00014577061373621756, "loss": 2.3495, "step": 7670 }, { "epoch": 0.35, "learning_rate": 0.00014564191213357937, "loss": 2.3618, "step": 7680 }, { "epoch": 0.35, "learning_rate": 0.00014551311496484507, "loss": 2.3608, "step": 7690 }, { "epoch": 0.35, "learning_rate": 0.00014538422249969318, "loss": 2.3514, "step": 7700 }, { "epoch": 0.35, "learning_rate": 0.00014525523500800164, "loss": 2.3364, "step": 7710 }, { "epoch": 0.35, "learning_rate": 0.00014512615275984753, "loss": 2.3579, "step": 7720 }, { "epoch": 0.35, "learning_rate": 0.00014499697602550623, "loss": 2.3583, "step": 7730 }, { "epoch": 0.35, "learning_rate": 0.00014486770507545103, "loss": 2.3518, "step": 7740 }, { "epoch": 0.36, "learning_rate": 0.0001447383401803524, "loss": 2.3524, "step": 7750 }, { "epoch": 0.36, "learning_rate": 0.0001446088816110776, "loss": 2.3529, "step": 7760 }, { "epoch": 0.36, "learning_rate": 0.00014447932963868997, "loss": 2.342, "step": 7770 }, { "epoch": 0.36, "learning_rate": 0.00014434968453444846, "loss": 2.347, "step": 7780 }, { "epoch": 0.36, "learning_rate": 0.00014421994656980698, "loss": 2.3726, "step": 7790 }, { "epoch": 0.36, "learning_rate": 0.00014409011601641388, "loss": 2.3629, "step": 7800 }, { "epoch": 0.36, "learning_rate": 0.00014396019314611143, "loss": 2.3355, "step": 7810 }, { "epoch": 0.36, "learning_rate": 0.00014383017823093515, "loss": 2.344, "step": 7820 }, { "epoch": 0.36, "learning_rate": 0.00014370007154311325, "loss": 2.3544, "step": 7830 }, { "epoch": 0.36, "learning_rate": 0.00014356987335506618, "loss": 2.3461, "step": 7840 }, { "epoch": 0.36, "learning_rate": 0.0001434395839394059, "loss": 2.3396, "step": 7850 }, { "epoch": 0.36, "learning_rate": 0.00014330920356893544, "loss": 2.3526, "step": 7860 }, { "epoch": 0.36, "learning_rate": 0.00014317873251664827, "loss": 2.3637, "step": 7870 }, { "epoch": 0.36, "learning_rate": 0.00014304817105572766, "loss": 2.3332, "step": 7880 }, { "epoch": 0.36, "learning_rate": 0.00014291751945954623, "loss": 2.3436, "step": 7890 }, { "epoch": 0.36, "learning_rate": 0.00014278677800166537, "loss": 2.35, "step": 7900 }, { "epoch": 0.36, "learning_rate": 0.00014265594695583454, "loss": 2.3459, "step": 7910 }, { "epoch": 0.36, "learning_rate": 0.00014252502659599085, "loss": 2.3674, "step": 7920 }, { "epoch": 0.36, "learning_rate": 0.00014239401719625837, "loss": 2.3521, "step": 7930 }, { "epoch": 0.36, "learning_rate": 0.00014226291903094769, "loss": 2.369, "step": 7940 }, { "epoch": 0.36, "learning_rate": 0.0001421317323745551, "loss": 2.331, "step": 7950 }, { "epoch": 0.36, "learning_rate": 0.00014200045750176234, "loss": 2.3348, "step": 7960 }, { "epoch": 0.37, "learning_rate": 0.00014186909468743574, "loss": 2.332, "step": 7970 }, { "epoch": 0.37, "learning_rate": 0.00014173764420662586, "loss": 2.3425, "step": 7980 }, { "epoch": 0.37, "learning_rate": 0.00014160610633456673, "loss": 2.3307, "step": 7990 }, { "epoch": 0.37, "learning_rate": 0.00014147448134667543, "loss": 2.3684, "step": 8000 }, { "epoch": 0.37, "eval_accuracy": 0.5416320601859583, "eval_loss": 2.2055718898773193, "eval_runtime": 10.6267, "eval_samples_per_second": 123.933, "eval_steps_per_second": 1.035, "step": 8000 }, { "epoch": 0.37, "learning_rate": 0.00014134276951855142, "loss": 2.3557, "step": 8010 }, { "epoch": 0.37, "learning_rate": 0.00014121097112597598, "loss": 2.3543, "step": 8020 }, { "epoch": 0.37, "learning_rate": 0.00014107908644491162, "loss": 2.3706, "step": 8030 }, { "epoch": 0.37, "learning_rate": 0.00014094711575150158, "loss": 2.3548, "step": 8040 }, { "epoch": 0.37, "learning_rate": 0.00014081505932206915, "loss": 2.3409, "step": 8050 }, { "epoch": 0.37, "learning_rate": 0.00014068291743311718, "loss": 2.3452, "step": 8060 }, { "epoch": 0.37, "learning_rate": 0.00014055069036132735, "loss": 2.3435, "step": 8070 }, { "epoch": 0.37, "learning_rate": 0.00014041837838355987, "loss": 2.3603, "step": 8080 }, { "epoch": 0.37, "learning_rate": 0.00014028598177685258, "loss": 2.3679, "step": 8090 }, { "epoch": 0.37, "learning_rate": 0.00014015350081842057, "loss": 2.3396, "step": 8100 }, { "epoch": 0.37, "learning_rate": 0.0001400209357856556, "loss": 2.3571, "step": 8110 }, { "epoch": 0.37, "learning_rate": 0.00013988828695612538, "loss": 2.3511, "step": 8120 }, { "epoch": 0.37, "learning_rate": 0.00013975555460757306, "loss": 2.3306, "step": 8130 }, { "epoch": 0.37, "learning_rate": 0.0001396227390179168, "loss": 2.3444, "step": 8140 }, { "epoch": 0.37, "learning_rate": 0.0001394898404652489, "loss": 2.3473, "step": 8150 }, { "epoch": 0.37, "learning_rate": 0.0001393568592278355, "loss": 2.3557, "step": 8160 }, { "epoch": 0.37, "learning_rate": 0.0001392237955841158, "loss": 2.3716, "step": 8170 }, { "epoch": 0.38, "learning_rate": 0.00013909064981270148, "loss": 2.3547, "step": 8180 }, { "epoch": 0.38, "learning_rate": 0.00013895742219237632, "loss": 2.3568, "step": 8190 }, { "epoch": 0.38, "learning_rate": 0.00013882411300209535, "loss": 2.339, "step": 8200 }, { "epoch": 0.38, "learning_rate": 0.0001386907225209845, "loss": 2.3426, "step": 8210 }, { "epoch": 0.38, "learning_rate": 0.00013855725102833983, "loss": 2.3802, "step": 8220 }, { "epoch": 0.38, "learning_rate": 0.00013842369880362706, "loss": 2.3491, "step": 8230 }, { "epoch": 0.38, "learning_rate": 0.00013829006612648098, "loss": 2.3382, "step": 8240 }, { "epoch": 0.38, "learning_rate": 0.00013815635327670472, "loss": 2.358, "step": 8250 }, { "epoch": 0.38, "learning_rate": 0.0001380225605342694, "loss": 2.3452, "step": 8260 }, { "epoch": 0.38, "learning_rate": 0.00013788868817931339, "loss": 2.3629, "step": 8270 }, { "epoch": 0.38, "learning_rate": 0.0001377547364921417, "loss": 2.3447, "step": 8280 }, { "epoch": 0.38, "learning_rate": 0.00013762070575322547, "loss": 2.341, "step": 8290 }, { "epoch": 0.38, "learning_rate": 0.0001374865962432014, "loss": 2.3651, "step": 8300 }, { "epoch": 0.38, "learning_rate": 0.00013735240824287113, "loss": 2.3603, "step": 8310 }, { "epoch": 0.38, "learning_rate": 0.00013721814203320054, "loss": 2.3545, "step": 8320 }, { "epoch": 0.38, "learning_rate": 0.0001370837978953194, "loss": 2.3543, "step": 8330 }, { "epoch": 0.38, "learning_rate": 0.00013694937611052056, "loss": 2.3269, "step": 8340 }, { "epoch": 0.38, "learning_rate": 0.00013681487696025953, "loss": 2.3518, "step": 8350 }, { "epoch": 0.38, "learning_rate": 0.00013668030072615367, "loss": 2.3467, "step": 8360 }, { "epoch": 0.38, "learning_rate": 0.00013654564768998188, "loss": 2.3542, "step": 8370 }, { "epoch": 0.38, "learning_rate": 0.00013641091813368378, "loss": 2.3209, "step": 8380 }, { "epoch": 0.38, "learning_rate": 0.00013627611233935929, "loss": 2.3351, "step": 8390 }, { "epoch": 0.39, "learning_rate": 0.0001361412305892679, "loss": 2.3451, "step": 8400 }, { "epoch": 0.39, "learning_rate": 0.0001360062731658281, "loss": 2.3416, "step": 8410 }, { "epoch": 0.39, "learning_rate": 0.00013587124035161695, "loss": 2.3524, "step": 8420 }, { "epoch": 0.39, "learning_rate": 0.0001357361324293693, "loss": 2.3534, "step": 8430 }, { "epoch": 0.39, "learning_rate": 0.0001356009496819772, "loss": 2.3522, "step": 8440 }, { "epoch": 0.39, "learning_rate": 0.00013546569239248946, "loss": 2.3458, "step": 8450 }, { "epoch": 0.39, "learning_rate": 0.00013533036084411093, "loss": 2.341, "step": 8460 }, { "epoch": 0.39, "learning_rate": 0.00013519495532020193, "loss": 2.3546, "step": 8470 }, { "epoch": 0.39, "learning_rate": 0.00013505947610427773, "loss": 2.3311, "step": 8480 }, { "epoch": 0.39, "learning_rate": 0.00013492392348000783, "loss": 2.3602, "step": 8490 }, { "epoch": 0.39, "learning_rate": 0.00013478829773121547, "loss": 2.3627, "step": 8500 }, { "epoch": 0.39, "learning_rate": 0.00013465259914187698, "loss": 2.3627, "step": 8510 }, { "epoch": 0.39, "learning_rate": 0.0001345168279961213, "loss": 2.3453, "step": 8520 }, { "epoch": 0.39, "learning_rate": 0.00013438098457822908, "loss": 2.3476, "step": 8530 }, { "epoch": 0.39, "learning_rate": 0.00013424506917263253, "loss": 2.342, "step": 8540 }, { "epoch": 0.39, "learning_rate": 0.00013410908206391443, "loss": 2.3435, "step": 8550 }, { "epoch": 0.39, "learning_rate": 0.0001339730235368078, "loss": 2.341, "step": 8560 }, { "epoch": 0.39, "learning_rate": 0.00013383689387619507, "loss": 2.3205, "step": 8570 }, { "epoch": 0.39, "learning_rate": 0.00013370069336710773, "loss": 2.3599, "step": 8580 }, { "epoch": 0.39, "learning_rate": 0.0001335644222947256, "loss": 2.3414, "step": 8590 }, { "epoch": 0.39, "learning_rate": 0.00013342808094437614, "loss": 2.3485, "step": 8600 }, { "epoch": 0.39, "learning_rate": 0.00013329166960153414, "loss": 2.3385, "step": 8610 }, { "epoch": 0.4, "learning_rate": 0.00013315518855182073, "loss": 2.3394, "step": 8620 }, { "epoch": 0.4, "learning_rate": 0.0001330186380810032, "loss": 2.3398, "step": 8630 }, { "epoch": 0.4, "learning_rate": 0.00013288201847499402, "loss": 2.3477, "step": 8640 }, { "epoch": 0.4, "learning_rate": 0.00013274533001985054, "loss": 2.3483, "step": 8650 }, { "epoch": 0.4, "learning_rate": 0.00013260857300177422, "loss": 2.3414, "step": 8660 }, { "epoch": 0.4, "learning_rate": 0.00013247174770711007, "loss": 2.3218, "step": 8670 }, { "epoch": 0.4, "learning_rate": 0.00013233485442234606, "loss": 2.3611, "step": 8680 }, { "epoch": 0.4, "learning_rate": 0.00013219789343411258, "loss": 2.3431, "step": 8690 }, { "epoch": 0.4, "learning_rate": 0.00013206086502918167, "loss": 2.3499, "step": 8700 }, { "epoch": 0.4, "learning_rate": 0.00013192376949446662, "loss": 2.3229, "step": 8710 }, { "epoch": 0.4, "learning_rate": 0.0001317866071170212, "loss": 2.3556, "step": 8720 }, { "epoch": 0.4, "learning_rate": 0.00013164937818403927, "loss": 2.3446, "step": 8730 }, { "epoch": 0.4, "learning_rate": 0.0001315120829828539, "loss": 2.3142, "step": 8740 }, { "epoch": 0.4, "learning_rate": 0.000131374721800937, "loss": 2.3365, "step": 8750 }, { "epoch": 0.4, "learning_rate": 0.00013123729492589857, "loss": 2.3354, "step": 8760 }, { "epoch": 0.4, "learning_rate": 0.0001310998026454862, "loss": 2.3274, "step": 8770 }, { "epoch": 0.4, "learning_rate": 0.00013096224524758446, "loss": 2.3321, "step": 8780 }, { "epoch": 0.4, "learning_rate": 0.00013082462302021417, "loss": 2.3413, "step": 8790 }, { "epoch": 0.4, "learning_rate": 0.000130686936251532, "loss": 2.349, "step": 8800 }, { "epoch": 0.4, "learning_rate": 0.0001305491852298297, "loss": 2.3373, "step": 8810 }, { "epoch": 0.4, "learning_rate": 0.00013041137024353349, "loss": 2.3317, "step": 8820 }, { "epoch": 0.4, "learning_rate": 0.00013027349158120367, "loss": 2.3229, "step": 8830 }, { "epoch": 0.41, "learning_rate": 0.00013013554953153378, "loss": 2.3344, "step": 8840 }, { "epoch": 0.41, "learning_rate": 0.00012999754438335005, "loss": 2.3531, "step": 8850 }, { "epoch": 0.41, "learning_rate": 0.0001298594764256109, "loss": 2.3525, "step": 8860 }, { "epoch": 0.41, "learning_rate": 0.00012972134594740618, "loss": 2.3227, "step": 8870 }, { "epoch": 0.41, "learning_rate": 0.00012958315323795673, "loss": 2.3282, "step": 8880 }, { "epoch": 0.41, "learning_rate": 0.0001294448985866137, "loss": 2.3536, "step": 8890 }, { "epoch": 0.41, "learning_rate": 0.0001293065822828578, "loss": 2.3556, "step": 8900 }, { "epoch": 0.41, "learning_rate": 0.00012916820461629896, "loss": 2.3073, "step": 8910 }, { "epoch": 0.41, "learning_rate": 0.00012902976587667557, "loss": 2.3425, "step": 8920 }, { "epoch": 0.41, "learning_rate": 0.00012889126635385389, "loss": 2.3223, "step": 8930 }, { "epoch": 0.41, "learning_rate": 0.00012875270633782738, "loss": 2.3507, "step": 8940 }, { "epoch": 0.41, "learning_rate": 0.00012861408611871628, "loss": 2.3441, "step": 8950 }, { "epoch": 0.41, "learning_rate": 0.0001284754059867668, "loss": 2.3572, "step": 8960 }, { "epoch": 0.41, "learning_rate": 0.00012833666623235065, "loss": 2.3241, "step": 8970 }, { "epoch": 0.41, "learning_rate": 0.00012819786714596428, "loss": 2.3404, "step": 8980 }, { "epoch": 0.41, "learning_rate": 0.00012805900901822852, "loss": 2.3476, "step": 8990 }, { "epoch": 0.41, "learning_rate": 0.00012792009213988771, "loss": 2.332, "step": 9000 }, { "epoch": 0.41, "learning_rate": 0.00012778111680180923, "loss": 2.3302, "step": 9010 }, { "epoch": 0.41, "learning_rate": 0.0001276420832949829, "loss": 2.3379, "step": 9020 }, { "epoch": 0.41, "learning_rate": 0.0001275029919105203, "loss": 2.3382, "step": 9030 }, { "epoch": 0.41, "learning_rate": 0.00012736384293965416, "loss": 2.34, "step": 9040 }, { "epoch": 0.41, "learning_rate": 0.00012722463667373787, "loss": 2.3448, "step": 9050 }, { "epoch": 0.42, "learning_rate": 0.0001270853734042448, "loss": 2.336, "step": 9060 }, { "epoch": 0.42, "learning_rate": 0.0001269460534227675, "loss": 2.353, "step": 9070 }, { "epoch": 0.42, "learning_rate": 0.00012680667702101743, "loss": 2.3403, "step": 9080 }, { "epoch": 0.42, "learning_rate": 0.00012666724449082416, "loss": 2.3312, "step": 9090 }, { "epoch": 0.42, "learning_rate": 0.00012652775612413477, "loss": 2.3148, "step": 9100 }, { "epoch": 0.42, "learning_rate": 0.0001263882122130132, "loss": 2.3232, "step": 9110 }, { "epoch": 0.42, "learning_rate": 0.00012624861304963978, "loss": 2.3316, "step": 9120 }, { "epoch": 0.42, "learning_rate": 0.00012610895892631042, "loss": 2.3495, "step": 9130 }, { "epoch": 0.42, "learning_rate": 0.00012596925013543623, "loss": 2.347, "step": 9140 }, { "epoch": 0.42, "learning_rate": 0.0001258294869695427, "loss": 2.3412, "step": 9150 }, { "epoch": 0.42, "learning_rate": 0.0001256896697212692, "loss": 2.3574, "step": 9160 }, { "epoch": 0.42, "learning_rate": 0.0001255497986833683, "loss": 2.3262, "step": 9170 }, { "epoch": 0.42, "learning_rate": 0.0001254098741487053, "loss": 2.3389, "step": 9180 }, { "epoch": 0.42, "learning_rate": 0.00012526989641025737, "loss": 2.3373, "step": 9190 }, { "epoch": 0.42, "learning_rate": 0.00012512986576111316, "loss": 2.3419, "step": 9200 }, { "epoch": 0.42, "learning_rate": 0.00012498978249447215, "loss": 2.3333, "step": 9210 }, { "epoch": 0.42, "learning_rate": 0.0001248496469036439, "loss": 2.3359, "step": 9220 }, { "epoch": 0.42, "learning_rate": 0.00012470945928204756, "loss": 2.3154, "step": 9230 }, { "epoch": 0.42, "learning_rate": 0.00012456921992321123, "loss": 2.3446, "step": 9240 }, { "epoch": 0.42, "learning_rate": 0.00012442892912077136, "loss": 2.3326, "step": 9250 }, { "epoch": 0.42, "learning_rate": 0.00012428858716847211, "loss": 2.3385, "step": 9260 }, { "epoch": 0.43, "learning_rate": 0.00012414819436016466, "loss": 2.3426, "step": 9270 }, { "epoch": 0.43, "learning_rate": 0.00012400775098980678, "loss": 2.3453, "step": 9280 }, { "epoch": 0.43, "learning_rate": 0.00012386725735146202, "loss": 2.3093, "step": 9290 }, { "epoch": 0.43, "learning_rate": 0.00012372671373929927, "loss": 2.3348, "step": 9300 }, { "epoch": 0.43, "learning_rate": 0.000123586120447592, "loss": 2.3323, "step": 9310 }, { "epoch": 0.43, "learning_rate": 0.00012344547777071768, "loss": 2.3327, "step": 9320 }, { "epoch": 0.43, "learning_rate": 0.00012330478600315725, "loss": 2.3569, "step": 9330 }, { "epoch": 0.43, "learning_rate": 0.0001231640454394944, "loss": 2.3349, "step": 9340 }, { "epoch": 0.43, "learning_rate": 0.000123023256374415, "loss": 2.3302, "step": 9350 }, { "epoch": 0.43, "learning_rate": 0.0001228824191027064, "loss": 2.3398, "step": 9360 }, { "epoch": 0.43, "learning_rate": 0.00012274153391925703, "loss": 2.3458, "step": 9370 }, { "epoch": 0.43, "learning_rate": 0.00012260060111905552, "loss": 2.3434, "step": 9380 }, { "epoch": 0.43, "learning_rate": 0.00012245962099719025, "loss": 2.3486, "step": 9390 }, { "epoch": 0.43, "learning_rate": 0.00012231859384884865, "loss": 2.3346, "step": 9400 }, { "epoch": 0.43, "learning_rate": 0.00012217751996931671, "loss": 2.3237, "step": 9410 }, { "epoch": 0.43, "learning_rate": 0.00012203639965397815, "loss": 2.3337, "step": 9420 }, { "epoch": 0.43, "learning_rate": 0.00012189523319831396, "loss": 2.3282, "step": 9430 }, { "epoch": 0.43, "learning_rate": 0.00012175402089790175, "loss": 2.3311, "step": 9440 }, { "epoch": 0.43, "learning_rate": 0.00012161276304841512, "loss": 2.3573, "step": 9450 }, { "epoch": 0.43, "learning_rate": 0.00012147145994562305, "loss": 2.3221, "step": 9460 }, { "epoch": 0.43, "learning_rate": 0.00012133011188538925, "loss": 2.3431, "step": 9470 }, { "epoch": 0.43, "learning_rate": 0.00012118871916367153, "loss": 2.3519, "step": 9480 }, { "epoch": 0.44, "learning_rate": 0.00012104728207652134, "loss": 2.3331, "step": 9490 }, { "epoch": 0.44, "learning_rate": 0.0001209058009200829, "loss": 2.3214, "step": 9500 }, { "epoch": 0.44, "learning_rate": 0.0001207642759905927, "loss": 2.3392, "step": 9510 }, { "epoch": 0.44, "learning_rate": 0.00012062270758437899, "loss": 2.339, "step": 9520 }, { "epoch": 0.44, "learning_rate": 0.00012048109599786095, "loss": 2.3393, "step": 9530 }, { "epoch": 0.44, "learning_rate": 0.00012033944152754823, "loss": 2.3193, "step": 9540 }, { "epoch": 0.44, "learning_rate": 0.00012019774447004024, "loss": 2.3344, "step": 9550 }, { "epoch": 0.44, "learning_rate": 0.00012005600512202557, "loss": 2.3298, "step": 9560 }, { "epoch": 0.44, "learning_rate": 0.00011991422378028136, "loss": 2.3538, "step": 9570 }, { "epoch": 0.44, "learning_rate": 0.0001197724007416727, "loss": 2.3357, "step": 9580 }, { "epoch": 0.44, "learning_rate": 0.00011963053630315196, "loss": 2.3265, "step": 9590 }, { "epoch": 0.44, "learning_rate": 0.00011948863076175816, "loss": 2.3273, "step": 9600 }, { "epoch": 0.44, "learning_rate": 0.00011934668441461644, "loss": 2.3729, "step": 9610 }, { "epoch": 0.44, "learning_rate": 0.00011920469755893738, "loss": 2.3381, "step": 9620 }, { "epoch": 0.44, "learning_rate": 0.00011906267049201631, "loss": 2.3267, "step": 9630 }, { "epoch": 0.44, "learning_rate": 0.00011892060351123287, "loss": 2.3404, "step": 9640 }, { "epoch": 0.44, "learning_rate": 0.00011877849691405016, "loss": 2.3367, "step": 9650 }, { "epoch": 0.44, "learning_rate": 0.00011863635099801431, "loss": 2.3371, "step": 9660 }, { "epoch": 0.44, "learning_rate": 0.0001184941660607537, "loss": 2.334, "step": 9670 }, { "epoch": 0.44, "learning_rate": 0.00011835194239997847, "loss": 2.3494, "step": 9680 }, { "epoch": 0.44, "learning_rate": 0.0001182096803134798, "loss": 2.3407, "step": 9690 }, { "epoch": 0.44, "learning_rate": 0.00011806738009912941, "loss": 2.3175, "step": 9700 }, { "epoch": 0.45, "learning_rate": 0.00011792504205487875, "loss": 2.357, "step": 9710 }, { "epoch": 0.45, "learning_rate": 0.00011778266647875853, "loss": 2.3244, "step": 9720 }, { "epoch": 0.45, "learning_rate": 0.00011764025366887808, "loss": 2.358, "step": 9730 }, { "epoch": 0.45, "learning_rate": 0.00011749780392342459, "loss": 2.3311, "step": 9740 }, { "epoch": 0.45, "learning_rate": 0.00011735531754066271, "loss": 2.3168, "step": 9750 }, { "epoch": 0.45, "learning_rate": 0.00011721279481893368, "loss": 2.3429, "step": 9760 }, { "epoch": 0.45, "learning_rate": 0.0001170702360566549, "loss": 2.3225, "step": 9770 }, { "epoch": 0.45, "learning_rate": 0.00011692764155231926, "loss": 2.3359, "step": 9780 }, { "epoch": 0.45, "learning_rate": 0.0001167850116044944, "loss": 2.3153, "step": 9790 }, { "epoch": 0.45, "learning_rate": 0.00011664234651182222, "loss": 2.3389, "step": 9800 }, { "epoch": 0.45, "learning_rate": 0.00011649964657301824, "loss": 2.3372, "step": 9810 }, { "epoch": 0.45, "learning_rate": 0.00011635691208687092, "loss": 2.3199, "step": 9820 }, { "epoch": 0.45, "learning_rate": 0.00011621414335224096, "loss": 2.3124, "step": 9830 }, { "epoch": 0.45, "learning_rate": 0.00011607134066806093, "loss": 2.3238, "step": 9840 }, { "epoch": 0.45, "learning_rate": 0.00011592850433333438, "loss": 2.3227, "step": 9850 }, { "epoch": 0.45, "learning_rate": 0.00011578563464713537, "loss": 2.3387, "step": 9860 }, { "epoch": 0.45, "learning_rate": 0.00011564273190860772, "loss": 2.3425, "step": 9870 }, { "epoch": 0.45, "learning_rate": 0.00011549979641696456, "loss": 2.3328, "step": 9880 }, { "epoch": 0.45, "learning_rate": 0.0001153568284714875, "loss": 2.3353, "step": 9890 }, { "epoch": 0.45, "learning_rate": 0.00011521382837152614, "loss": 2.3266, "step": 9900 }, { "epoch": 0.45, "learning_rate": 0.0001150707964164975, "loss": 2.339, "step": 9910 }, { "epoch": 0.45, "learning_rate": 0.0001149277329058851, "loss": 2.3144, "step": 9920 }, { "epoch": 0.46, "learning_rate": 0.0001147846381392387, "loss": 2.3437, "step": 9930 }, { "epoch": 0.46, "learning_rate": 0.00011464151241617345, "loss": 2.3251, "step": 9940 }, { "epoch": 0.46, "learning_rate": 0.00011449835603636926, "loss": 2.3323, "step": 9950 }, { "epoch": 0.46, "learning_rate": 0.00011435516929957035, "loss": 2.3331, "step": 9960 }, { "epoch": 0.46, "learning_rate": 0.00011421195250558442, "loss": 2.3385, "step": 9970 }, { "epoch": 0.46, "learning_rate": 0.0001140687059542821, "loss": 2.3485, "step": 9980 }, { "epoch": 0.46, "learning_rate": 0.00011392542994559638, "loss": 2.3415, "step": 9990 }, { "epoch": 0.46, "learning_rate": 0.00011378212477952188, "loss": 2.3547, "step": 10000 }, { "epoch": 0.46, "eval_accuracy": 0.5437904654599489, "eval_loss": 2.188946008682251, "eval_runtime": 10.6182, "eval_samples_per_second": 124.033, "eval_steps_per_second": 1.036, "step": 10000 }, { "epoch": 0.46, "learning_rate": 0.00011363879075611426, "loss": 2.3236, "step": 10010 }, { "epoch": 0.46, "learning_rate": 0.00011349542817548966, "loss": 2.3131, "step": 10020 }, { "epoch": 0.46, "learning_rate": 0.00011335203733782396, "loss": 2.3228, "step": 10030 }, { "epoch": 0.46, "learning_rate": 0.00011320861854335226, "loss": 2.3258, "step": 10040 }, { "epoch": 0.46, "learning_rate": 0.00011306517209236814, "loss": 2.3135, "step": 10050 }, { "epoch": 0.46, "learning_rate": 0.00011292169828522313, "loss": 2.3359, "step": 10060 }, { "epoch": 0.46, "learning_rate": 0.000112778197422326, "loss": 2.3428, "step": 10070 }, { "epoch": 0.46, "learning_rate": 0.00011263466980414221, "loss": 2.3336, "step": 10080 }, { "epoch": 0.46, "learning_rate": 0.0001124911157311932, "loss": 2.3513, "step": 10090 }, { "epoch": 0.46, "learning_rate": 0.00011234753550405584, "loss": 2.3385, "step": 10100 }, { "epoch": 0.46, "learning_rate": 0.00011220392942336173, "loss": 2.3467, "step": 10110 }, { "epoch": 0.46, "learning_rate": 0.00011206029778979663, "loss": 2.3522, "step": 10120 }, { "epoch": 0.46, "learning_rate": 0.00011191664090409979, "loss": 2.3512, "step": 10130 }, { "epoch": 0.46, "learning_rate": 0.00011177295906706336, "loss": 2.3318, "step": 10140 }, { "epoch": 0.47, "learning_rate": 0.00011162925257953166, "loss": 2.3364, "step": 10150 }, { "epoch": 0.47, "learning_rate": 0.00011148552174240073, "loss": 2.3263, "step": 10160 }, { "epoch": 0.47, "learning_rate": 0.00011134176685661748, "loss": 2.3265, "step": 10170 }, { "epoch": 0.47, "learning_rate": 0.00011119798822317924, "loss": 2.3343, "step": 10180 }, { "epoch": 0.47, "learning_rate": 0.0001110541861431331, "loss": 2.3139, "step": 10190 }, { "epoch": 0.47, "learning_rate": 0.00011091036091757514, "loss": 2.3179, "step": 10200 }, { "epoch": 0.47, "learning_rate": 0.00011076651284764998, "loss": 2.3462, "step": 10210 }, { "epoch": 0.47, "learning_rate": 0.00011062264223455007, "loss": 2.3327, "step": 10220 }, { "epoch": 0.47, "learning_rate": 0.00011047874937951506, "loss": 2.328, "step": 10230 }, { "epoch": 0.47, "learning_rate": 0.00011033483458383107, "loss": 2.3367, "step": 10240 }, { "epoch": 0.47, "learning_rate": 0.00011019089814883032, "loss": 2.3379, "step": 10250 }, { "epoch": 0.47, "learning_rate": 0.00011004694037589023, "loss": 2.287, "step": 10260 }, { "epoch": 0.47, "learning_rate": 0.00010990296156643294, "loss": 2.3171, "step": 10270 }, { "epoch": 0.47, "learning_rate": 0.00010975896202192462, "loss": 2.3391, "step": 10280 }, { "epoch": 0.47, "learning_rate": 0.00010961494204387486, "loss": 2.3295, "step": 10290 }, { "epoch": 0.47, "learning_rate": 0.00010947090193383604, "loss": 2.3165, "step": 10300 }, { "epoch": 0.47, "learning_rate": 0.00010932684199340268, "loss": 2.3188, "step": 10310 }, { "epoch": 0.47, "learning_rate": 0.00010918276252421087, "loss": 2.3403, "step": 10320 }, { "epoch": 0.47, "learning_rate": 0.0001090386638279375, "loss": 2.3262, "step": 10330 }, { "epoch": 0.47, "learning_rate": 0.0001088945462062998, "loss": 2.3395, "step": 10340 }, { "epoch": 0.47, "learning_rate": 0.00010875040996105459, "loss": 2.3411, "step": 10350 }, { "epoch": 0.47, "learning_rate": 0.00010860625539399767, "loss": 2.3279, "step": 10360 }, { "epoch": 0.48, "learning_rate": 0.00010846208280696324, "loss": 2.3217, "step": 10370 }, { "epoch": 0.48, "learning_rate": 0.0001083178925018232, "loss": 2.3251, "step": 10380 }, { "epoch": 0.48, "learning_rate": 0.00010817368478048657, "loss": 2.3115, "step": 10390 }, { "epoch": 0.48, "learning_rate": 0.00010802945994489886, "loss": 2.3251, "step": 10400 }, { "epoch": 0.48, "learning_rate": 0.00010788521829704132, "loss": 2.3286, "step": 10410 }, { "epoch": 0.48, "learning_rate": 0.00010774096013893049, "loss": 2.3224, "step": 10420 }, { "epoch": 0.48, "learning_rate": 0.00010759668577261746, "loss": 2.3305, "step": 10430 }, { "epoch": 0.48, "learning_rate": 0.00010745239550018725, "loss": 2.3222, "step": 10440 }, { "epoch": 0.48, "learning_rate": 0.00010730808962375817, "loss": 2.3178, "step": 10450 }, { "epoch": 0.48, "learning_rate": 0.00010716376844548126, "loss": 2.3329, "step": 10460 }, { "epoch": 0.48, "learning_rate": 0.00010701943226753953, "loss": 2.3072, "step": 10470 }, { "epoch": 0.48, "learning_rate": 0.00010687508139214739, "loss": 2.3339, "step": 10480 }, { "epoch": 0.48, "learning_rate": 0.0001067307161215501, "loss": 2.3264, "step": 10490 }, { "epoch": 0.48, "learning_rate": 0.00010658633675802301, "loss": 2.3086, "step": 10500 }, { "epoch": 0.48, "learning_rate": 0.00010644194360387096, "loss": 2.3008, "step": 10510 }, { "epoch": 0.48, "learning_rate": 0.0001062975369614277, "loss": 2.3459, "step": 10520 }, { "epoch": 0.48, "learning_rate": 0.00010615311713305525, "loss": 2.3223, "step": 10530 }, { "epoch": 0.48, "learning_rate": 0.00010600868442114315, "loss": 2.321, "step": 10540 }, { "epoch": 0.48, "learning_rate": 0.00010586423912810801, "loss": 2.329, "step": 10550 }, { "epoch": 0.48, "learning_rate": 0.00010571978155639273, "loss": 2.3204, "step": 10560 }, { "epoch": 0.48, "learning_rate": 0.00010557531200846596, "loss": 2.3127, "step": 10570 }, { "epoch": 0.49, "learning_rate": 0.0001054308307868213, "loss": 2.3208, "step": 10580 }, { "epoch": 0.49, "learning_rate": 0.00010528633819397697, "loss": 2.3143, "step": 10590 }, { "epoch": 0.49, "learning_rate": 0.0001051418345324749, "loss": 2.3253, "step": 10600 }, { "epoch": 0.49, "learning_rate": 0.00010499732010488024, "loss": 2.3292, "step": 10610 }, { "epoch": 0.49, "learning_rate": 0.0001048527952137806, "loss": 2.3198, "step": 10620 }, { "epoch": 0.49, "learning_rate": 0.00010470826016178559, "loss": 2.3259, "step": 10630 }, { "epoch": 0.49, "learning_rate": 0.00010456371525152607, "loss": 2.33, "step": 10640 }, { "epoch": 0.49, "learning_rate": 0.0001044191607856535, "loss": 2.3291, "step": 10650 }, { "epoch": 0.49, "learning_rate": 0.0001042745970668394, "loss": 2.3436, "step": 10660 }, { "epoch": 0.49, "learning_rate": 0.00010413002439777466, "loss": 2.32, "step": 10670 }, { "epoch": 0.49, "learning_rate": 0.00010398544308116884, "loss": 2.3209, "step": 10680 }, { "epoch": 0.49, "learning_rate": 0.0001038408534197497, "loss": 2.3243, "step": 10690 }, { "epoch": 0.49, "learning_rate": 0.00010369625571626242, "loss": 2.3334, "step": 10700 }, { "epoch": 0.49, "learning_rate": 0.00010355165027346905, "loss": 2.3407, "step": 10710 }, { "epoch": 0.49, "learning_rate": 0.0001034070373941478, "loss": 2.3382, "step": 10720 }, { "epoch": 0.49, "learning_rate": 0.00010326241738109253, "loss": 2.3267, "step": 10730 }, { "epoch": 0.49, "learning_rate": 0.00010311779053711192, "loss": 2.3239, "step": 10740 }, { "epoch": 0.49, "learning_rate": 0.00010297315716502902, "loss": 2.2937, "step": 10750 }, { "epoch": 0.49, "learning_rate": 0.00010282851756768059, "loss": 2.3524, "step": 10760 }, { "epoch": 0.49, "learning_rate": 0.00010268387204791635, "loss": 2.3297, "step": 10770 }, { "epoch": 0.49, "learning_rate": 0.00010253922090859845, "loss": 2.3087, "step": 10780 }, { "epoch": 0.49, "learning_rate": 0.0001023945644526008, "loss": 2.3399, "step": 10790 }, { "epoch": 0.5, "learning_rate": 0.00010224990298280846, "loss": 2.3008, "step": 10800 }, { "epoch": 0.5, "learning_rate": 0.000102105236802117, "loss": 2.3369, "step": 10810 }, { "epoch": 0.5, "learning_rate": 0.00010196056621343182, "loss": 2.3362, "step": 10820 }, { "epoch": 0.5, "learning_rate": 0.00010181589151966751, "loss": 2.3117, "step": 10830 }, { "epoch": 0.5, "learning_rate": 0.00010167121302374736, "loss": 2.3117, "step": 10840 }, { "epoch": 0.5, "learning_rate": 0.00010152653102860255, "loss": 2.3073, "step": 10850 }, { "epoch": 0.5, "learning_rate": 0.00010138184583717158, "loss": 2.323, "step": 10860 }, { "epoch": 0.5, "learning_rate": 0.00010123715775239975, "loss": 2.3223, "step": 10870 }, { "epoch": 0.5, "learning_rate": 0.0001010924670772382, "loss": 2.3245, "step": 10880 }, { "epoch": 0.5, "learning_rate": 0.00010094777411464373, "loss": 2.306, "step": 10890 }, { "epoch": 0.5, "learning_rate": 0.00010080307916757782, "loss": 2.3155, "step": 10900 }, { "epoch": 0.5, "learning_rate": 0.00010065838253900608, "loss": 2.3591, "step": 10910 }, { "epoch": 0.5, "learning_rate": 0.00010051368453189767, "loss": 2.3251, "step": 10920 }, { "epoch": 0.5, "learning_rate": 0.00010036898544922463, "loss": 2.3192, "step": 10930 }, { "epoch": 0.5, "learning_rate": 0.0001002242855939613, "loss": 2.3133, "step": 10940 }, { "epoch": 0.5, "learning_rate": 0.00010007958526908359, "loss": 2.3151, "step": 10950 }, { "epoch": 0.5, "learning_rate": 9.993488477756839e-05, "loss": 2.3222, "step": 10960 }, { "epoch": 0.5, "learning_rate": 9.979018442239296e-05, "loss": 2.3251, "step": 10970 }, { "epoch": 0.5, "learning_rate": 9.964548450653426e-05, "loss": 2.3065, "step": 10980 }, { "epoch": 0.5, "learning_rate": 9.950078533296835e-05, "loss": 2.3386, "step": 10990 }, { "epoch": 0.5, "learning_rate": 9.935608720466966e-05, "loss": 2.3296, "step": 11000 }, { "epoch": 0.5, "learning_rate": 9.92113904246106e-05, "loss": 2.361, "step": 11010 }, { "epoch": 0.51, "learning_rate": 9.906669529576055e-05, "loss": 2.3285, "step": 11020 }, { "epoch": 0.51, "learning_rate": 9.892200212108561e-05, "loss": 2.3269, "step": 11030 }, { "epoch": 0.51, "learning_rate": 9.877731120354764e-05, "loss": 2.3327, "step": 11040 }, { "epoch": 0.51, "learning_rate": 9.863262284610392e-05, "loss": 2.3421, "step": 11050 }, { "epoch": 0.51, "learning_rate": 9.848793735170624e-05, "loss": 2.3239, "step": 11060 }, { "epoch": 0.51, "learning_rate": 9.834325502330048e-05, "loss": 2.3325, "step": 11070 }, { "epoch": 0.51, "learning_rate": 9.819857616382581e-05, "loss": 2.326, "step": 11080 }, { "epoch": 0.51, "learning_rate": 9.805390107621426e-05, "loss": 2.3081, "step": 11090 }, { "epoch": 0.51, "learning_rate": 9.790923006338982e-05, "loss": 2.3398, "step": 11100 }, { "epoch": 0.51, "learning_rate": 9.776456342826809e-05, "loss": 2.3334, "step": 11110 }, { "epoch": 0.51, "learning_rate": 9.761990147375536e-05, "loss": 2.3268, "step": 11120 }, { "epoch": 0.51, "learning_rate": 9.747524450274826e-05, "loss": 2.3239, "step": 11130 }, { "epoch": 0.51, "learning_rate": 9.733059281813289e-05, "loss": 2.2933, "step": 11140 }, { "epoch": 0.51, "learning_rate": 9.71859467227843e-05, "loss": 2.3225, "step": 11150 }, { "epoch": 0.51, "learning_rate": 9.70413065195658e-05, "loss": 2.3225, "step": 11160 }, { "epoch": 0.51, "learning_rate": 9.68966725113285e-05, "loss": 2.3043, "step": 11170 }, { "epoch": 0.51, "learning_rate": 9.675204500091041e-05, "loss": 2.3365, "step": 11180 }, { "epoch": 0.51, "learning_rate": 9.660742429113592e-05, "loss": 2.3251, "step": 11190 }, { "epoch": 0.51, "learning_rate": 9.646281068481532e-05, "loss": 2.3294, "step": 11200 }, { "epoch": 0.51, "learning_rate": 9.631820448474386e-05, "loss": 2.3111, "step": 11210 }, { "epoch": 0.51, "learning_rate": 9.617360599370143e-05, "loss": 2.3297, "step": 11220 }, { "epoch": 0.51, "learning_rate": 9.602901551445167e-05, "loss": 2.3262, "step": 11230 }, { "epoch": 0.52, "learning_rate": 9.588443334974151e-05, "loss": 2.3244, "step": 11240 }, { "epoch": 0.52, "learning_rate": 9.573985980230037e-05, "loss": 2.3242, "step": 11250 }, { "epoch": 0.52, "learning_rate": 9.559529517483983e-05, "loss": 2.3113, "step": 11260 }, { "epoch": 0.52, "learning_rate": 9.545073977005254e-05, "loss": 2.3404, "step": 11270 }, { "epoch": 0.52, "learning_rate": 9.530619389061207e-05, "loss": 2.3105, "step": 11280 }, { "epoch": 0.52, "learning_rate": 9.516165783917187e-05, "loss": 2.3289, "step": 11290 }, { "epoch": 0.52, "learning_rate": 9.50171319183649e-05, "loss": 2.3197, "step": 11300 }, { "epoch": 0.52, "learning_rate": 9.487261643080292e-05, "loss": 2.3298, "step": 11310 }, { "epoch": 0.52, "learning_rate": 9.472811167907581e-05, "loss": 2.3388, "step": 11320 }, { "epoch": 0.52, "learning_rate": 9.45836179657509e-05, "loss": 2.3218, "step": 11330 }, { "epoch": 0.52, "learning_rate": 9.443913559337261e-05, "loss": 2.3367, "step": 11340 }, { "epoch": 0.52, "learning_rate": 9.429466486446137e-05, "loss": 2.323, "step": 11350 }, { "epoch": 0.52, "learning_rate": 9.415020608151345e-05, "loss": 2.3319, "step": 11360 }, { "epoch": 0.52, "learning_rate": 9.400575954699995e-05, "loss": 2.3079, "step": 11370 }, { "epoch": 0.52, "learning_rate": 9.386132556336635e-05, "loss": 2.2967, "step": 11380 }, { "epoch": 0.52, "learning_rate": 9.371690443303198e-05, "loss": 2.3178, "step": 11390 }, { "epoch": 0.52, "learning_rate": 9.357249645838905e-05, "loss": 2.3233, "step": 11400 }, { "epoch": 0.52, "learning_rate": 9.342810194180241e-05, "loss": 2.3219, "step": 11410 }, { "epoch": 0.52, "learning_rate": 9.32837211856086e-05, "loss": 2.3279, "step": 11420 }, { "epoch": 0.52, "learning_rate": 9.313935449211546e-05, "loss": 2.3196, "step": 11430 }, { "epoch": 0.52, "learning_rate": 9.299500216360125e-05, "loss": 2.3096, "step": 11440 }, { "epoch": 0.52, "learning_rate": 9.28506645023143e-05, "loss": 2.3127, "step": 11450 }, { "epoch": 0.53, "learning_rate": 9.270634181047214e-05, "loss": 2.3342, "step": 11460 }, { "epoch": 0.53, "learning_rate": 9.256203439026096e-05, "loss": 2.3316, "step": 11470 }, { "epoch": 0.53, "learning_rate": 9.241774254383498e-05, "loss": 2.3219, "step": 11480 }, { "epoch": 0.53, "learning_rate": 9.227346657331589e-05, "loss": 2.3233, "step": 11490 }, { "epoch": 0.53, "learning_rate": 9.212920678079195e-05, "loss": 2.3039, "step": 11500 }, { "epoch": 0.53, "learning_rate": 9.198496346831779e-05, "loss": 2.313, "step": 11510 }, { "epoch": 0.53, "learning_rate": 9.184073693791331e-05, "loss": 2.3463, "step": 11520 }, { "epoch": 0.53, "learning_rate": 9.169652749156345e-05, "loss": 2.3184, "step": 11530 }, { "epoch": 0.53, "learning_rate": 9.155233543121725e-05, "loss": 2.324, "step": 11540 }, { "epoch": 0.53, "learning_rate": 9.140816105878742e-05, "loss": 2.3043, "step": 11550 }, { "epoch": 0.53, "learning_rate": 9.126400467614956e-05, "loss": 2.338, "step": 11560 }, { "epoch": 0.53, "learning_rate": 9.111986658514175e-05, "loss": 2.3335, "step": 11570 }, { "epoch": 0.53, "learning_rate": 9.09757470875636e-05, "loss": 2.3216, "step": 11580 }, { "epoch": 0.53, "learning_rate": 9.083164648517584e-05, "loss": 2.3223, "step": 11590 }, { "epoch": 0.53, "learning_rate": 9.068756507969974e-05, "loss": 2.3095, "step": 11600 }, { "epoch": 0.53, "learning_rate": 9.054350317281623e-05, "loss": 2.326, "step": 11610 }, { "epoch": 0.53, "learning_rate": 9.039946106616553e-05, "loss": 2.3118, "step": 11620 }, { "epoch": 0.53, "learning_rate": 9.02554390613463e-05, "loss": 2.3286, "step": 11630 }, { "epoch": 0.53, "learning_rate": 9.011143745991522e-05, "loss": 2.3335, "step": 11640 }, { "epoch": 0.53, "learning_rate": 8.99674565633861e-05, "loss": 2.3235, "step": 11650 }, { "epoch": 0.53, "learning_rate": 8.982349667322957e-05, "loss": 2.3166, "step": 11660 }, { "epoch": 0.54, "learning_rate": 8.967955809087213e-05, "loss": 2.3259, "step": 11670 }, { "epoch": 0.54, "learning_rate": 8.95356411176958e-05, "loss": 2.3218, "step": 11680 }, { "epoch": 0.54, "learning_rate": 8.939174605503719e-05, "loss": 2.3175, "step": 11690 }, { "epoch": 0.54, "learning_rate": 8.924787320418724e-05, "loss": 2.3011, "step": 11700 }, { "epoch": 0.54, "learning_rate": 8.910402286639017e-05, "loss": 2.3041, "step": 11710 }, { "epoch": 0.54, "learning_rate": 8.896019534284321e-05, "loss": 2.3196, "step": 11720 }, { "epoch": 0.54, "learning_rate": 8.881639093469572e-05, "loss": 2.3146, "step": 11730 }, { "epoch": 0.54, "learning_rate": 8.867260994304881e-05, "loss": 2.3137, "step": 11740 }, { "epoch": 0.54, "learning_rate": 8.852885266895433e-05, "loss": 2.3096, "step": 11750 }, { "epoch": 0.54, "learning_rate": 8.838511941341472e-05, "loss": 2.3291, "step": 11760 }, { "epoch": 0.54, "learning_rate": 8.82414104773819e-05, "loss": 2.3255, "step": 11770 }, { "epoch": 0.54, "learning_rate": 8.80977261617571e-05, "loss": 2.3145, "step": 11780 }, { "epoch": 0.54, "learning_rate": 8.795406676738983e-05, "loss": 2.3358, "step": 11790 }, { "epoch": 0.54, "learning_rate": 8.78104325950774e-05, "loss": 2.312, "step": 11800 }, { "epoch": 0.54, "learning_rate": 8.766682394556447e-05, "loss": 2.3181, "step": 11810 }, { "epoch": 0.54, "learning_rate": 8.75232411195421e-05, "loss": 2.325, "step": 11820 }, { "epoch": 0.54, "learning_rate": 8.737968441764738e-05, "loss": 2.3073, "step": 11830 }, { "epoch": 0.54, "learning_rate": 8.723615414046267e-05, "loss": 2.3108, "step": 11840 }, { "epoch": 0.54, "learning_rate": 8.709265058851501e-05, "loss": 2.2938, "step": 11850 }, { "epoch": 0.54, "learning_rate": 8.694917406227541e-05, "loss": 2.3231, "step": 11860 }, { "epoch": 0.54, "learning_rate": 8.680572486215845e-05, "loss": 2.3185, "step": 11870 }, { "epoch": 0.54, "learning_rate": 8.666230328852134e-05, "loss": 2.3233, "step": 11880 }, { "epoch": 0.55, "learning_rate": 8.651890964166352e-05, "loss": 2.3289, "step": 11890 }, { "epoch": 0.55, "learning_rate": 8.637554422182589e-05, "loss": 2.3115, "step": 11900 }, { "epoch": 0.55, "learning_rate": 8.623220732919038e-05, "loss": 2.3082, "step": 11910 }, { "epoch": 0.55, "learning_rate": 8.608889926387903e-05, "loss": 2.3248, "step": 11920 }, { "epoch": 0.55, "learning_rate": 8.594562032595365e-05, "loss": 2.325, "step": 11930 }, { "epoch": 0.55, "learning_rate": 8.580237081541497e-05, "loss": 2.3202, "step": 11940 }, { "epoch": 0.55, "learning_rate": 8.56591510322022e-05, "loss": 2.3097, "step": 11950 }, { "epoch": 0.55, "learning_rate": 8.55159612761922e-05, "loss": 2.3318, "step": 11960 }, { "epoch": 0.55, "learning_rate": 8.537280184719904e-05, "loss": 2.3225, "step": 11970 }, { "epoch": 0.55, "learning_rate": 8.522967304497325e-05, "loss": 2.3159, "step": 11980 }, { "epoch": 0.55, "learning_rate": 8.508657516920117e-05, "loss": 2.3115, "step": 11990 }, { "epoch": 0.55, "learning_rate": 8.494350851950458e-05, "loss": 2.3271, "step": 12000 }, { "epoch": 0.55, "eval_accuracy": 0.5458657409572245, "eval_loss": 2.175852060317993, "eval_runtime": 10.6378, "eval_samples_per_second": 123.804, "eval_steps_per_second": 1.034, "step": 12000 }, { "epoch": 0.55, "learning_rate": 8.480047339543964e-05, "loss": 2.3019, "step": 12010 }, { "epoch": 0.55, "learning_rate": 8.465747009649674e-05, "loss": 2.3274, "step": 12020 }, { "epoch": 0.55, "learning_rate": 8.451449892209938e-05, "loss": 2.2891, "step": 12030 }, { "epoch": 0.55, "learning_rate": 8.437156017160403e-05, "loss": 2.3138, "step": 12040 }, { "epoch": 0.55, "learning_rate": 8.422865414429912e-05, "loss": 2.2956, "step": 12050 }, { "epoch": 0.55, "learning_rate": 8.408578113940463e-05, "loss": 2.3327, "step": 12060 }, { "epoch": 0.55, "learning_rate": 8.394294145607131e-05, "loss": 2.304, "step": 12070 }, { "epoch": 0.55, "learning_rate": 8.380013539338032e-05, "loss": 2.2892, "step": 12080 }, { "epoch": 0.55, "learning_rate": 8.365736325034222e-05, "loss": 2.3222, "step": 12090 }, { "epoch": 0.55, "learning_rate": 8.35146253258967e-05, "loss": 2.3145, "step": 12100 }, { "epoch": 0.56, "learning_rate": 8.33719219189117e-05, "loss": 2.3297, "step": 12110 }, { "epoch": 0.56, "learning_rate": 8.322925332818295e-05, "loss": 2.325, "step": 12120 }, { "epoch": 0.56, "learning_rate": 8.308661985243324e-05, "loss": 2.2909, "step": 12130 }, { "epoch": 0.56, "learning_rate": 8.294402179031192e-05, "loss": 2.3282, "step": 12140 }, { "epoch": 0.56, "learning_rate": 8.280145944039401e-05, "loss": 2.2954, "step": 12150 }, { "epoch": 0.56, "learning_rate": 8.265893310118e-05, "loss": 2.3196, "step": 12160 }, { "epoch": 0.56, "learning_rate": 8.251644307109474e-05, "loss": 2.296, "step": 12170 }, { "epoch": 0.56, "learning_rate": 8.237398964848726e-05, "loss": 2.3298, "step": 12180 }, { "epoch": 0.56, "learning_rate": 8.223157313162981e-05, "loss": 2.3163, "step": 12190 }, { "epoch": 0.56, "learning_rate": 8.208919381871735e-05, "loss": 2.316, "step": 12200 }, { "epoch": 0.56, "learning_rate": 8.194685200786707e-05, "loss": 2.3233, "step": 12210 }, { "epoch": 0.56, "learning_rate": 8.180454799711751e-05, "loss": 2.3365, "step": 12220 }, { "epoch": 0.56, "learning_rate": 8.166228208442817e-05, "loss": 2.3212, "step": 12230 }, { "epoch": 0.56, "learning_rate": 8.152005456767861e-05, "loss": 2.3161, "step": 12240 }, { "epoch": 0.56, "learning_rate": 8.137786574466826e-05, "loss": 2.304, "step": 12250 }, { "epoch": 0.56, "learning_rate": 8.123571591311525e-05, "loss": 2.2967, "step": 12260 }, { "epoch": 0.56, "learning_rate": 8.109360537065632e-05, "loss": 2.323, "step": 12270 }, { "epoch": 0.56, "learning_rate": 8.095153441484571e-05, "loss": 2.3459, "step": 12280 }, { "epoch": 0.56, "learning_rate": 8.080950334315496e-05, "loss": 2.3108, "step": 12290 }, { "epoch": 0.56, "learning_rate": 8.0667512452972e-05, "loss": 2.3404, "step": 12300 }, { "epoch": 0.56, "learning_rate": 8.05255620416007e-05, "loss": 2.3181, "step": 12310 }, { "epoch": 0.56, "learning_rate": 8.038365240626004e-05, "loss": 2.2944, "step": 12320 }, { "epoch": 0.57, "learning_rate": 8.024178384408382e-05, "loss": 2.3162, "step": 12330 }, { "epoch": 0.57, "learning_rate": 8.009995665211963e-05, "loss": 2.2941, "step": 12340 }, { "epoch": 0.57, "learning_rate": 7.995817112732861e-05, "loss": 2.2952, "step": 12350 }, { "epoch": 0.57, "learning_rate": 7.981642756658458e-05, "loss": 2.3299, "step": 12360 }, { "epoch": 0.57, "learning_rate": 7.967472626667348e-05, "loss": 2.3268, "step": 12370 }, { "epoch": 0.57, "learning_rate": 7.953306752429278e-05, "loss": 2.2929, "step": 12380 }, { "epoch": 0.57, "learning_rate": 7.939145163605085e-05, "loss": 2.307, "step": 12390 }, { "epoch": 0.57, "learning_rate": 7.924987889846636e-05, "loss": 2.297, "step": 12400 }, { "epoch": 0.57, "learning_rate": 7.910834960796752e-05, "loss": 2.2928, "step": 12410 }, { "epoch": 0.57, "learning_rate": 7.896686406089175e-05, "loss": 2.3357, "step": 12420 }, { "epoch": 0.57, "learning_rate": 7.882542255348468e-05, "loss": 2.3131, "step": 12430 }, { "epoch": 0.57, "learning_rate": 7.868402538189989e-05, "loss": 2.3266, "step": 12440 }, { "epoch": 0.57, "learning_rate": 7.854267284219805e-05, "loss": 2.3137, "step": 12450 }, { "epoch": 0.57, "learning_rate": 7.84013652303464e-05, "loss": 2.3135, "step": 12460 }, { "epoch": 0.57, "learning_rate": 7.826010284221802e-05, "loss": 2.3032, "step": 12470 }, { "epoch": 0.57, "learning_rate": 7.811888597359149e-05, "loss": 2.3219, "step": 12480 }, { "epoch": 0.57, "learning_rate": 7.79777149201499e-05, "loss": 2.3129, "step": 12490 }, { "epoch": 0.57, "learning_rate": 7.783658997748054e-05, "loss": 2.3234, "step": 12500 }, { "epoch": 0.57, "learning_rate": 7.769551144107403e-05, "loss": 2.3221, "step": 12510 }, { "epoch": 0.57, "learning_rate": 7.755447960632394e-05, "loss": 2.3042, "step": 12520 }, { "epoch": 0.57, "learning_rate": 7.741349476852598e-05, "loss": 2.3078, "step": 12530 }, { "epoch": 0.57, "learning_rate": 7.72725572228775e-05, "loss": 2.3131, "step": 12540 }, { "epoch": 0.58, "learning_rate": 7.713166726447675e-05, "loss": 2.3146, "step": 12550 }, { "epoch": 0.58, "learning_rate": 7.699082518832246e-05, "loss": 2.3017, "step": 12560 }, { "epoch": 0.58, "learning_rate": 7.685003128931299e-05, "loss": 2.3062, "step": 12570 }, { "epoch": 0.58, "learning_rate": 7.670928586224594e-05, "loss": 2.3198, "step": 12580 }, { "epoch": 0.58, "learning_rate": 7.656858920181734e-05, "loss": 2.3028, "step": 12590 }, { "epoch": 0.58, "learning_rate": 7.642794160262106e-05, "loss": 2.3043, "step": 12600 }, { "epoch": 0.58, "learning_rate": 7.628734335914839e-05, "loss": 2.3187, "step": 12610 }, { "epoch": 0.58, "learning_rate": 7.614679476578716e-05, "loss": 2.311, "step": 12620 }, { "epoch": 0.58, "learning_rate": 7.600629611682131e-05, "loss": 2.3086, "step": 12630 }, { "epoch": 0.58, "learning_rate": 7.58658477064301e-05, "loss": 2.3117, "step": 12640 }, { "epoch": 0.58, "learning_rate": 7.572544982868778e-05, "loss": 2.2931, "step": 12650 }, { "epoch": 0.58, "learning_rate": 7.558510277756259e-05, "loss": 2.3126, "step": 12660 }, { "epoch": 0.58, "learning_rate": 7.544480684691654e-05, "loss": 2.3062, "step": 12670 }, { "epoch": 0.58, "learning_rate": 7.530456233050442e-05, "loss": 2.3177, "step": 12680 }, { "epoch": 0.58, "learning_rate": 7.516436952197355e-05, "loss": 2.3034, "step": 12690 }, { "epoch": 0.58, "learning_rate": 7.502422871486282e-05, "loss": 2.3085, "step": 12700 }, { "epoch": 0.58, "learning_rate": 7.488414020260235e-05, "loss": 2.2849, "step": 12710 }, { "epoch": 0.58, "learning_rate": 7.474410427851267e-05, "loss": 2.3199, "step": 12720 }, { "epoch": 0.58, "learning_rate": 7.46041212358043e-05, "loss": 2.2908, "step": 12730 }, { "epoch": 0.58, "learning_rate": 7.446419136757695e-05, "loss": 2.3071, "step": 12740 }, { "epoch": 0.58, "learning_rate": 7.432431496681908e-05, "loss": 2.3305, "step": 12750 }, { "epoch": 0.59, "learning_rate": 7.418449232640708e-05, "loss": 2.3035, "step": 12760 }, { "epoch": 0.59, "learning_rate": 7.404472373910493e-05, "loss": 2.318, "step": 12770 }, { "epoch": 0.59, "learning_rate": 7.390500949756329e-05, "loss": 2.3143, "step": 12780 }, { "epoch": 0.59, "learning_rate": 7.376534989431907e-05, "loss": 2.3345, "step": 12790 }, { "epoch": 0.59, "learning_rate": 7.362574522179484e-05, "loss": 2.3115, "step": 12800 }, { "epoch": 0.59, "learning_rate": 7.348619577229804e-05, "loss": 2.3191, "step": 12810 }, { "epoch": 0.59, "learning_rate": 7.334670183802064e-05, "loss": 2.2909, "step": 12820 }, { "epoch": 0.59, "learning_rate": 7.320726371103817e-05, "loss": 2.3126, "step": 12830 }, { "epoch": 0.59, "learning_rate": 7.306788168330954e-05, "loss": 2.3198, "step": 12840 }, { "epoch": 0.59, "learning_rate": 7.292855604667599e-05, "loss": 2.3077, "step": 12850 }, { "epoch": 0.59, "learning_rate": 7.278928709286082e-05, "loss": 2.3028, "step": 12860 }, { "epoch": 0.59, "learning_rate": 7.265007511346859e-05, "loss": 2.3104, "step": 12870 }, { "epoch": 0.59, "learning_rate": 7.251092039998459e-05, "loss": 2.3405, "step": 12880 }, { "epoch": 0.59, "learning_rate": 7.237182324377412e-05, "loss": 2.3247, "step": 12890 }, { "epoch": 0.59, "learning_rate": 7.223278393608216e-05, "loss": 2.3115, "step": 12900 }, { "epoch": 0.59, "learning_rate": 7.209380276803232e-05, "loss": 2.3153, "step": 12910 }, { "epoch": 0.59, "learning_rate": 7.195488003062671e-05, "loss": 2.3048, "step": 12920 }, { "epoch": 0.59, "learning_rate": 7.18160160147449e-05, "loss": 2.3083, "step": 12930 }, { "epoch": 0.59, "learning_rate": 7.167721101114364e-05, "loss": 2.2995, "step": 12940 }, { "epoch": 0.59, "learning_rate": 7.153846531045605e-05, "loss": 2.3199, "step": 12950 }, { "epoch": 0.59, "learning_rate": 7.139977920319112e-05, "loss": 2.2871, "step": 12960 }, { "epoch": 0.59, "learning_rate": 7.1261152979733e-05, "loss": 2.3065, "step": 12970 }, { "epoch": 0.6, "learning_rate": 7.112258693034058e-05, "loss": 2.3207, "step": 12980 }, { "epoch": 0.6, "learning_rate": 7.09840813451466e-05, "loss": 2.3323, "step": 12990 }, { "epoch": 0.6, "learning_rate": 7.084563651415728e-05, "loss": 2.311, "step": 13000 }, { "epoch": 0.6, "learning_rate": 7.070725272725167e-05, "loss": 2.304, "step": 13010 }, { "epoch": 0.6, "learning_rate": 7.056893027418092e-05, "loss": 2.3209, "step": 13020 }, { "epoch": 0.6, "learning_rate": 7.04306694445678e-05, "loss": 2.277, "step": 13030 }, { "epoch": 0.6, "learning_rate": 7.029247052790605e-05, "loss": 2.3236, "step": 13040 }, { "epoch": 0.6, "learning_rate": 7.015433381355981e-05, "loss": 2.311, "step": 13050 }, { "epoch": 0.6, "learning_rate": 7.001625959076284e-05, "loss": 2.3165, "step": 13060 }, { "epoch": 0.6, "learning_rate": 6.987824814861828e-05, "loss": 2.2844, "step": 13070 }, { "epoch": 0.6, "learning_rate": 6.97402997760976e-05, "loss": 2.3302, "step": 13080 }, { "epoch": 0.6, "learning_rate": 6.960241476204039e-05, "loss": 2.2867, "step": 13090 }, { "epoch": 0.6, "learning_rate": 6.946459339515343e-05, "loss": 2.3083, "step": 13100 }, { "epoch": 0.6, "learning_rate": 6.932683596401032e-05, "loss": 2.3313, "step": 13110 }, { "epoch": 0.6, "learning_rate": 6.918914275705077e-05, "loss": 2.3127, "step": 13120 }, { "epoch": 0.6, "learning_rate": 6.905151406258003e-05, "loss": 2.2841, "step": 13130 }, { "epoch": 0.6, "learning_rate": 6.891395016876823e-05, "loss": 2.2911, "step": 13140 }, { "epoch": 0.6, "learning_rate": 6.87764513636499e-05, "loss": 2.3052, "step": 13150 }, { "epoch": 0.6, "learning_rate": 6.863901793512315e-05, "loss": 2.306, "step": 13160 }, { "epoch": 0.6, "learning_rate": 6.850165017094936e-05, "loss": 2.2995, "step": 13170 }, { "epoch": 0.6, "learning_rate": 6.836434835875234e-05, "loss": 2.3245, "step": 13180 }, { "epoch": 0.6, "learning_rate": 6.822711278601777e-05, "loss": 2.3026, "step": 13190 }, { "epoch": 0.61, "learning_rate": 6.80899437400927e-05, "loss": 2.3274, "step": 13200 }, { "epoch": 0.61, "learning_rate": 6.795284150818489e-05, "loss": 2.3085, "step": 13210 }, { "epoch": 0.61, "learning_rate": 6.781580637736216e-05, "loss": 2.3136, "step": 13220 }, { "epoch": 0.61, "learning_rate": 6.767883863455182e-05, "loss": 2.3212, "step": 13230 }, { "epoch": 0.61, "learning_rate": 6.754193856654016e-05, "loss": 2.2962, "step": 13240 }, { "epoch": 0.61, "learning_rate": 6.74051064599717e-05, "loss": 2.3109, "step": 13250 }, { "epoch": 0.61, "learning_rate": 6.72683426013487e-05, "loss": 2.2984, "step": 13260 }, { "epoch": 0.61, "learning_rate": 6.71316472770305e-05, "loss": 2.3213, "step": 13270 }, { "epoch": 0.61, "learning_rate": 6.699502077323297e-05, "loss": 2.3236, "step": 13280 }, { "epoch": 0.61, "learning_rate": 6.685846337602776e-05, "loss": 2.3148, "step": 13290 }, { "epoch": 0.61, "learning_rate": 6.672197537134204e-05, "loss": 2.2987, "step": 13300 }, { "epoch": 0.61, "learning_rate": 6.658555704495748e-05, "loss": 2.3213, "step": 13310 }, { "epoch": 0.61, "learning_rate": 6.644920868251003e-05, "loss": 2.3146, "step": 13320 }, { "epoch": 0.61, "learning_rate": 6.631293056948895e-05, "loss": 2.3265, "step": 13330 }, { "epoch": 0.61, "learning_rate": 6.617672299123663e-05, "loss": 2.3107, "step": 13340 }, { "epoch": 0.61, "learning_rate": 6.604058623294757e-05, "loss": 2.3034, "step": 13350 }, { "epoch": 0.61, "learning_rate": 6.590452057966816e-05, "loss": 2.3502, "step": 13360 }, { "epoch": 0.61, "learning_rate": 6.576852631629573e-05, "loss": 2.2922, "step": 13370 }, { "epoch": 0.61, "learning_rate": 6.563260372757833e-05, "loss": 2.3409, "step": 13380 }, { "epoch": 0.61, "learning_rate": 6.549675309811379e-05, "loss": 2.3116, "step": 13390 }, { "epoch": 0.61, "learning_rate": 6.536097471234927e-05, "loss": 2.3259, "step": 13400 }, { "epoch": 0.61, "learning_rate": 6.522526885458078e-05, "loss": 2.3194, "step": 13410 }, { "epoch": 0.62, "learning_rate": 6.508963580895233e-05, "loss": 2.3174, "step": 13420 }, { "epoch": 0.62, "learning_rate": 6.495407585945563e-05, "loss": 2.3057, "step": 13430 }, { "epoch": 0.62, "learning_rate": 6.481858928992918e-05, "loss": 2.321, "step": 13440 }, { "epoch": 0.62, "learning_rate": 6.468317638405794e-05, "loss": 2.3236, "step": 13450 }, { "epoch": 0.62, "learning_rate": 6.454783742537255e-05, "loss": 2.2912, "step": 13460 }, { "epoch": 0.62, "learning_rate": 6.441257269724896e-05, "loss": 2.324, "step": 13470 }, { "epoch": 0.62, "learning_rate": 6.427738248290748e-05, "loss": 2.3173, "step": 13480 }, { "epoch": 0.62, "learning_rate": 6.414226706541267e-05, "loss": 2.3069, "step": 13490 }, { "epoch": 0.62, "learning_rate": 6.400722672767216e-05, "loss": 2.3107, "step": 13500 }, { "epoch": 0.62, "learning_rate": 6.387226175243672e-05, "loss": 2.2888, "step": 13510 }, { "epoch": 0.62, "learning_rate": 6.373737242229903e-05, "loss": 2.2753, "step": 13520 }, { "epoch": 0.62, "learning_rate": 6.360255901969354e-05, "loss": 2.308, "step": 13530 }, { "epoch": 0.62, "learning_rate": 6.346782182689566e-05, "loss": 2.3152, "step": 13540 }, { "epoch": 0.62, "learning_rate": 6.333316112602132e-05, "loss": 2.319, "step": 13550 }, { "epoch": 0.62, "learning_rate": 6.319857719902614e-05, "loss": 2.3116, "step": 13560 }, { "epoch": 0.62, "learning_rate": 6.306407032770516e-05, "loss": 2.296, "step": 13570 }, { "epoch": 0.62, "learning_rate": 6.292964079369192e-05, "loss": 2.3127, "step": 13580 }, { "epoch": 0.62, "learning_rate": 6.279528887845815e-05, "loss": 2.304, "step": 13590 }, { "epoch": 0.62, "learning_rate": 6.266101486331302e-05, "loss": 2.3073, "step": 13600 }, { "epoch": 0.62, "learning_rate": 6.252681902940252e-05, "loss": 2.3044, "step": 13610 }, { "epoch": 0.62, "learning_rate": 6.239270165770908e-05, "loss": 2.3053, "step": 13620 }, { "epoch": 0.62, "learning_rate": 6.225866302905066e-05, "loss": 2.3229, "step": 13630 }, { "epoch": 0.63, "learning_rate": 6.212470342408058e-05, "loss": 2.3171, "step": 13640 }, { "epoch": 0.63, "learning_rate": 6.19908231232865e-05, "loss": 2.3066, "step": 13650 }, { "epoch": 0.63, "learning_rate": 6.185702240699014e-05, "loss": 2.3237, "step": 13660 }, { "epoch": 0.63, "learning_rate": 6.172330155534651e-05, "loss": 2.2863, "step": 13670 }, { "epoch": 0.63, "learning_rate": 6.158966084834348e-05, "loss": 2.308, "step": 13680 }, { "epoch": 0.63, "learning_rate": 6.145610056580105e-05, "loss": 2.3156, "step": 13690 }, { "epoch": 0.63, "learning_rate": 6.132262098737083e-05, "loss": 2.3101, "step": 13700 }, { "epoch": 0.63, "learning_rate": 6.118922239253544e-05, "loss": 2.3016, "step": 13710 }, { "epoch": 0.63, "learning_rate": 6.105590506060806e-05, "loss": 2.317, "step": 13720 }, { "epoch": 0.63, "learning_rate": 6.092266927073147e-05, "loss": 2.3091, "step": 13730 }, { "epoch": 0.63, "learning_rate": 6.0789515301877995e-05, "loss": 2.3037, "step": 13740 }, { "epoch": 0.63, "learning_rate": 6.065644343284844e-05, "loss": 2.2933, "step": 13750 }, { "epoch": 0.63, "learning_rate": 6.052345394227178e-05, "loss": 2.3054, "step": 13760 }, { "epoch": 0.63, "learning_rate": 6.039054710860451e-05, "loss": 2.32, "step": 13770 }, { "epoch": 0.63, "learning_rate": 6.025772321013008e-05, "loss": 2.3083, "step": 13780 }, { "epoch": 0.63, "learning_rate": 6.0124982524958205e-05, "loss": 2.321, "step": 13790 }, { "epoch": 0.63, "learning_rate": 5.999232533102439e-05, "loss": 2.3379, "step": 13800 }, { "epoch": 0.63, "learning_rate": 5.985975190608946e-05, "loss": 2.3056, "step": 13810 }, { "epoch": 0.63, "learning_rate": 5.9727262527738595e-05, "loss": 2.2938, "step": 13820 }, { "epoch": 0.63, "learning_rate": 5.959485747338126e-05, "loss": 2.2844, "step": 13830 }, { "epoch": 0.63, "learning_rate": 5.9462537020250155e-05, "loss": 2.2905, "step": 13840 }, { "epoch": 0.64, "learning_rate": 5.933030144540094e-05, "loss": 2.301, "step": 13850 }, { "epoch": 0.64, "learning_rate": 5.9198151025711534e-05, "loss": 2.3103, "step": 13860 }, { "epoch": 0.64, "learning_rate": 5.90660860378816e-05, "loss": 2.2778, "step": 13870 }, { "epoch": 0.64, "learning_rate": 5.893410675843176e-05, "loss": 2.2887, "step": 13880 }, { "epoch": 0.64, "learning_rate": 5.880221346370345e-05, "loss": 2.3046, "step": 13890 }, { "epoch": 0.64, "learning_rate": 5.867040642985777e-05, "loss": 2.3042, "step": 13900 }, { "epoch": 0.64, "learning_rate": 5.853868593287548e-05, "loss": 2.3081, "step": 13910 }, { "epoch": 0.64, "learning_rate": 5.840705224855592e-05, "loss": 2.3084, "step": 13920 }, { "epoch": 0.64, "learning_rate": 5.8275505652516806e-05, "loss": 2.3106, "step": 13930 }, { "epoch": 0.64, "learning_rate": 5.814404642019341e-05, "loss": 2.3253, "step": 13940 }, { "epoch": 0.64, "learning_rate": 5.801267482683821e-05, "loss": 2.3085, "step": 13950 }, { "epoch": 0.64, "learning_rate": 5.788139114752005e-05, "loss": 2.3029, "step": 13960 }, { "epoch": 0.64, "learning_rate": 5.7750195657123743e-05, "loss": 2.3, "step": 13970 }, { "epoch": 0.64, "learning_rate": 5.7619088630349416e-05, "loss": 2.3055, "step": 13980 }, { "epoch": 0.64, "learning_rate": 5.7488070341712106e-05, "loss": 2.3151, "step": 13990 }, { "epoch": 0.64, "learning_rate": 5.73571410655409e-05, "loss": 2.3153, "step": 14000 }, { "epoch": 0.64, "eval_accuracy": 0.547557283467343, "eval_loss": 2.1664211750030518, "eval_runtime": 10.6183, "eval_samples_per_second": 124.031, "eval_steps_per_second": 1.036, "step": 14000 }, { "epoch": 0.64, "learning_rate": 5.7226301075978504e-05, "loss": 2.2999, "step": 14010 }, { "epoch": 0.64, "learning_rate": 5.7095550646980834e-05, "loss": 2.3314, "step": 14020 }, { "epoch": 0.64, "learning_rate": 5.696489005231607e-05, "loss": 2.3378, "step": 14030 }, { "epoch": 0.64, "learning_rate": 5.6834319565564506e-05, "loss": 2.3276, "step": 14040 }, { "epoch": 0.64, "learning_rate": 5.670383946011757e-05, "loss": 2.3035, "step": 14050 }, { "epoch": 0.64, "learning_rate": 5.6573450009177595e-05, "loss": 2.3095, "step": 14060 }, { "epoch": 0.65, "learning_rate": 5.6443151485757005e-05, "loss": 2.3013, "step": 14070 }, { "epoch": 0.65, "learning_rate": 5.631294416267794e-05, "loss": 2.3074, "step": 14080 }, { "epoch": 0.65, "learning_rate": 5.6182828312571424e-05, "loss": 2.3108, "step": 14090 }, { "epoch": 0.65, "learning_rate": 5.605280420787715e-05, "loss": 2.3163, "step": 14100 }, { "epoch": 0.65, "learning_rate": 5.592287212084251e-05, "loss": 2.2957, "step": 14110 }, { "epoch": 0.65, "learning_rate": 5.579303232352241e-05, "loss": 2.307, "step": 14120 }, { "epoch": 0.65, "learning_rate": 5.56632850877784e-05, "loss": 2.294, "step": 14130 }, { "epoch": 0.65, "learning_rate": 5.553363068527826e-05, "loss": 2.3065, "step": 14140 }, { "epoch": 0.65, "learning_rate": 5.5404069387495336e-05, "loss": 2.3169, "step": 14150 }, { "epoch": 0.65, "learning_rate": 5.52746014657082e-05, "loss": 2.3231, "step": 14160 }, { "epoch": 0.65, "learning_rate": 5.514522719099967e-05, "loss": 2.3279, "step": 14170 }, { "epoch": 0.65, "learning_rate": 5.5015946834256704e-05, "loss": 2.3173, "step": 14180 }, { "epoch": 0.65, "learning_rate": 5.488676066616949e-05, "loss": 2.3125, "step": 14190 }, { "epoch": 0.65, "learning_rate": 5.4757668957230977e-05, "loss": 2.3106, "step": 14200 }, { "epoch": 0.65, "learning_rate": 5.462867197773647e-05, "loss": 2.3074, "step": 14210 }, { "epoch": 0.65, "learning_rate": 5.4499769997782746e-05, "loss": 2.301, "step": 14220 }, { "epoch": 0.65, "learning_rate": 5.4370963287267874e-05, "loss": 2.2743, "step": 14230 }, { "epoch": 0.65, "learning_rate": 5.424225211589024e-05, "loss": 2.314, "step": 14240 }, { "epoch": 0.65, "learning_rate": 5.411363675314836e-05, "loss": 2.2887, "step": 14250 }, { "epoch": 0.65, "learning_rate": 5.3985117468339986e-05, "loss": 2.3154, "step": 14260 }, { "epoch": 0.65, "learning_rate": 5.385669453056189e-05, "loss": 2.293, "step": 14270 }, { "epoch": 0.65, "learning_rate": 5.37283682087089e-05, "loss": 2.3178, "step": 14280 }, { "epoch": 0.66, "learning_rate": 5.360013877147374e-05, "loss": 2.3154, "step": 14290 }, { "epoch": 0.66, "learning_rate": 5.347200648734614e-05, "loss": 2.3046, "step": 14300 }, { "epoch": 0.66, "learning_rate": 5.3343971624612446e-05, "loss": 2.3247, "step": 14310 }, { "epoch": 0.66, "learning_rate": 5.3216034451355004e-05, "loss": 2.3111, "step": 14320 }, { "epoch": 0.66, "learning_rate": 5.30881952354517e-05, "loss": 2.319, "step": 14330 }, { "epoch": 0.66, "learning_rate": 5.296045424457521e-05, "loss": 2.297, "step": 14340 }, { "epoch": 0.66, "learning_rate": 5.2832811746192635e-05, "loss": 2.3038, "step": 14350 }, { "epoch": 0.66, "learning_rate": 5.270526800756475e-05, "loss": 2.2795, "step": 14360 }, { "epoch": 0.66, "learning_rate": 5.2577823295745646e-05, "loss": 2.3192, "step": 14370 }, { "epoch": 0.66, "learning_rate": 5.245047787758197e-05, "loss": 2.323, "step": 14380 }, { "epoch": 0.66, "learning_rate": 5.232323201971257e-05, "loss": 2.2988, "step": 14390 }, { "epoch": 0.66, "learning_rate": 5.219608598856778e-05, "loss": 2.3088, "step": 14400 }, { "epoch": 0.66, "learning_rate": 5.206904005036884e-05, "loss": 2.2934, "step": 14410 }, { "epoch": 0.66, "learning_rate": 5.194209447112759e-05, "loss": 2.2837, "step": 14420 }, { "epoch": 0.66, "learning_rate": 5.1815249516645536e-05, "loss": 2.3019, "step": 14430 }, { "epoch": 0.66, "learning_rate": 5.168850545251367e-05, "loss": 2.312, "step": 14440 }, { "epoch": 0.66, "learning_rate": 5.156186254411159e-05, "loss": 2.2882, "step": 14450 }, { "epoch": 0.66, "learning_rate": 5.143532105660724e-05, "loss": 2.3297, "step": 14460 }, { "epoch": 0.66, "learning_rate": 5.13088812549561e-05, "loss": 2.3327, "step": 14470 }, { "epoch": 0.66, "learning_rate": 5.118254340390072e-05, "loss": 2.313, "step": 14480 }, { "epoch": 0.66, "learning_rate": 5.1056307767970235e-05, "loss": 2.2993, "step": 14490 }, { "epoch": 0.66, "learning_rate": 5.09301746114798e-05, "loss": 2.3118, "step": 14500 }, { "epoch": 0.67, "learning_rate": 5.080414419852988e-05, "loss": 2.3066, "step": 14510 }, { "epoch": 0.67, "learning_rate": 5.067821679300597e-05, "loss": 2.3277, "step": 14520 }, { "epoch": 0.67, "learning_rate": 5.055239265857772e-05, "loss": 2.3103, "step": 14530 }, { "epoch": 0.67, "learning_rate": 5.0426672058698685e-05, "loss": 2.3147, "step": 14540 }, { "epoch": 0.67, "learning_rate": 5.030105525660554e-05, "loss": 2.3035, "step": 14550 }, { "epoch": 0.67, "learning_rate": 5.017554251531768e-05, "loss": 2.2993, "step": 14560 }, { "epoch": 0.67, "learning_rate": 5.005013409763657e-05, "loss": 2.2943, "step": 14570 }, { "epoch": 0.67, "learning_rate": 4.992483026614533e-05, "loss": 2.3149, "step": 14580 }, { "epoch": 0.67, "learning_rate": 4.9799631283207995e-05, "loss": 2.3263, "step": 14590 }, { "epoch": 0.67, "learning_rate": 4.967453741096907e-05, "loss": 2.3054, "step": 14600 }, { "epoch": 0.67, "learning_rate": 4.954954891135308e-05, "loss": 2.2943, "step": 14610 }, { "epoch": 0.67, "learning_rate": 4.9424666046063795e-05, "loss": 2.2874, "step": 14620 }, { "epoch": 0.67, "learning_rate": 4.92998890765839e-05, "loss": 2.3065, "step": 14630 }, { "epoch": 0.67, "learning_rate": 4.9175218264174315e-05, "loss": 2.2987, "step": 14640 }, { "epoch": 0.67, "learning_rate": 4.905065386987365e-05, "loss": 2.3038, "step": 14650 }, { "epoch": 0.67, "learning_rate": 4.8926196154497713e-05, "loss": 2.2866, "step": 14660 }, { "epoch": 0.67, "learning_rate": 4.8801845378639036e-05, "loss": 2.2866, "step": 14670 }, { "epoch": 0.67, "learning_rate": 4.8677601802666065e-05, "loss": 2.3225, "step": 14680 }, { "epoch": 0.67, "learning_rate": 4.8553465686722976e-05, "loss": 2.3008, "step": 14690 }, { "epoch": 0.67, "learning_rate": 4.8429437290728774e-05, "loss": 2.3137, "step": 14700 }, { "epoch": 0.67, "learning_rate": 4.8305516874377086e-05, "loss": 2.3078, "step": 14710 }, { "epoch": 0.67, "learning_rate": 4.8181704697135265e-05, "loss": 2.3098, "step": 14720 }, { "epoch": 0.68, "learning_rate": 4.8058001018244204e-05, "loss": 2.3152, "step": 14730 }, { "epoch": 0.68, "learning_rate": 4.793440609671748e-05, "loss": 2.2998, "step": 14740 }, { "epoch": 0.68, "learning_rate": 4.781092019134107e-05, "loss": 2.309, "step": 14750 }, { "epoch": 0.68, "learning_rate": 4.768754356067255e-05, "loss": 2.2867, "step": 14760 }, { "epoch": 0.68, "learning_rate": 4.756427646304088e-05, "loss": 2.3157, "step": 14770 }, { "epoch": 0.68, "learning_rate": 4.744111915654551e-05, "loss": 2.3163, "step": 14780 }, { "epoch": 0.68, "learning_rate": 4.731807189905608e-05, "loss": 2.2974, "step": 14790 }, { "epoch": 0.68, "learning_rate": 4.719513494821176e-05, "loss": 2.3136, "step": 14800 }, { "epoch": 0.68, "learning_rate": 4.7072308561420866e-05, "loss": 2.3072, "step": 14810 }, { "epoch": 0.68, "learning_rate": 4.6949592995860106e-05, "loss": 2.3148, "step": 14820 }, { "epoch": 0.68, "learning_rate": 4.6826988508474134e-05, "loss": 2.3122, "step": 14830 }, { "epoch": 0.68, "learning_rate": 4.6704495355975164e-05, "loss": 2.3239, "step": 14840 }, { "epoch": 0.68, "learning_rate": 4.658211379484213e-05, "loss": 2.3139, "step": 14850 }, { "epoch": 0.68, "learning_rate": 4.645984408132045e-05, "loss": 2.3075, "step": 14860 }, { "epoch": 0.68, "learning_rate": 4.6337686471421225e-05, "loss": 2.2895, "step": 14870 }, { "epoch": 0.68, "learning_rate": 4.621564122092097e-05, "loss": 2.3127, "step": 14880 }, { "epoch": 0.68, "learning_rate": 4.60937085853608e-05, "loss": 2.3051, "step": 14890 }, { "epoch": 0.68, "learning_rate": 4.597188882004616e-05, "loss": 2.3095, "step": 14900 }, { "epoch": 0.68, "learning_rate": 4.585018218004603e-05, "loss": 2.3097, "step": 14910 }, { "epoch": 0.68, "learning_rate": 4.5728588920192696e-05, "loss": 2.3249, "step": 14920 }, { "epoch": 0.68, "learning_rate": 4.5607109295080865e-05, "loss": 2.3014, "step": 14930 }, { "epoch": 0.68, "learning_rate": 4.548574355906746e-05, "loss": 2.311, "step": 14940 }, { "epoch": 0.69, "learning_rate": 4.5364491966270864e-05, "loss": 2.2995, "step": 14950 }, { "epoch": 0.69, "learning_rate": 4.5243354770570465e-05, "loss": 2.2954, "step": 14960 }, { "epoch": 0.69, "learning_rate": 4.51223322256061e-05, "loss": 2.32, "step": 14970 }, { "epoch": 0.69, "learning_rate": 4.5001424584777674e-05, "loss": 2.2899, "step": 14980 }, { "epoch": 0.69, "learning_rate": 4.488063210124436e-05, "loss": 2.3119, "step": 14990 }, { "epoch": 0.69, "learning_rate": 4.475995502792424e-05, "loss": 2.3186, "step": 15000 }, { "epoch": 0.69, "learning_rate": 4.4639393617493855e-05, "loss": 2.2999, "step": 15010 }, { "epoch": 0.69, "learning_rate": 4.4518948122387394e-05, "loss": 2.3013, "step": 15020 }, { "epoch": 0.69, "learning_rate": 4.439861879479652e-05, "loss": 2.3055, "step": 15030 }, { "epoch": 0.69, "learning_rate": 4.4278405886669495e-05, "loss": 2.3014, "step": 15040 }, { "epoch": 0.69, "learning_rate": 4.415830964971098e-05, "loss": 2.2959, "step": 15050 }, { "epoch": 0.69, "learning_rate": 4.4038330335381195e-05, "loss": 2.3173, "step": 15060 }, { "epoch": 0.69, "learning_rate": 4.3918468194895677e-05, "loss": 2.2939, "step": 15070 }, { "epoch": 0.69, "learning_rate": 4.379872347922449e-05, "loss": 2.2893, "step": 15080 }, { "epoch": 0.69, "learning_rate": 4.3679096439091996e-05, "loss": 2.3129, "step": 15090 }, { "epoch": 0.69, "learning_rate": 4.355958732497597e-05, "loss": 2.3153, "step": 15100 }, { "epoch": 0.69, "learning_rate": 4.3440196387107454e-05, "loss": 2.29, "step": 15110 }, { "epoch": 0.69, "learning_rate": 4.332092387546994e-05, "loss": 2.2898, "step": 15120 }, { "epoch": 0.69, "learning_rate": 4.320177003979898e-05, "loss": 2.278, "step": 15130 }, { "epoch": 0.69, "learning_rate": 4.308273512958161e-05, "loss": 2.3059, "step": 15140 }, { "epoch": 0.69, "learning_rate": 4.296381939405595e-05, "loss": 2.2969, "step": 15150 }, { "epoch": 0.7, "learning_rate": 4.284502308221047e-05, "loss": 2.2898, "step": 15160 }, { "epoch": 0.7, "learning_rate": 4.272634644278372e-05, "loss": 2.321, "step": 15170 }, { "epoch": 0.7, "learning_rate": 4.260778972426352e-05, "loss": 2.308, "step": 15180 }, { "epoch": 0.7, "learning_rate": 4.248935317488678e-05, "loss": 2.2984, "step": 15190 }, { "epoch": 0.7, "learning_rate": 4.237103704263864e-05, "loss": 2.3131, "step": 15200 }, { "epoch": 0.7, "learning_rate": 4.2252841575252135e-05, "loss": 2.3112, "step": 15210 }, { "epoch": 0.7, "learning_rate": 4.2134767020207765e-05, "loss": 2.3169, "step": 15220 }, { "epoch": 0.7, "learning_rate": 4.201681362473269e-05, "loss": 2.3182, "step": 15230 }, { "epoch": 0.7, "learning_rate": 4.189898163580055e-05, "loss": 2.3171, "step": 15240 }, { "epoch": 0.7, "learning_rate": 4.178127130013062e-05, "loss": 2.3055, "step": 15250 }, { "epoch": 0.7, "learning_rate": 4.1663682864187624e-05, "loss": 2.3057, "step": 15260 }, { "epoch": 0.7, "learning_rate": 4.1546216574180876e-05, "loss": 2.3231, "step": 15270 }, { "epoch": 0.7, "learning_rate": 4.14288726760641e-05, "loss": 2.3223, "step": 15280 }, { "epoch": 0.7, "learning_rate": 4.1311651415534644e-05, "loss": 2.3112, "step": 15290 }, { "epoch": 0.7, "learning_rate": 4.11945530380331e-05, "loss": 2.2802, "step": 15300 }, { "epoch": 0.7, "learning_rate": 4.107757778874274e-05, "loss": 2.3204, "step": 15310 }, { "epoch": 0.7, "learning_rate": 4.096072591258913e-05, "loss": 2.3151, "step": 15320 }, { "epoch": 0.7, "learning_rate": 4.0843997654239374e-05, "loss": 2.3136, "step": 15330 }, { "epoch": 0.7, "learning_rate": 4.0727393258101876e-05, "loss": 2.3046, "step": 15340 }, { "epoch": 0.7, "learning_rate": 4.0610912968325565e-05, "loss": 2.3124, "step": 15350 }, { "epoch": 0.7, "learning_rate": 4.049455702879965e-05, "loss": 2.2923, "step": 15360 }, { "epoch": 0.7, "learning_rate": 4.0378325683152815e-05, "loss": 2.2849, "step": 15370 }, { "epoch": 0.71, "learning_rate": 4.0262219174753055e-05, "loss": 2.294, "step": 15380 }, { "epoch": 0.71, "learning_rate": 4.014623774670677e-05, "loss": 2.325, "step": 15390 }, { "epoch": 0.71, "learning_rate": 4.0030381641858674e-05, "loss": 2.3117, "step": 15400 }, { "epoch": 0.71, "learning_rate": 3.99146511027909e-05, "loss": 2.3086, "step": 15410 }, { "epoch": 0.71, "learning_rate": 3.9799046371822726e-05, "loss": 2.302, "step": 15420 }, { "epoch": 0.71, "learning_rate": 3.968356769101008e-05, "loss": 2.3094, "step": 15430 }, { "epoch": 0.71, "learning_rate": 3.9568215302144885e-05, "loss": 2.2963, "step": 15440 }, { "epoch": 0.71, "learning_rate": 3.945298944675462e-05, "loss": 2.3276, "step": 15450 }, { "epoch": 0.71, "learning_rate": 3.933789036610192e-05, "loss": 2.3057, "step": 15460 }, { "epoch": 0.71, "learning_rate": 3.9222918301183885e-05, "loss": 2.2968, "step": 15470 }, { "epoch": 0.71, "learning_rate": 3.9108073492731654e-05, "loss": 2.2894, "step": 15480 }, { "epoch": 0.71, "learning_rate": 3.8993356181210025e-05, "loss": 2.3068, "step": 15490 }, { "epoch": 0.71, "learning_rate": 3.8878766606816705e-05, "loss": 2.3008, "step": 15500 }, { "epoch": 0.71, "learning_rate": 3.8764305009482084e-05, "loss": 2.2847, "step": 15510 }, { "epoch": 0.71, "learning_rate": 3.864997162886844e-05, "loss": 2.3015, "step": 15520 }, { "epoch": 0.71, "learning_rate": 3.853576670436973e-05, "loss": 2.3248, "step": 15530 }, { "epoch": 0.71, "learning_rate": 3.842169047511079e-05, "loss": 2.3129, "step": 15540 }, { "epoch": 0.71, "learning_rate": 3.830774317994717e-05, "loss": 2.2694, "step": 15550 }, { "epoch": 0.71, "learning_rate": 3.819392505746431e-05, "loss": 2.2874, "step": 15560 }, { "epoch": 0.71, "learning_rate": 3.808023634597728e-05, "loss": 2.3114, "step": 15570 }, { "epoch": 0.71, "learning_rate": 3.7966677283530083e-05, "loss": 2.2998, "step": 15580 }, { "epoch": 0.71, "learning_rate": 3.78532481078954e-05, "loss": 2.2926, "step": 15590 }, { "epoch": 0.72, "learning_rate": 3.773994905657384e-05, "loss": 2.309, "step": 15600 }, { "epoch": 0.72, "learning_rate": 3.762678036679358e-05, "loss": 2.3068, "step": 15610 }, { "epoch": 0.72, "learning_rate": 3.751374227550982e-05, "loss": 2.3137, "step": 15620 }, { "epoch": 0.72, "learning_rate": 3.740083501940441e-05, "loss": 2.3001, "step": 15630 }, { "epoch": 0.72, "learning_rate": 3.7288058834885165e-05, "loss": 2.3113, "step": 15640 }, { "epoch": 0.72, "learning_rate": 3.717541395808542e-05, "loss": 2.3245, "step": 15650 }, { "epoch": 0.72, "learning_rate": 3.706290062486374e-05, "loss": 2.3196, "step": 15660 }, { "epoch": 0.72, "learning_rate": 3.695051907080306e-05, "loss": 2.3048, "step": 15670 }, { "epoch": 0.72, "learning_rate": 3.6838269531210565e-05, "loss": 2.2982, "step": 15680 }, { "epoch": 0.72, "learning_rate": 3.672615224111688e-05, "loss": 2.3063, "step": 15690 }, { "epoch": 0.72, "learning_rate": 3.661416743527587e-05, "loss": 2.3045, "step": 15700 }, { "epoch": 0.72, "learning_rate": 3.650231534816384e-05, "loss": 2.3311, "step": 15710 }, { "epoch": 0.72, "learning_rate": 3.6390596213979366e-05, "loss": 2.2962, "step": 15720 }, { "epoch": 0.72, "learning_rate": 3.62790102666425e-05, "loss": 2.2972, "step": 15730 }, { "epoch": 0.72, "learning_rate": 3.6167557739794556e-05, "loss": 2.2845, "step": 15740 }, { "epoch": 0.72, "learning_rate": 3.6056238866797356e-05, "loss": 2.3186, "step": 15750 }, { "epoch": 0.72, "learning_rate": 3.5945053880733e-05, "loss": 2.297, "step": 15760 }, { "epoch": 0.72, "learning_rate": 3.5834003014403197e-05, "loss": 2.3066, "step": 15770 }, { "epoch": 0.72, "learning_rate": 3.572308650032878e-05, "loss": 2.2711, "step": 15780 }, { "epoch": 0.72, "learning_rate": 3.5612304570749335e-05, "loss": 2.3067, "step": 15790 }, { "epoch": 0.72, "learning_rate": 3.5501657457622684e-05, "loss": 2.2966, "step": 15800 }, { "epoch": 0.72, "learning_rate": 3.5391145392624305e-05, "loss": 2.3022, "step": 15810 }, { "epoch": 0.73, "learning_rate": 3.5280768607146876e-05, "loss": 2.2973, "step": 15820 }, { "epoch": 0.73, "learning_rate": 3.517052733229994e-05, "loss": 2.2985, "step": 15830 }, { "epoch": 0.73, "learning_rate": 3.5060421798909195e-05, "loss": 2.3133, "step": 15840 }, { "epoch": 0.73, "learning_rate": 3.4950452237516216e-05, "loss": 2.3151, "step": 15850 }, { "epoch": 0.73, "learning_rate": 3.484061887837777e-05, "loss": 2.3002, "step": 15860 }, { "epoch": 0.73, "learning_rate": 3.473092195146556e-05, "loss": 2.296, "step": 15870 }, { "epoch": 0.73, "learning_rate": 3.462136168646547e-05, "loss": 2.3132, "step": 15880 }, { "epoch": 0.73, "learning_rate": 3.451193831277743e-05, "loss": 2.3139, "step": 15890 }, { "epoch": 0.73, "learning_rate": 3.4402652059514565e-05, "loss": 2.2927, "step": 15900 }, { "epoch": 0.73, "learning_rate": 3.4293503155503026e-05, "loss": 2.3147, "step": 15910 }, { "epoch": 0.73, "learning_rate": 3.4184491829281264e-05, "loss": 2.3012, "step": 15920 }, { "epoch": 0.73, "learning_rate": 3.40756183090998e-05, "loss": 2.3054, "step": 15930 }, { "epoch": 0.73, "learning_rate": 3.39668828229205e-05, "loss": 2.3129, "step": 15940 }, { "epoch": 0.73, "learning_rate": 3.385828559841625e-05, "loss": 2.3001, "step": 15950 }, { "epoch": 0.73, "learning_rate": 3.374982686297041e-05, "loss": 2.3276, "step": 15960 }, { "epoch": 0.73, "learning_rate": 3.364150684367647e-05, "loss": 2.3241, "step": 15970 }, { "epoch": 0.73, "learning_rate": 3.353332576733735e-05, "loss": 2.3157, "step": 15980 }, { "epoch": 0.73, "learning_rate": 3.342528386046514e-05, "loss": 2.3026, "step": 15990 }, { "epoch": 0.73, "learning_rate": 3.331738134928049e-05, "loss": 2.3175, "step": 16000 }, { "epoch": 0.73, "eval_accuracy": 0.548459835328819, "eval_loss": 2.161773920059204, "eval_runtime": 10.6727, "eval_samples_per_second": 123.4, "eval_steps_per_second": 1.031, "step": 16000 }, { "epoch": 0.73, "learning_rate": 3.320961845971212e-05, "loss": 2.2986, "step": 16010 }, { "epoch": 0.73, "learning_rate": 3.310199541739656e-05, "loss": 2.3007, "step": 16020 }, { "epoch": 0.73, "learning_rate": 3.2994512447677374e-05, "loss": 2.291, "step": 16030 }, { "epoch": 0.74, "learning_rate": 3.288716977560495e-05, "loss": 2.3156, "step": 16040 }, { "epoch": 0.74, "learning_rate": 3.277996762593582e-05, "loss": 2.2971, "step": 16050 }, { "epoch": 0.74, "learning_rate": 3.267290622313239e-05, "loss": 2.3141, "step": 16060 }, { "epoch": 0.74, "learning_rate": 3.2565985791362254e-05, "loss": 2.2884, "step": 16070 }, { "epoch": 0.74, "learning_rate": 3.2459206554497954e-05, "loss": 2.3175, "step": 16080 }, { "epoch": 0.74, "learning_rate": 3.2352568736116285e-05, "loss": 2.3089, "step": 16090 }, { "epoch": 0.74, "learning_rate": 3.2246072559498045e-05, "loss": 2.3024, "step": 16100 }, { "epoch": 0.74, "learning_rate": 3.213971824762737e-05, "loss": 2.2867, "step": 16110 }, { "epoch": 0.74, "learning_rate": 3.203350602319138e-05, "loss": 2.3223, "step": 16120 }, { "epoch": 0.74, "learning_rate": 3.19274361085797e-05, "loss": 2.2916, "step": 16130 }, { "epoch": 0.74, "learning_rate": 3.182150872588401e-05, "loss": 2.2983, "step": 16140 }, { "epoch": 0.74, "learning_rate": 3.171572409689747e-05, "loss": 2.2905, "step": 16150 }, { "epoch": 0.74, "learning_rate": 3.161008244311447e-05, "loss": 2.3159, "step": 16160 }, { "epoch": 0.74, "learning_rate": 3.1504583985729886e-05, "loss": 2.316, "step": 16170 }, { "epoch": 0.74, "learning_rate": 3.139922894563891e-05, "loss": 2.3099, "step": 16180 }, { "epoch": 0.74, "learning_rate": 3.1294017543436304e-05, "loss": 2.3322, "step": 16190 }, { "epoch": 0.74, "learning_rate": 3.1188949999416203e-05, "loss": 2.3297, "step": 16200 }, { "epoch": 0.74, "learning_rate": 3.108402653357145e-05, "loss": 2.2975, "step": 16210 }, { "epoch": 0.74, "learning_rate": 3.097924736559321e-05, "loss": 2.2976, "step": 16220 }, { "epoch": 0.74, "learning_rate": 3.087461271487059e-05, "loss": 2.3075, "step": 16230 }, { "epoch": 0.74, "learning_rate": 3.077012280048999e-05, "loss": 2.3148, "step": 16240 }, { "epoch": 0.75, "learning_rate": 3.066577784123489e-05, "loss": 2.2901, "step": 16250 }, { "epoch": 0.75, "learning_rate": 3.0561578055585164e-05, "loss": 2.3105, "step": 16260 }, { "epoch": 0.75, "learning_rate": 3.04575236617167e-05, "loss": 2.2884, "step": 16270 }, { "epoch": 0.75, "learning_rate": 3.035361487750107e-05, "loss": 2.3152, "step": 16280 }, { "epoch": 0.75, "learning_rate": 3.024985192050488e-05, "loss": 2.291, "step": 16290 }, { "epoch": 0.75, "learning_rate": 3.0146235007989376e-05, "loss": 2.2914, "step": 16300 }, { "epoch": 0.75, "learning_rate": 3.0042764356910137e-05, "loss": 2.3059, "step": 16310 }, { "epoch": 0.75, "learning_rate": 2.9939440183916334e-05, "loss": 2.2772, "step": 16320 }, { "epoch": 0.75, "learning_rate": 2.9836262705350594e-05, "loss": 2.2922, "step": 16330 }, { "epoch": 0.75, "learning_rate": 2.9733232137248258e-05, "loss": 2.307, "step": 16340 }, { "epoch": 0.75, "learning_rate": 2.9630348695337184e-05, "loss": 2.3241, "step": 16350 }, { "epoch": 0.75, "learning_rate": 2.9527612595037045e-05, "loss": 2.2688, "step": 16360 }, { "epoch": 0.75, "learning_rate": 2.9425024051459138e-05, "loss": 2.2978, "step": 16370 }, { "epoch": 0.75, "learning_rate": 2.932258327940569e-05, "loss": 2.2968, "step": 16380 }, { "epoch": 0.75, "learning_rate": 2.922029049336963e-05, "loss": 2.3018, "step": 16390 }, { "epoch": 0.75, "learning_rate": 2.911814590753391e-05, "loss": 2.3042, "step": 16400 }, { "epoch": 0.75, "learning_rate": 2.901614973577125e-05, "loss": 2.3177, "step": 16410 }, { "epoch": 0.75, "learning_rate": 2.8914302191643638e-05, "loss": 2.2883, "step": 16420 }, { "epoch": 0.75, "learning_rate": 2.88126034884018e-05, "loss": 2.3142, "step": 16430 }, { "epoch": 0.75, "learning_rate": 2.8711053838984815e-05, "loss": 2.3003, "step": 16440 }, { "epoch": 0.75, "learning_rate": 2.860965345601977e-05, "loss": 2.3007, "step": 16450 }, { "epoch": 0.75, "learning_rate": 2.85084025518211e-05, "loss": 2.3066, "step": 16460 }, { "epoch": 0.76, "learning_rate": 2.840730133839028e-05, "loss": 2.3003, "step": 16470 }, { "epoch": 0.76, "learning_rate": 2.8306350027415428e-05, "loss": 2.3037, "step": 16480 }, { "epoch": 0.76, "learning_rate": 2.82055488302707e-05, "loss": 2.2898, "step": 16490 }, { "epoch": 0.76, "learning_rate": 2.810489795801603e-05, "loss": 2.3115, "step": 16500 }, { "epoch": 0.76, "learning_rate": 2.8004397621396507e-05, "loss": 2.2959, "step": 16510 }, { "epoch": 0.76, "learning_rate": 2.790404803084211e-05, "loss": 2.3228, "step": 16520 }, { "epoch": 0.76, "learning_rate": 2.7803849396467107e-05, "loss": 2.3131, "step": 16530 }, { "epoch": 0.76, "learning_rate": 2.7703801928069772e-05, "loss": 2.2947, "step": 16540 }, { "epoch": 0.76, "learning_rate": 2.760390583513176e-05, "loss": 2.3233, "step": 16550 }, { "epoch": 0.76, "learning_rate": 2.75041613268179e-05, "loss": 2.3242, "step": 16560 }, { "epoch": 0.76, "learning_rate": 2.74045686119755e-05, "loss": 2.3177, "step": 16570 }, { "epoch": 0.76, "learning_rate": 2.730512789913415e-05, "loss": 2.28, "step": 16580 }, { "epoch": 0.76, "learning_rate": 2.7205839396505118e-05, "loss": 2.3258, "step": 16590 }, { "epoch": 0.76, "learning_rate": 2.710670331198094e-05, "loss": 2.3015, "step": 16600 }, { "epoch": 0.76, "learning_rate": 2.7007719853135082e-05, "loss": 2.3047, "step": 16610 }, { "epoch": 0.76, "learning_rate": 2.6908889227221378e-05, "loss": 2.2958, "step": 16620 }, { "epoch": 0.76, "learning_rate": 2.6810211641173742e-05, "loss": 2.3081, "step": 16630 }, { "epoch": 0.76, "learning_rate": 2.6711687301605537e-05, "loss": 2.3224, "step": 16640 }, { "epoch": 0.76, "learning_rate": 2.6613316414809377e-05, "loss": 2.2959, "step": 16650 }, { "epoch": 0.76, "learning_rate": 2.6515099186756443e-05, "loss": 2.2976, "step": 16660 }, { "epoch": 0.76, "learning_rate": 2.6417035823096315e-05, "loss": 2.301, "step": 16670 }, { "epoch": 0.76, "learning_rate": 2.631912652915629e-05, "loss": 2.3015, "step": 16680 }, { "epoch": 0.77, "learning_rate": 2.6221371509941173e-05, "loss": 2.3011, "step": 16690 }, { "epoch": 0.77, "learning_rate": 2.6123770970132632e-05, "loss": 2.2944, "step": 16700 }, { "epoch": 0.77, "learning_rate": 2.6026325114089023e-05, "loss": 2.2846, "step": 16710 }, { "epoch": 0.77, "learning_rate": 2.5929034145844655e-05, "loss": 2.2901, "step": 16720 }, { "epoch": 0.77, "learning_rate": 2.58318982691097e-05, "loss": 2.3167, "step": 16730 }, { "epoch": 0.77, "learning_rate": 2.5734917687269455e-05, "loss": 2.2903, "step": 16740 }, { "epoch": 0.77, "learning_rate": 2.5638092603384156e-05, "loss": 2.3095, "step": 16750 }, { "epoch": 0.77, "learning_rate": 2.55414232201884e-05, "loss": 2.2955, "step": 16760 }, { "epoch": 0.77, "learning_rate": 2.5444909740090784e-05, "loss": 2.279, "step": 16770 }, { "epoch": 0.77, "learning_rate": 2.534855236517344e-05, "loss": 2.309, "step": 16780 }, { "epoch": 0.77, "learning_rate": 2.5252351297191747e-05, "loss": 2.2961, "step": 16790 }, { "epoch": 0.77, "learning_rate": 2.5156306737573676e-05, "loss": 2.3033, "step": 16800 }, { "epoch": 0.77, "learning_rate": 2.50604188874196e-05, "loss": 2.312, "step": 16810 }, { "epoch": 0.77, "learning_rate": 2.496468794750172e-05, "loss": 2.2992, "step": 16820 }, { "epoch": 0.77, "learning_rate": 2.486911411826367e-05, "loss": 2.3011, "step": 16830 }, { "epoch": 0.77, "learning_rate": 2.47736975998202e-05, "loss": 2.3055, "step": 16840 }, { "epoch": 0.77, "learning_rate": 2.4678438591956576e-05, "loss": 2.3156, "step": 16850 }, { "epoch": 0.77, "learning_rate": 2.4583337294128393e-05, "loss": 2.3154, "step": 16860 }, { "epoch": 0.77, "learning_rate": 2.4488393905460892e-05, "loss": 2.2837, "step": 16870 }, { "epoch": 0.77, "learning_rate": 2.439360862474881e-05, "loss": 2.3017, "step": 16880 }, { "epoch": 0.77, "learning_rate": 2.429898165045571e-05, "loss": 2.282, "step": 16890 }, { "epoch": 0.77, "learning_rate": 2.4204513180713796e-05, "loss": 2.3139, "step": 16900 }, { "epoch": 0.78, "learning_rate": 2.4110203413323306e-05, "loss": 2.2838, "step": 16910 }, { "epoch": 0.78, "learning_rate": 2.4016052545752267e-05, "loss": 2.2969, "step": 16920 }, { "epoch": 0.78, "learning_rate": 2.3922060775135923e-05, "loss": 2.3137, "step": 16930 }, { "epoch": 0.78, "learning_rate": 2.3828228298276435e-05, "loss": 2.2933, "step": 16940 }, { "epoch": 0.78, "learning_rate": 2.3734555311642393e-05, "loss": 2.3056, "step": 16950 }, { "epoch": 0.78, "learning_rate": 2.3641042011368518e-05, "loss": 2.2857, "step": 16960 }, { "epoch": 0.78, "learning_rate": 2.3547688593255102e-05, "loss": 2.3257, "step": 16970 }, { "epoch": 0.78, "learning_rate": 2.3454495252767727e-05, "loss": 2.2987, "step": 16980 }, { "epoch": 0.78, "learning_rate": 2.3361462185036742e-05, "loss": 2.3047, "step": 16990 }, { "epoch": 0.78, "learning_rate": 2.326858958485698e-05, "loss": 2.304, "step": 17000 }, { "epoch": 0.78, "learning_rate": 2.317587764668725e-05, "loss": 2.3215, "step": 17010 }, { "epoch": 0.78, "learning_rate": 2.3083326564649897e-05, "loss": 2.3158, "step": 17020 }, { "epoch": 0.78, "learning_rate": 2.2990936532530616e-05, "loss": 2.2929, "step": 17030 }, { "epoch": 0.78, "learning_rate": 2.2898707743777738e-05, "loss": 2.3008, "step": 17040 }, { "epoch": 0.78, "learning_rate": 2.2806640391502086e-05, "loss": 2.2874, "step": 17050 }, { "epoch": 0.78, "learning_rate": 2.2714734668476377e-05, "loss": 2.3024, "step": 17060 }, { "epoch": 0.78, "learning_rate": 2.2622990767135012e-05, "loss": 2.2978, "step": 17070 }, { "epoch": 0.78, "learning_rate": 2.2531408879573457e-05, "loss": 2.298, "step": 17080 }, { "epoch": 0.78, "learning_rate": 2.2439989197547984e-05, "loss": 2.3104, "step": 17090 }, { "epoch": 0.78, "learning_rate": 2.2348731912475295e-05, "loss": 2.285, "step": 17100 }, { "epoch": 0.78, "learning_rate": 2.225763721543198e-05, "loss": 2.3173, "step": 17110 }, { "epoch": 0.78, "learning_rate": 2.2166705297154234e-05, "loss": 2.2826, "step": 17120 }, { "epoch": 0.79, "learning_rate": 2.207593634803744e-05, "loss": 2.3086, "step": 17130 }, { "epoch": 0.79, "learning_rate": 2.1985330558135698e-05, "loss": 2.2742, "step": 17140 }, { "epoch": 0.79, "learning_rate": 2.189488811716156e-05, "loss": 2.2977, "step": 17150 }, { "epoch": 0.79, "learning_rate": 2.1804609214485462e-05, "loss": 2.3063, "step": 17160 }, { "epoch": 0.79, "learning_rate": 2.1714494039135525e-05, "loss": 2.3053, "step": 17170 }, { "epoch": 0.79, "learning_rate": 2.162454277979694e-05, "loss": 2.3253, "step": 17180 }, { "epoch": 0.79, "learning_rate": 2.1534755624811796e-05, "loss": 2.2944, "step": 17190 }, { "epoch": 0.79, "learning_rate": 2.1445132762178477e-05, "loss": 2.2964, "step": 17200 }, { "epoch": 0.79, "learning_rate": 2.135567437955146e-05, "loss": 2.2812, "step": 17210 }, { "epoch": 0.79, "learning_rate": 2.126638066424076e-05, "loss": 2.3111, "step": 17220 }, { "epoch": 0.79, "learning_rate": 2.1177251803211607e-05, "loss": 2.2905, "step": 17230 }, { "epoch": 0.79, "learning_rate": 2.1088287983084142e-05, "loss": 2.2832, "step": 17240 }, { "epoch": 0.79, "learning_rate": 2.0999489390132843e-05, "loss": 2.3018, "step": 17250 }, { "epoch": 0.79, "learning_rate": 2.0910856210286232e-05, "loss": 2.3035, "step": 17260 }, { "epoch": 0.79, "learning_rate": 2.08223886291266e-05, "loss": 2.2931, "step": 17270 }, { "epoch": 0.79, "learning_rate": 2.0734086831889377e-05, "loss": 2.3126, "step": 17280 }, { "epoch": 0.79, "learning_rate": 2.0645951003462906e-05, "loss": 2.2971, "step": 17290 }, { "epoch": 0.79, "learning_rate": 2.0557981328388063e-05, "loss": 2.305, "step": 17300 }, { "epoch": 0.79, "learning_rate": 2.0470177990857762e-05, "loss": 2.2776, "step": 17310 }, { "epoch": 0.79, "learning_rate": 2.038254117471673e-05, "loss": 2.3058, "step": 17320 }, { "epoch": 0.79, "learning_rate": 2.029507106346089e-05, "loss": 2.2888, "step": 17330 }, { "epoch": 0.8, "learning_rate": 2.020776784023727e-05, "loss": 2.2938, "step": 17340 }, { "epoch": 0.8, "learning_rate": 2.0120631687843327e-05, "loss": 2.3006, "step": 17350 }, { "epoch": 0.8, "learning_rate": 2.0033662788726815e-05, "loss": 2.2834, "step": 17360 }, { "epoch": 0.8, "learning_rate": 1.9946861324985177e-05, "loss": 2.2852, "step": 17370 }, { "epoch": 0.8, "learning_rate": 1.98602274783654e-05, "loss": 2.2909, "step": 17380 }, { "epoch": 0.8, "learning_rate": 1.9773761430263383e-05, "loss": 2.3079, "step": 17390 }, { "epoch": 0.8, "learning_rate": 1.96874633617238e-05, "loss": 2.2947, "step": 17400 }, { "epoch": 0.8, "learning_rate": 1.960133345343953e-05, "loss": 2.2873, "step": 17410 }, { "epoch": 0.8, "learning_rate": 1.9515371885751366e-05, "loss": 2.3176, "step": 17420 }, { "epoch": 0.8, "learning_rate": 1.9429578838647644e-05, "loss": 2.2847, "step": 17430 }, { "epoch": 0.8, "learning_rate": 1.9343954491763816e-05, "loss": 2.3363, "step": 17440 }, { "epoch": 0.8, "learning_rate": 1.925849902438218e-05, "loss": 2.3102, "step": 17450 }, { "epoch": 0.8, "learning_rate": 1.9173212615431325e-05, "loss": 2.2951, "step": 17460 }, { "epoch": 0.8, "learning_rate": 1.9088095443485964e-05, "loss": 2.3141, "step": 17470 }, { "epoch": 0.8, "learning_rate": 1.9003147686766355e-05, "loss": 2.3042, "step": 17480 }, { "epoch": 0.8, "learning_rate": 1.8918369523138147e-05, "loss": 2.3026, "step": 17490 }, { "epoch": 0.8, "learning_rate": 1.883376113011177e-05, "loss": 2.3113, "step": 17500 }, { "epoch": 0.8, "learning_rate": 1.8749322684842306e-05, "loss": 2.286, "step": 17510 }, { "epoch": 0.8, "learning_rate": 1.8665054364128865e-05, "loss": 2.3095, "step": 17520 }, { "epoch": 0.8, "learning_rate": 1.858095634441448e-05, "loss": 2.2883, "step": 17530 }, { "epoch": 0.8, "learning_rate": 1.8497028801785476e-05, "loss": 2.2918, "step": 17540 }, { "epoch": 0.8, "learning_rate": 1.841327191197135e-05, "loss": 2.2789, "step": 17550 }, { "epoch": 0.81, "learning_rate": 1.832968585034418e-05, "loss": 2.3008, "step": 17560 }, { "epoch": 0.81, "learning_rate": 1.824627079191844e-05, "loss": 2.2852, "step": 17570 }, { "epoch": 0.81, "learning_rate": 1.816302691135051e-05, "loss": 2.2731, "step": 17580 }, { "epoch": 0.81, "learning_rate": 1.8079954382938334e-05, "loss": 2.3033, "step": 17590 }, { "epoch": 0.81, "learning_rate": 1.7997053380621108e-05, "loss": 2.2707, "step": 17600 }, { "epoch": 0.81, "learning_rate": 1.7914324077978905e-05, "loss": 2.2908, "step": 17610 }, { "epoch": 0.81, "learning_rate": 1.7831766648232252e-05, "loss": 2.3059, "step": 17620 }, { "epoch": 0.81, "learning_rate": 1.774938126424177e-05, "loss": 2.2947, "step": 17630 }, { "epoch": 0.81, "learning_rate": 1.7667168098507958e-05, "loss": 2.2863, "step": 17640 }, { "epoch": 0.81, "learning_rate": 1.758512732317059e-05, "loss": 2.3019, "step": 17650 }, { "epoch": 0.81, "learning_rate": 1.7503259110008607e-05, "loss": 2.3132, "step": 17660 }, { "epoch": 0.81, "learning_rate": 1.742156363043952e-05, "loss": 2.3237, "step": 17670 }, { "epoch": 0.81, "learning_rate": 1.7340041055519275e-05, "loss": 2.2949, "step": 17680 }, { "epoch": 0.81, "learning_rate": 1.7258691555941686e-05, "loss": 2.3226, "step": 17690 }, { "epoch": 0.81, "learning_rate": 1.717751530203827e-05, "loss": 2.2936, "step": 17700 }, { "epoch": 0.81, "learning_rate": 1.7096512463777702e-05, "loss": 2.2997, "step": 17710 }, { "epoch": 0.81, "learning_rate": 1.7015683210765664e-05, "loss": 2.2835, "step": 17720 }, { "epoch": 0.81, "learning_rate": 1.693502771224428e-05, "loss": 2.3139, "step": 17730 }, { "epoch": 0.81, "learning_rate": 1.6854546137091932e-05, "loss": 2.3089, "step": 17740 }, { "epoch": 0.81, "learning_rate": 1.6774238653822814e-05, "loss": 2.3076, "step": 17750 }, { "epoch": 0.81, "learning_rate": 1.6694105430586583e-05, "loss": 2.2998, "step": 17760 }, { "epoch": 0.81, "learning_rate": 1.6614146635168025e-05, "loss": 2.3056, "step": 17770 }, { "epoch": 0.82, "learning_rate": 1.6534362434986782e-05, "loss": 2.3181, "step": 17780 }, { "epoch": 0.82, "learning_rate": 1.64547529970968e-05, "loss": 2.2939, "step": 17790 }, { "epoch": 0.82, "learning_rate": 1.637531848818623e-05, "loss": 2.2714, "step": 17800 }, { "epoch": 0.82, "learning_rate": 1.6296059074576863e-05, "loss": 2.2896, "step": 17810 }, { "epoch": 0.82, "learning_rate": 1.6216974922223882e-05, "loss": 2.3024, "step": 17820 }, { "epoch": 0.82, "learning_rate": 1.6138066196715573e-05, "loss": 2.2837, "step": 17830 }, { "epoch": 0.82, "learning_rate": 1.6059333063272796e-05, "loss": 2.3029, "step": 17840 }, { "epoch": 0.82, "learning_rate": 1.598077568674887e-05, "loss": 2.3029, "step": 17850 }, { "epoch": 0.82, "learning_rate": 1.5902394231629026e-05, "loss": 2.2956, "step": 17860 }, { "epoch": 0.82, "learning_rate": 1.5824188862030198e-05, "loss": 2.3181, "step": 17870 }, { "epoch": 0.82, "learning_rate": 1.574615974170057e-05, "loss": 2.3091, "step": 17880 }, { "epoch": 0.82, "learning_rate": 1.566830703401937e-05, "loss": 2.2924, "step": 17890 }, { "epoch": 0.82, "learning_rate": 1.5590630901996385e-05, "loss": 2.2766, "step": 17900 }, { "epoch": 0.82, "learning_rate": 1.5513131508271695e-05, "loss": 2.3049, "step": 17910 }, { "epoch": 0.82, "learning_rate": 1.5435809015115354e-05, "loss": 2.2889, "step": 17920 }, { "epoch": 0.82, "learning_rate": 1.535866358442698e-05, "loss": 2.3061, "step": 17930 }, { "epoch": 0.82, "learning_rate": 1.528169537773545e-05, "loss": 2.3004, "step": 17940 }, { "epoch": 0.82, "learning_rate": 1.520490455619863e-05, "loss": 2.2653, "step": 17950 }, { "epoch": 0.82, "learning_rate": 1.512829128060288e-05, "loss": 2.306, "step": 17960 }, { "epoch": 0.82, "learning_rate": 1.5051855711362905e-05, "loss": 2.2967, "step": 17970 }, { "epoch": 0.82, "learning_rate": 1.4975598008521218e-05, "loss": 2.3043, "step": 17980 }, { "epoch": 0.82, "learning_rate": 1.4899518331748019e-05, "loss": 2.2996, "step": 17990 }, { "epoch": 0.83, "learning_rate": 1.482361684034067e-05, "loss": 2.3013, "step": 18000 }, { "epoch": 0.83, "eval_accuracy": 0.5487760253723954, "eval_loss": 2.1599340438842773, "eval_runtime": 10.3838, "eval_samples_per_second": 126.832, "eval_steps_per_second": 1.059, "step": 18000 }, { "epoch": 0.83, "learning_rate": 1.4747893693223513e-05, "loss": 2.2948, "step": 18010 }, { "epoch": 0.83, "learning_rate": 1.4672349048947409e-05, "loss": 2.3178, "step": 18020 }, { "epoch": 0.83, "learning_rate": 1.4596983065689463e-05, "loss": 2.3007, "step": 18030 }, { "epoch": 0.83, "learning_rate": 1.4521795901252777e-05, "loss": 2.3079, "step": 18040 }, { "epoch": 0.83, "learning_rate": 1.4446787713065935e-05, "loss": 2.3128, "step": 18050 }, { "epoch": 0.83, "learning_rate": 1.4371958658182871e-05, "loss": 2.3104, "step": 18060 }, { "epoch": 0.83, "learning_rate": 1.4297308893282369e-05, "loss": 2.3258, "step": 18070 }, { "epoch": 0.83, "learning_rate": 1.4222838574667819e-05, "loss": 2.3231, "step": 18080 }, { "epoch": 0.83, "learning_rate": 1.4148547858266958e-05, "loss": 2.3075, "step": 18090 }, { "epoch": 0.83, "learning_rate": 1.4074436899631383e-05, "loss": 2.3009, "step": 18100 }, { "epoch": 0.83, "learning_rate": 1.400050585393633e-05, "loss": 2.3045, "step": 18110 }, { "epoch": 0.83, "learning_rate": 1.3926754875980374e-05, "loss": 2.2964, "step": 18120 }, { "epoch": 0.83, "learning_rate": 1.3853184120184993e-05, "loss": 2.296, "step": 18130 }, { "epoch": 0.83, "learning_rate": 1.3779793740594371e-05, "loss": 2.2795, "step": 18140 }, { "epoch": 0.83, "learning_rate": 1.3706583890874958e-05, "loss": 2.2873, "step": 18150 }, { "epoch": 0.83, "learning_rate": 1.363355472431529e-05, "loss": 2.2935, "step": 18160 }, { "epoch": 0.83, "learning_rate": 1.3560706393825474e-05, "loss": 2.2991, "step": 18170 }, { "epoch": 0.83, "learning_rate": 1.3488039051937084e-05, "loss": 2.2907, "step": 18180 }, { "epoch": 0.83, "learning_rate": 1.3415552850802648e-05, "loss": 2.3092, "step": 18190 }, { "epoch": 0.83, "learning_rate": 1.3343247942195513e-05, "loss": 2.3221, "step": 18200 }, { "epoch": 0.83, "learning_rate": 1.3271124477509333e-05, "loss": 2.2877, "step": 18210 }, { "epoch": 0.84, "learning_rate": 1.3199182607757888e-05, "loss": 2.2859, "step": 18220 }, { "epoch": 0.84, "learning_rate": 1.312742248357477e-05, "loss": 2.2861, "step": 18230 }, { "epoch": 0.84, "learning_rate": 1.305584425521299e-05, "loss": 2.3274, "step": 18240 }, { "epoch": 0.84, "learning_rate": 1.2984448072544686e-05, "loss": 2.2981, "step": 18250 }, { "epoch": 0.84, "learning_rate": 1.291323408506082e-05, "loss": 2.3004, "step": 18260 }, { "epoch": 0.84, "learning_rate": 1.2842202441870965e-05, "loss": 2.2897, "step": 18270 }, { "epoch": 0.84, "learning_rate": 1.2771353291702759e-05, "loss": 2.3131, "step": 18280 }, { "epoch": 0.84, "learning_rate": 1.2700686782901838e-05, "loss": 2.3092, "step": 18290 }, { "epoch": 0.84, "learning_rate": 1.2630203063431356e-05, "loss": 2.2759, "step": 18300 }, { "epoch": 0.84, "learning_rate": 1.2559902280871794e-05, "loss": 2.3059, "step": 18310 }, { "epoch": 0.84, "learning_rate": 1.2489784582420527e-05, "loss": 2.3305, "step": 18320 }, { "epoch": 0.84, "learning_rate": 1.2419850114891662e-05, "loss": 2.2879, "step": 18330 }, { "epoch": 0.84, "learning_rate": 1.2350099024715567e-05, "loss": 2.2835, "step": 18340 }, { "epoch": 0.84, "learning_rate": 1.2280531457938727e-05, "loss": 2.3077, "step": 18350 }, { "epoch": 0.84, "learning_rate": 1.2211147560223313e-05, "loss": 2.2912, "step": 18360 }, { "epoch": 0.84, "learning_rate": 1.214194747684697e-05, "loss": 2.2876, "step": 18370 }, { "epoch": 0.84, "learning_rate": 1.2072931352702387e-05, "loss": 2.3263, "step": 18380 }, { "epoch": 0.84, "learning_rate": 1.2004099332297191e-05, "loss": 2.3033, "step": 18390 }, { "epoch": 0.84, "learning_rate": 1.1935451559753441e-05, "loss": 2.2875, "step": 18400 }, { "epoch": 0.84, "learning_rate": 1.1866988178807426e-05, "loss": 2.3081, "step": 18410 }, { "epoch": 0.84, "learning_rate": 1.1798709332809366e-05, "loss": 2.2874, "step": 18420 }, { "epoch": 0.84, "learning_rate": 1.173061516472309e-05, "loss": 2.2866, "step": 18430 }, { "epoch": 0.85, "learning_rate": 1.1662705817125774e-05, "loss": 2.3109, "step": 18440 }, { "epoch": 0.85, "learning_rate": 1.1594981432207563e-05, "loss": 2.3099, "step": 18450 }, { "epoch": 0.85, "learning_rate": 1.152744215177136e-05, "loss": 2.3134, "step": 18460 }, { "epoch": 0.85, "learning_rate": 1.146008811723247e-05, "loss": 2.3128, "step": 18470 }, { "epoch": 0.85, "learning_rate": 1.1392919469618369e-05, "loss": 2.2952, "step": 18480 }, { "epoch": 0.85, "learning_rate": 1.1325936349568278e-05, "loss": 2.3005, "step": 18490 }, { "epoch": 0.85, "learning_rate": 1.1259138897333054e-05, "loss": 2.3217, "step": 18500 }, { "epoch": 0.85, "learning_rate": 1.1192527252774732e-05, "loss": 2.3002, "step": 18510 }, { "epoch": 0.85, "learning_rate": 1.1126101555366342e-05, "loss": 2.3062, "step": 18520 }, { "epoch": 0.85, "learning_rate": 1.1059861944191518e-05, "loss": 2.3026, "step": 18530 }, { "epoch": 0.85, "learning_rate": 1.0993808557944341e-05, "loss": 2.2716, "step": 18540 }, { "epoch": 0.85, "learning_rate": 1.0927941534928899e-05, "loss": 2.3138, "step": 18550 }, { "epoch": 0.85, "learning_rate": 1.0862261013059082e-05, "loss": 2.2864, "step": 18560 }, { "epoch": 0.85, "learning_rate": 1.079676712985832e-05, "loss": 2.2994, "step": 18570 }, { "epoch": 0.85, "learning_rate": 1.0731460022459227e-05, "loss": 2.2817, "step": 18580 }, { "epoch": 0.85, "learning_rate": 1.066633982760331e-05, "loss": 2.3024, "step": 18590 }, { "epoch": 0.85, "learning_rate": 1.060140668164079e-05, "loss": 2.3003, "step": 18600 }, { "epoch": 0.85, "learning_rate": 1.0536660720530156e-05, "loss": 2.2977, "step": 18610 }, { "epoch": 0.85, "learning_rate": 1.0472102079838052e-05, "loss": 2.2878, "step": 18620 }, { "epoch": 0.85, "learning_rate": 1.0407730894738821e-05, "loss": 2.3066, "step": 18630 }, { "epoch": 0.85, "learning_rate": 1.0343547300014344e-05, "loss": 2.3014, "step": 18640 }, { "epoch": 0.86, "learning_rate": 1.0279551430053747e-05, "loss": 2.2983, "step": 18650 }, { "epoch": 0.86, "learning_rate": 1.0215743418853053e-05, "loss": 2.2866, "step": 18660 }, { "epoch": 0.86, "learning_rate": 1.0152123400014979e-05, "loss": 2.2904, "step": 18670 }, { "epoch": 0.86, "learning_rate": 1.008869150674856e-05, "loss": 2.3142, "step": 18680 }, { "epoch": 0.86, "learning_rate": 1.002544787186902e-05, "loss": 2.3045, "step": 18690 }, { "epoch": 0.86, "learning_rate": 9.962392627797301e-06, "loss": 2.297, "step": 18700 }, { "epoch": 0.86, "learning_rate": 9.89952590655998e-06, "loss": 2.3033, "step": 18710 }, { "epoch": 0.86, "learning_rate": 9.836847839788832e-06, "loss": 2.2794, "step": 18720 }, { "epoch": 0.86, "learning_rate": 9.774358558720653e-06, "loss": 2.2866, "step": 18730 }, { "epoch": 0.86, "learning_rate": 9.71205819419696e-06, "loss": 2.3037, "step": 18740 }, { "epoch": 0.86, "learning_rate": 9.649946876663706e-06, "loss": 2.3129, "step": 18750 }, { "epoch": 0.86, "learning_rate": 9.588024736170986e-06, "loss": 2.3076, "step": 18760 }, { "epoch": 0.86, "learning_rate": 9.526291902372853e-06, "loss": 2.3311, "step": 18770 }, { "epoch": 0.86, "learning_rate": 9.46474850452691e-06, "loss": 2.3084, "step": 18780 }, { "epoch": 0.86, "learning_rate": 9.403394671494204e-06, "loss": 2.2871, "step": 18790 }, { "epoch": 0.86, "learning_rate": 9.34223053173875e-06, "loss": 2.2848, "step": 18800 }, { "epoch": 0.86, "learning_rate": 9.281256213327516e-06, "loss": 2.305, "step": 18810 }, { "epoch": 0.86, "learning_rate": 9.220471843929901e-06, "loss": 2.3053, "step": 18820 }, { "epoch": 0.86, "learning_rate": 9.15987755081763e-06, "loss": 2.3283, "step": 18830 }, { "epoch": 0.86, "learning_rate": 9.099473460864461e-06, "loss": 2.3151, "step": 18840 }, { "epoch": 0.86, "learning_rate": 9.039259700545866e-06, "loss": 2.3139, "step": 18850 }, { "epoch": 0.86, "learning_rate": 8.979236395938828e-06, "loss": 2.3077, "step": 18860 }, { "epoch": 0.87, "learning_rate": 8.919403672721527e-06, "loss": 2.2889, "step": 18870 }, { "epoch": 0.87, "learning_rate": 8.859761656173127e-06, "loss": 2.2996, "step": 18880 }, { "epoch": 0.87, "learning_rate": 8.800310471173446e-06, "loss": 2.3225, "step": 18890 }, { "epoch": 0.87, "learning_rate": 8.741050242202774e-06, "loss": 2.306, "step": 18900 }, { "epoch": 0.87, "learning_rate": 8.681981093341529e-06, "loss": 2.2895, "step": 18910 }, { "epoch": 0.87, "learning_rate": 8.623103148270096e-06, "loss": 2.3268, "step": 18920 }, { "epoch": 0.87, "learning_rate": 8.56441653026846e-06, "loss": 2.3031, "step": 18930 }, { "epoch": 0.87, "learning_rate": 8.505921362216052e-06, "loss": 2.3062, "step": 18940 }, { "epoch": 0.87, "learning_rate": 8.447617766591387e-06, "loss": 2.299, "step": 18950 }, { "epoch": 0.87, "learning_rate": 8.389505865471915e-06, "loss": 2.3101, "step": 18960 }, { "epoch": 0.87, "learning_rate": 8.331585780533657e-06, "loss": 2.3182, "step": 18970 }, { "epoch": 0.87, "learning_rate": 8.273857633051064e-06, "loss": 2.2893, "step": 18980 }, { "epoch": 0.87, "learning_rate": 8.216321543896632e-06, "loss": 2.3137, "step": 18990 }, { "epoch": 0.87, "learning_rate": 8.15897763354081e-06, "loss": 2.2957, "step": 19000 }, { "epoch": 0.87, "learning_rate": 8.101826022051561e-06, "loss": 2.2961, "step": 19010 }, { "epoch": 0.87, "learning_rate": 8.044866829094311e-06, "loss": 2.3183, "step": 19020 }, { "epoch": 0.87, "learning_rate": 7.988100173931501e-06, "loss": 2.2996, "step": 19030 }, { "epoch": 0.87, "learning_rate": 7.931526175422466e-06, "loss": 2.2866, "step": 19040 }, { "epoch": 0.87, "learning_rate": 7.875144952023195e-06, "loss": 2.3124, "step": 19050 }, { "epoch": 0.87, "learning_rate": 7.818956621785989e-06, "loss": 2.3046, "step": 19060 }, { "epoch": 0.87, "learning_rate": 7.762961302359273e-06, "loss": 2.2859, "step": 19070 }, { "epoch": 0.87, "learning_rate": 7.707159110987328e-06, "loss": 2.2784, "step": 19080 }, { "epoch": 0.88, "learning_rate": 7.651550164510124e-06, "loss": 2.2708, "step": 19090 }, { "epoch": 0.88, "learning_rate": 7.596134579362923e-06, "loss": 2.2869, "step": 19100 }, { "epoch": 0.88, "learning_rate": 7.5409124715762e-06, "loss": 2.3068, "step": 19110 }, { "epoch": 0.88, "learning_rate": 7.485883956775252e-06, "loss": 2.2944, "step": 19120 }, { "epoch": 0.88, "learning_rate": 7.431049150180102e-06, "loss": 2.2998, "step": 19130 }, { "epoch": 0.88, "learning_rate": 7.376408166605098e-06, "loss": 2.3286, "step": 19140 }, { "epoch": 0.88, "learning_rate": 7.321961120458842e-06, "loss": 2.289, "step": 19150 }, { "epoch": 0.88, "learning_rate": 7.2677081257437886e-06, "loss": 2.2907, "step": 19160 }, { "epoch": 0.88, "learning_rate": 7.213649296056157e-06, "loss": 2.2979, "step": 19170 }, { "epoch": 0.88, "learning_rate": 7.1597847445855515e-06, "loss": 2.2939, "step": 19180 }, { "epoch": 0.88, "learning_rate": 7.106114584114831e-06, "loss": 2.3037, "step": 19190 }, { "epoch": 0.88, "learning_rate": 7.052638927019817e-06, "loss": 2.291, "step": 19200 }, { "epoch": 0.88, "learning_rate": 6.999357885269109e-06, "loss": 2.3114, "step": 19210 }, { "epoch": 0.88, "learning_rate": 6.946271570423768e-06, "loss": 2.2876, "step": 19220 }, { "epoch": 0.88, "learning_rate": 6.893380093637158e-06, "loss": 2.2974, "step": 19230 }, { "epoch": 0.88, "learning_rate": 6.840683565654682e-06, "loss": 2.3158, "step": 19240 }, { "epoch": 0.88, "learning_rate": 6.7881820968135336e-06, "loss": 2.2808, "step": 19250 }, { "epoch": 0.88, "learning_rate": 6.735875797042535e-06, "loss": 2.3028, "step": 19260 }, { "epoch": 0.88, "learning_rate": 6.683764775861823e-06, "loss": 2.2873, "step": 19270 }, { "epoch": 0.88, "learning_rate": 6.631849142382674e-06, "loss": 2.309, "step": 19280 }, { "epoch": 0.88, "learning_rate": 6.580129005307234e-06, "loss": 2.2927, "step": 19290 }, { "epoch": 0.88, "learning_rate": 6.528604472928346e-06, "loss": 2.293, "step": 19300 }, { "epoch": 0.89, "learning_rate": 6.477275653129233e-06, "loss": 2.3311, "step": 19310 }, { "epoch": 0.89, "learning_rate": 6.426142653383427e-06, "loss": 2.3267, "step": 19320 }, { "epoch": 0.89, "learning_rate": 6.375205580754351e-06, "loss": 2.2962, "step": 19330 }, { "epoch": 0.89, "learning_rate": 6.324464541895247e-06, "loss": 2.3228, "step": 19340 }, { "epoch": 0.89, "learning_rate": 6.273919643048865e-06, "loss": 2.3013, "step": 19350 }, { "epoch": 0.89, "learning_rate": 6.223570990047323e-06, "loss": 2.2951, "step": 19360 }, { "epoch": 0.89, "learning_rate": 6.173418688311772e-06, "loss": 2.276, "step": 19370 }, { "epoch": 0.89, "learning_rate": 6.123462842852257e-06, "loss": 2.2913, "step": 19380 }, { "epoch": 0.89, "learning_rate": 6.073703558267518e-06, "loss": 2.2927, "step": 19390 }, { "epoch": 0.89, "learning_rate": 6.024140938744693e-06, "loss": 2.2891, "step": 19400 }, { "epoch": 0.89, "learning_rate": 5.974775088059115e-06, "loss": 2.2829, "step": 19410 }, { "epoch": 0.89, "learning_rate": 5.9256061095742e-06, "loss": 2.2801, "step": 19420 }, { "epoch": 0.89, "learning_rate": 5.8766341062410635e-06, "loss": 2.3173, "step": 19430 }, { "epoch": 0.89, "learning_rate": 5.827859180598427e-06, "loss": 2.2969, "step": 19440 }, { "epoch": 0.89, "learning_rate": 5.779281434772376e-06, "loss": 2.2965, "step": 19450 }, { "epoch": 0.89, "learning_rate": 5.730900970476105e-06, "loss": 2.3169, "step": 19460 }, { "epoch": 0.89, "learning_rate": 5.682717889009781e-06, "loss": 2.3081, "step": 19470 }, { "epoch": 0.89, "learning_rate": 5.634732291260225e-06, "loss": 2.3215, "step": 19480 }, { "epoch": 0.89, "learning_rate": 5.5869442777008345e-06, "loss": 2.2931, "step": 19490 }, { "epoch": 0.89, "learning_rate": 5.5393539483912234e-06, "loss": 2.2812, "step": 19500 }, { "epoch": 0.89, "learning_rate": 5.491961402977164e-06, "loss": 2.2991, "step": 19510 }, { "epoch": 0.89, "learning_rate": 5.444766740690244e-06, "loss": 2.2931, "step": 19520 }, { "epoch": 0.9, "learning_rate": 5.3977700603477736e-06, "loss": 2.3059, "step": 19530 }, { "epoch": 0.9, "learning_rate": 5.350971460352472e-06, "loss": 2.2928, "step": 19540 }, { "epoch": 0.9, "learning_rate": 5.304371038692313e-06, "loss": 2.2998, "step": 19550 }, { "epoch": 0.9, "learning_rate": 5.25796889294039e-06, "loss": 2.2951, "step": 19560 }, { "epoch": 0.9, "learning_rate": 5.211765120254574e-06, "loss": 2.307, "step": 19570 }, { "epoch": 0.9, "learning_rate": 5.16575981737738e-06, "loss": 2.3008, "step": 19580 }, { "epoch": 0.9, "learning_rate": 5.1199530806358085e-06, "loss": 2.2797, "step": 19590 }, { "epoch": 0.9, "learning_rate": 5.074345005941039e-06, "loss": 2.3225, "step": 19600 }, { "epoch": 0.9, "learning_rate": 5.02893568878835e-06, "loss": 2.3083, "step": 19610 }, { "epoch": 0.9, "learning_rate": 4.983725224256808e-06, "loss": 2.3095, "step": 19620 }, { "epoch": 0.9, "learning_rate": 4.938713707009124e-06, "loss": 2.2676, "step": 19630 }, { "epoch": 0.9, "learning_rate": 4.893901231291487e-06, "loss": 2.2795, "step": 19640 }, { "epoch": 0.9, "learning_rate": 4.849287890933263e-06, "loss": 2.3209, "step": 19650 }, { "epoch": 0.9, "learning_rate": 4.8048737793469415e-06, "loss": 2.3125, "step": 19660 }, { "epoch": 0.9, "learning_rate": 4.760658989527778e-06, "loss": 2.311, "step": 19670 }, { "epoch": 0.9, "learning_rate": 4.716643614053773e-06, "loss": 2.2761, "step": 19680 }, { "epoch": 0.9, "learning_rate": 4.672827745085306e-06, "loss": 2.3034, "step": 19690 }, { "epoch": 0.9, "learning_rate": 4.629211474365081e-06, "loss": 2.3094, "step": 19700 }, { "epoch": 0.9, "learning_rate": 4.585794893217854e-06, "loss": 2.3131, "step": 19710 }, { "epoch": 0.9, "learning_rate": 4.542578092550276e-06, "loss": 2.3131, "step": 19720 }, { "epoch": 0.9, "learning_rate": 4.499561162850663e-06, "loss": 2.301, "step": 19730 }, { "epoch": 0.91, "learning_rate": 4.4567441941889e-06, "loss": 2.2857, "step": 19740 }, { "epoch": 0.91, "learning_rate": 4.414127276216107e-06, "loss": 2.2717, "step": 19750 }, { "epoch": 0.91, "learning_rate": 4.371710498164627e-06, "loss": 2.3076, "step": 19760 }, { "epoch": 0.91, "learning_rate": 4.329493948847663e-06, "loss": 2.306, "step": 19770 }, { "epoch": 0.91, "learning_rate": 4.287477716659216e-06, "loss": 2.3287, "step": 19780 }, { "epoch": 0.91, "learning_rate": 4.245661889573838e-06, "loss": 2.3002, "step": 19790 }, { "epoch": 0.91, "learning_rate": 4.2040465551465014e-06, "loss": 2.3036, "step": 19800 }, { "epoch": 0.91, "learning_rate": 4.16263180051234e-06, "loss": 2.3191, "step": 19810 }, { "epoch": 0.91, "learning_rate": 4.121417712386544e-06, "loss": 2.3023, "step": 19820 }, { "epoch": 0.91, "learning_rate": 4.080404377064129e-06, "loss": 2.3016, "step": 19830 }, { "epoch": 0.91, "learning_rate": 4.039591880419746e-06, "loss": 2.3256, "step": 19840 }, { "epoch": 0.91, "learning_rate": 3.998980307907574e-06, "loss": 2.2932, "step": 19850 }, { "epoch": 0.91, "learning_rate": 3.958569744561047e-06, "loss": 2.3295, "step": 19860 }, { "epoch": 0.91, "learning_rate": 3.918360274992772e-06, "loss": 2.3096, "step": 19870 }, { "epoch": 0.91, "learning_rate": 3.878351983394246e-06, "loss": 2.2909, "step": 19880 }, { "epoch": 0.91, "learning_rate": 3.838544953535772e-06, "loss": 2.3159, "step": 19890 }, { "epoch": 0.91, "learning_rate": 3.7989392687662106e-06, "loss": 2.312, "step": 19900 }, { "epoch": 0.91, "learning_rate": 3.7595350120128937e-06, "loss": 2.3168, "step": 19910 }, { "epoch": 0.91, "learning_rate": 3.720332265781357e-06, "loss": 2.2988, "step": 19920 }, { "epoch": 0.91, "learning_rate": 3.6813311121552175e-06, "loss": 2.3071, "step": 19930 }, { "epoch": 0.91, "learning_rate": 3.6425316327959978e-06, "loss": 2.2792, "step": 19940 }, { "epoch": 0.91, "learning_rate": 3.603933908942958e-06, "loss": 2.2842, "step": 19950 }, { "epoch": 0.92, "learning_rate": 3.5655380214129063e-06, "loss": 2.2876, "step": 19960 }, { "epoch": 0.92, "learning_rate": 3.5273440506000453e-06, "loss": 2.3033, "step": 19970 }, { "epoch": 0.92, "learning_rate": 3.4893520764757824e-06, "loss": 2.3115, "step": 19980 }, { "epoch": 0.92, "learning_rate": 3.451562178588619e-06, "loss": 2.3205, "step": 19990 }, { "epoch": 0.92, "learning_rate": 3.4139744360639047e-06, "loss": 2.3075, "step": 20000 }, { "epoch": 0.92, "eval_accuracy": 0.5488688041410504, "eval_loss": 2.159656524658203, "eval_runtime": 10.5788, "eval_samples_per_second": 124.494, "eval_steps_per_second": 1.04, "step": 20000 }, { "epoch": 0.92, "learning_rate": 3.376588927603741e-06, "loss": 2.2854, "step": 20010 }, { "epoch": 0.92, "learning_rate": 3.3394057314867554e-06, "loss": 2.3114, "step": 20020 }, { "epoch": 0.92, "learning_rate": 3.3024249255680152e-06, "loss": 2.3001, "step": 20030 }, { "epoch": 0.92, "learning_rate": 3.2656465872787702e-06, "loss": 2.3023, "step": 20040 }, { "epoch": 0.92, "learning_rate": 3.2290707936263766e-06, "loss": 2.3016, "step": 20050 }, { "epoch": 0.92, "learning_rate": 3.192697621194063e-06, "loss": 2.2972, "step": 20060 }, { "epoch": 0.92, "learning_rate": 3.1565271461408084e-06, "loss": 2.2982, "step": 20070 }, { "epoch": 0.92, "learning_rate": 3.1205594442012433e-06, "loss": 2.2858, "step": 20080 }, { "epoch": 0.92, "learning_rate": 3.084794590685336e-06, "loss": 2.3109, "step": 20090 }, { "epoch": 0.92, "learning_rate": 3.049232660478385e-06, "loss": 2.2748, "step": 20100 }, { "epoch": 0.92, "learning_rate": 3.0138737280407724e-06, "loss": 2.3026, "step": 20110 }, { "epoch": 0.92, "learning_rate": 2.9787178674078765e-06, "loss": 2.3137, "step": 20120 }, { "epoch": 0.92, "learning_rate": 2.943765152189837e-06, "loss": 2.2967, "step": 20130 }, { "epoch": 0.92, "learning_rate": 2.9090156555714675e-06, "loss": 2.2888, "step": 20140 }, { "epoch": 0.92, "learning_rate": 2.8744694503120564e-06, "loss": 2.3102, "step": 20150 }, { "epoch": 0.92, "learning_rate": 2.840126608745286e-06, "loss": 2.2931, "step": 20160 }, { "epoch": 0.92, "learning_rate": 2.8059872027789593e-06, "loss": 2.3183, "step": 20170 }, { "epoch": 0.93, "learning_rate": 2.7720513038949735e-06, "loss": 2.2839, "step": 20180 }, { "epoch": 0.93, "learning_rate": 2.738318983149113e-06, "loss": 2.3115, "step": 20190 }, { "epoch": 0.93, "learning_rate": 2.704790311170868e-06, "loss": 2.3032, "step": 20200 }, { "epoch": 0.93, "learning_rate": 2.671465358163383e-06, "loss": 2.3031, "step": 20210 }, { "epoch": 0.93, "learning_rate": 2.638344193903197e-06, "loss": 2.3108, "step": 20220 }, { "epoch": 0.93, "learning_rate": 2.60542688774017e-06, "loss": 2.3263, "step": 20230 }, { "epoch": 0.93, "learning_rate": 2.572713508597324e-06, "loss": 2.345, "step": 20240 }, { "epoch": 0.93, "learning_rate": 2.5402041249707133e-06, "loss": 2.2901, "step": 20250 }, { "epoch": 0.93, "learning_rate": 2.5078988049292206e-06, "loss": 2.3061, "step": 20260 }, { "epoch": 0.93, "learning_rate": 2.4757976161144836e-06, "loss": 2.286, "step": 20270 }, { "epoch": 0.93, "learning_rate": 2.443900625740714e-06, "loss": 2.2841, "step": 20280 }, { "epoch": 0.93, "learning_rate": 2.4122079005945763e-06, "loss": 2.3022, "step": 20290 }, { "epoch": 0.93, "learning_rate": 2.380719507035023e-06, "loss": 2.2793, "step": 20300 }, { "epoch": 0.93, "learning_rate": 2.349435510993192e-06, "loss": 2.3001, "step": 20310 }, { "epoch": 0.93, "learning_rate": 2.3183559779722418e-06, "loss": 2.2981, "step": 20320 }, { "epoch": 0.93, "learning_rate": 2.287480973047229e-06, "loss": 2.317, "step": 20330 }, { "epoch": 0.93, "learning_rate": 2.2568105608649417e-06, "loss": 2.3118, "step": 20340 }, { "epoch": 0.93, "learning_rate": 2.22634480564381e-06, "loss": 2.3064, "step": 20350 }, { "epoch": 0.93, "learning_rate": 2.19608377117374e-06, "loss": 2.2939, "step": 20360 }, { "epoch": 0.93, "learning_rate": 2.166027520815972e-06, "loss": 2.2939, "step": 20370 }, { "epoch": 0.93, "learning_rate": 2.1361761175030195e-06, "loss": 2.3047, "step": 20380 }, { "epoch": 0.93, "learning_rate": 2.106529623738407e-06, "loss": 2.3041, "step": 20390 }, { "epoch": 0.94, "learning_rate": 2.077088101596658e-06, "loss": 2.309, "step": 20400 }, { "epoch": 0.94, "learning_rate": 2.0478516127231397e-06, "loss": 2.3226, "step": 20410 }, { "epoch": 0.94, "learning_rate": 2.0188202183338613e-06, "loss": 2.2901, "step": 20420 }, { "epoch": 0.94, "learning_rate": 1.989993979215454e-06, "loss": 2.3097, "step": 20430 }, { "epoch": 0.94, "learning_rate": 1.9613729557249605e-06, "loss": 2.281, "step": 20440 }, { "epoch": 0.94, "learning_rate": 1.932957207789732e-06, "loss": 2.2964, "step": 20450 }, { "epoch": 0.94, "learning_rate": 1.9047467949073416e-06, "loss": 2.2927, "step": 20460 }, { "epoch": 0.94, "learning_rate": 1.8767417761453855e-06, "loss": 2.3154, "step": 20470 }, { "epoch": 0.94, "learning_rate": 1.8489422101414467e-06, "loss": 2.2862, "step": 20480 }, { "epoch": 0.94, "learning_rate": 1.8213481551028867e-06, "loss": 2.3085, "step": 20490 }, { "epoch": 0.94, "learning_rate": 1.7939596688067772e-06, "loss": 2.2972, "step": 20500 }, { "epoch": 0.94, "learning_rate": 1.7667768085997793e-06, "loss": 2.2874, "step": 20510 }, { "epoch": 0.94, "learning_rate": 1.739799631397998e-06, "loss": 2.2767, "step": 20520 }, { "epoch": 0.94, "learning_rate": 1.7130281936868608e-06, "loss": 2.2985, "step": 20530 }, { "epoch": 0.94, "learning_rate": 1.6864625515210287e-06, "loss": 2.2929, "step": 20540 }, { "epoch": 0.94, "learning_rate": 1.6601027605242624e-06, "loss": 2.287, "step": 20550 }, { "epoch": 0.94, "learning_rate": 1.633948875889313e-06, "loss": 2.3101, "step": 20560 }, { "epoch": 0.94, "learning_rate": 1.6080009523777862e-06, "loss": 2.2839, "step": 20570 }, { "epoch": 0.94, "learning_rate": 1.5822590443200446e-06, "loss": 2.2993, "step": 20580 }, { "epoch": 0.94, "learning_rate": 1.556723205615085e-06, "loss": 2.3081, "step": 20590 }, { "epoch": 0.94, "learning_rate": 1.5313934897304595e-06, "loss": 2.2871, "step": 20600 }, { "epoch": 0.94, "learning_rate": 1.5062699497020993e-06, "loss": 2.2979, "step": 20610 }, { "epoch": 0.95, "learning_rate": 1.4813526381342702e-06, "loss": 2.2898, "step": 20620 }, { "epoch": 0.95, "learning_rate": 1.456641607199416e-06, "loss": 2.301, "step": 20630 }, { "epoch": 0.95, "learning_rate": 1.4321369086380488e-06, "loss": 2.2861, "step": 20640 }, { "epoch": 0.95, "learning_rate": 1.4078385937587035e-06, "loss": 2.2985, "step": 20650 }, { "epoch": 0.95, "learning_rate": 1.383746713437728e-06, "loss": 2.2992, "step": 20660 }, { "epoch": 0.95, "learning_rate": 1.3598613181192821e-06, "loss": 2.2902, "step": 20670 }, { "epoch": 0.95, "learning_rate": 1.3361824578151494e-06, "loss": 2.3184, "step": 20680 }, { "epoch": 0.95, "learning_rate": 1.3127101821046927e-06, "loss": 2.3046, "step": 20690 }, { "epoch": 0.95, "learning_rate": 1.2894445401346989e-06, "loss": 2.2876, "step": 20700 }, { "epoch": 0.95, "learning_rate": 1.2663855806193114e-06, "loss": 2.3064, "step": 20710 }, { "epoch": 0.95, "learning_rate": 1.2435333518398984e-06, "loss": 2.2963, "step": 20720 }, { "epoch": 0.95, "learning_rate": 1.220887901645018e-06, "loss": 2.2857, "step": 20730 }, { "epoch": 0.95, "learning_rate": 1.1984492774502088e-06, "loss": 2.3012, "step": 20740 }, { "epoch": 0.95, "learning_rate": 1.1762175262380215e-06, "loss": 2.3118, "step": 20750 }, { "epoch": 0.95, "learning_rate": 1.1541926945577653e-06, "loss": 2.3125, "step": 20760 }, { "epoch": 0.95, "learning_rate": 1.1323748285255953e-06, "loss": 2.279, "step": 20770 }, { "epoch": 0.95, "learning_rate": 1.1107639738242248e-06, "loss": 2.3304, "step": 20780 }, { "epoch": 0.95, "learning_rate": 1.0893601757029913e-06, "loss": 2.3059, "step": 20790 }, { "epoch": 0.95, "learning_rate": 1.0681634789776351e-06, "loss": 2.2903, "step": 20800 }, { "epoch": 0.95, "learning_rate": 1.0471739280303316e-06, "loss": 2.2834, "step": 20810 }, { "epoch": 0.95, "learning_rate": 1.0263915668094593e-06, "loss": 2.2779, "step": 20820 }, { "epoch": 0.96, "learning_rate": 1.005816438829621e-06, "loss": 2.3038, "step": 20830 }, { "epoch": 0.96, "learning_rate": 9.854485871715003e-07, "loss": 2.3111, "step": 20840 }, { "epoch": 0.96, "learning_rate": 9.652880544817612e-07, "loss": 2.3159, "step": 20850 }, { "epoch": 0.96, "learning_rate": 9.453348829730258e-07, "loss": 2.3179, "step": 20860 }, { "epoch": 0.96, "learning_rate": 9.255891144236861e-07, "loss": 2.3159, "step": 20870 }, { "epoch": 0.96, "learning_rate": 9.060507901778925e-07, "loss": 2.3088, "step": 20880 }, { "epoch": 0.96, "learning_rate": 8.867199511454316e-07, "loss": 2.3019, "step": 20890 }, { "epoch": 0.96, "learning_rate": 8.675966378016931e-07, "loss": 2.3064, "step": 20900 }, { "epoch": 0.96, "learning_rate": 8.486808901874921e-07, "loss": 2.3027, "step": 20910 }, { "epoch": 0.96, "learning_rate": 8.299727479090801e-07, "loss": 2.3103, "step": 20920 }, { "epoch": 0.96, "learning_rate": 8.114722501379679e-07, "loss": 2.3127, "step": 20930 }, { "epoch": 0.96, "learning_rate": 7.931794356109579e-07, "loss": 2.2974, "step": 20940 }, { "epoch": 0.96, "learning_rate": 7.750943426299562e-07, "loss": 2.2706, "step": 20950 }, { "epoch": 0.96, "learning_rate": 7.572170090619612e-07, "loss": 2.2955, "step": 20960 }, { "epoch": 0.96, "learning_rate": 7.395474723389306e-07, "loss": 2.2929, "step": 20970 }, { "epoch": 0.96, "learning_rate": 7.220857694577698e-07, "loss": 2.2928, "step": 20980 }, { "epoch": 0.96, "learning_rate": 7.048319369801881e-07, "loss": 2.2958, "step": 20990 }, { "epoch": 0.96, "learning_rate": 6.877860110326651e-07, "loss": 2.2842, "step": 21000 }, { "epoch": 0.96, "learning_rate": 6.709480273063507e-07, "loss": 2.2989, "step": 21010 }, { "epoch": 0.96, "learning_rate": 6.543180210570099e-07, "loss": 2.2944, "step": 21020 }, { "epoch": 0.96, "learning_rate": 6.37896027104945e-07, "loss": 2.2812, "step": 21030 }, { "epoch": 0.96, "learning_rate": 6.216820798348955e-07, "loss": 2.2956, "step": 21040 }, { "epoch": 0.97, "learning_rate": 6.056762131960048e-07, "loss": 2.2813, "step": 21050 }, { "epoch": 0.97, "learning_rate": 5.898784607017205e-07, "loss": 2.2941, "step": 21060 }, { "epoch": 0.97, "learning_rate": 5.74288855429761e-07, "loss": 2.3113, "step": 21070 }, { "epoch": 0.97, "learning_rate": 5.589074300219932e-07, "loss": 2.2998, "step": 21080 }, { "epoch": 0.97, "learning_rate": 5.437342166844106e-07, "loss": 2.2744, "step": 21090 }, { "epoch": 0.97, "learning_rate": 5.287692471870331e-07, "loss": 2.3032, "step": 21100 }, { "epoch": 0.97, "learning_rate": 5.140125528638628e-07, "loss": 2.294, "step": 21110 }, { "epoch": 0.97, "learning_rate": 4.994641646128062e-07, "loss": 2.2723, "step": 21120 }, { "epoch": 0.97, "learning_rate": 4.851241128956186e-07, "loss": 2.3043, "step": 21130 }, { "epoch": 0.97, "learning_rate": 4.7099242773783746e-07, "loss": 2.2919, "step": 21140 }, { "epoch": 0.97, "learning_rate": 4.5706913872871626e-07, "loss": 2.2847, "step": 21150 }, { "epoch": 0.97, "learning_rate": 4.4335427502114613e-07, "loss": 2.3028, "step": 21160 }, { "epoch": 0.97, "learning_rate": 4.298478653316451e-07, "loss": 2.295, "step": 21170 }, { "epoch": 0.97, "learning_rate": 4.16549937940236e-07, "loss": 2.3075, "step": 21180 }, { "epoch": 0.97, "learning_rate": 4.0346052069042404e-07, "loss": 2.286, "step": 21190 }, { "epoch": 0.97, "learning_rate": 3.9057964098915265e-07, "loss": 2.3007, "step": 21200 }, { "epoch": 0.97, "learning_rate": 3.779073258066923e-07, "loss": 2.308, "step": 21210 }, { "epoch": 0.97, "learning_rate": 3.6544360167664047e-07, "loss": 2.3119, "step": 21220 }, { "epoch": 0.97, "learning_rate": 3.5318849469582194e-07, "loss": 2.3065, "step": 21230 }, { "epoch": 0.97, "learning_rate": 3.411420305242774e-07, "loss": 2.3026, "step": 21240 }, { "epoch": 0.97, "learning_rate": 3.293042343851638e-07, "loss": 2.314, "step": 21250 }, { "epoch": 0.97, "learning_rate": 3.176751310647319e-07, "loss": 2.3058, "step": 21260 }, { "epoch": 0.98, "learning_rate": 3.0625474491227104e-07, "loss": 2.3121, "step": 21270 }, { "epoch": 0.98, "learning_rate": 2.950430998400644e-07, "loss": 2.3067, "step": 21280 }, { "epoch": 0.98, "learning_rate": 2.840402193232894e-07, "loss": 2.3006, "step": 21290 }, { "epoch": 0.98, "learning_rate": 2.7324612640005077e-07, "loss": 2.2893, "step": 21300 }, { "epoch": 0.98, "learning_rate": 2.626608436712696e-07, "loss": 2.2836, "step": 21310 }, { "epoch": 0.98, "learning_rate": 2.5228439330065025e-07, "loss": 2.2999, "step": 21320 }, { "epoch": 0.98, "learning_rate": 2.421167970146576e-07, "loss": 2.3, "step": 21330 }, { "epoch": 0.98, "learning_rate": 2.321580761024289e-07, "loss": 2.305, "step": 21340 }, { "epoch": 0.98, "learning_rate": 2.2240825141577327e-07, "loss": 2.3297, "step": 21350 }, { "epoch": 0.98, "learning_rate": 2.128673433690942e-07, "loss": 2.3106, "step": 21360 }, { "epoch": 0.98, "learning_rate": 2.0353537193936735e-07, "loss": 2.2859, "step": 21370 }, { "epoch": 0.98, "learning_rate": 1.9441235666609602e-07, "loss": 2.2827, "step": 21380 }, { "epoch": 0.98, "learning_rate": 1.8549831665124474e-07, "loss": 2.3143, "step": 21390 }, { "epoch": 0.98, "learning_rate": 1.767932705592723e-07, "loss": 2.2779, "step": 21400 }, { "epoch": 0.98, "learning_rate": 1.682972366169655e-07, "loss": 2.2856, "step": 21410 }, { "epoch": 0.98, "learning_rate": 1.6001023261354997e-07, "loss": 2.3009, "step": 21420 }, { "epoch": 0.98, "learning_rate": 1.5193227590053484e-07, "loss": 2.2864, "step": 21430 }, { "epoch": 0.98, "learning_rate": 1.4406338339173487e-07, "loss": 2.3091, "step": 21440 }, { "epoch": 0.98, "learning_rate": 1.3640357156321504e-07, "loss": 2.2979, "step": 21450 }, { "epoch": 0.98, "learning_rate": 1.2895285645326828e-07, "loss": 2.3053, "step": 21460 }, { "epoch": 0.98, "learning_rate": 1.217112536623821e-07, "loss": 2.3144, "step": 21470 }, { "epoch": 0.98, "learning_rate": 1.146787783531833e-07, "loss": 2.29, "step": 21480 }, { "epoch": 0.99, "learning_rate": 1.0785544525044878e-07, "loss": 2.2866, "step": 21490 }, { "epoch": 0.99, "learning_rate": 1.0124126864100581e-07, "loss": 2.2878, "step": 21500 }, { "epoch": 0.99, "learning_rate": 9.483626237379862e-08, "loss": 2.3039, "step": 21510 }, { "epoch": 0.99, "learning_rate": 8.864043985975511e-08, "loss": 2.296, "step": 21520 }, { "epoch": 0.99, "learning_rate": 8.265381407185357e-08, "loss": 2.3034, "step": 21530 }, { "epoch": 0.99, "learning_rate": 7.687639754501152e-08, "loss": 2.2954, "step": 21540 }, { "epoch": 0.99, "learning_rate": 7.130820237611913e-08, "loss": 2.2945, "step": 21550 }, { "epoch": 0.99, "learning_rate": 6.594924022400583e-08, "loss": 2.2996, "step": 21560 }, { "epoch": 0.99, "learning_rate": 6.07995223093849e-08, "loss": 2.2881, "step": 21570 }, { "epoch": 0.99, "learning_rate": 5.5859059414853364e-08, "loss": 2.3122, "step": 21580 }, { "epoch": 0.99, "learning_rate": 5.112786188486984e-08, "loss": 2.3026, "step": 21590 }, { "epoch": 0.99, "learning_rate": 4.660593962572124e-08, "loss": 2.3117, "step": 21600 }, { "epoch": 0.99, "learning_rate": 4.229330210552273e-08, "loss": 2.2909, "step": 21610 }, { "epoch": 0.99, "learning_rate": 3.818995835417338e-08, "loss": 2.3051, "step": 21620 }, { "epoch": 0.99, "learning_rate": 3.42959169633339e-08, "loss": 2.3006, "step": 21630 }, { "epoch": 0.99, "learning_rate": 3.0611186086459963e-08, "loss": 2.2787, "step": 21640 }, { "epoch": 0.99, "learning_rate": 2.7135773438702328e-08, "loss": 2.2938, "step": 21650 }, { "epoch": 0.99, "learning_rate": 2.3869686296984495e-08, "loss": 2.3208, "step": 21660 }, { "epoch": 0.99, "learning_rate": 2.0812931499913922e-08, "loss": 2.306, "step": 21670 }, { "epoch": 0.99, "learning_rate": 1.796551544777092e-08, "loss": 2.2907, "step": 21680 }, { "epoch": 0.99, "learning_rate": 1.532744410257525e-08, "loss": 2.3234, "step": 21690 }, { "epoch": 0.99, "learning_rate": 1.2898722987952916e-08, "loss": 2.2921, "step": 21700 }, { "epoch": 1.0, "learning_rate": 1.067935718922497e-08, "loss": 2.2851, "step": 21710 }, { "epoch": 1.0, "learning_rate": 8.669351353363108e-09, "loss": 2.319, "step": 21720 }, { "epoch": 1.0, "learning_rate": 6.868709688945263e-09, "loss": 2.3035, "step": 21730 }, { "epoch": 1.0, "learning_rate": 5.27743596622221e-09, "loss": 2.3272, "step": 21740 }, { "epoch": 1.0, "learning_rate": 3.8955335170065554e-09, "loss": 2.3093, "step": 21750 }, { "epoch": 1.0, "learning_rate": 2.723005234794851e-09, "loss": 2.3023, "step": 21760 }, { "epoch": 1.0, "learning_rate": 1.7598535746232713e-09, "loss": 2.3055, "step": 21770 }, { "epoch": 1.0, "learning_rate": 1.006080553167532e-09, "loss": 2.2972, "step": 21780 }, { "epoch": 1.0, "learning_rate": 4.616877487095828e-10, "loss": 2.3003, "step": 21790 }, { "epoch": 1.0, "learning_rate": 1.266763011043004e-10, "loss": 2.3028, "step": 21800 }, { "epoch": 1.0, "learning_rate": 1.0469118016942503e-12, "loss": 2.2949, "step": 21810 }, { "epoch": 1.0, "step": 21811, "total_flos": 1.3200227455536307e+20, "train_loss": 2.360168211507445, "train_runtime": 23329.1279, "train_samples_per_second": 119.668, "train_steps_per_second": 0.935 } ], "logging_steps": 10, "max_steps": 21811, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "total_flos": 1.3200227455536307e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }