{ "best_metric": 0.5851995594482614, "best_model_checkpoint": "vit-base-patch16-224-vit-base-patch16\\checkpoint-16086", "epoch": 2.9995804391403667, "eval_steps": 500, "global_step": 16086, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.107520198881293e-07, "loss": 7.9302, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.215040397762586e-07, "loss": 7.9301, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.32256059664388e-07, "loss": 7.93, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.2430080795525172e-06, "loss": 7.9296, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.5537600994406465e-06, "loss": 7.9293, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.864512119328776e-06, "loss": 7.9286, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.175264139216905e-06, "loss": 7.9281, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.4860161591050345e-06, "loss": 7.9278, "step": 80 }, { "epoch": 0.02, "learning_rate": 2.7967681789931635e-06, "loss": 7.926, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.107520198881293e-06, "loss": 7.9253, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.418272218769422e-06, "loss": 7.9242, "step": 110 }, { "epoch": 0.02, "learning_rate": 3.729024238657552e-06, "loss": 7.9225, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.0397762585456806e-06, "loss": 7.9207, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.35052827843381e-06, "loss": 7.9191, "step": 140 }, { "epoch": 0.03, "learning_rate": 4.6612802983219395e-06, "loss": 7.917, "step": 150 }, { "epoch": 0.03, "learning_rate": 4.972032318210069e-06, "loss": 7.9149, "step": 160 }, { "epoch": 0.03, "learning_rate": 5.282784338098198e-06, "loss": 7.9116, "step": 170 }, { "epoch": 0.03, "learning_rate": 5.593536357986327e-06, "loss": 7.9083, "step": 180 }, { "epoch": 0.04, "learning_rate": 5.9042883778744565e-06, "loss": 7.9065, "step": 190 }, { "epoch": 0.04, "learning_rate": 6.215040397762586e-06, "loss": 7.9019, "step": 200 }, { "epoch": 0.04, "learning_rate": 6.5257924176507155e-06, "loss": 7.8957, "step": 210 }, { "epoch": 0.04, "learning_rate": 6.836544437538844e-06, "loss": 7.8927, "step": 220 }, { "epoch": 0.04, "learning_rate": 7.1472964574269735e-06, "loss": 7.8868, "step": 230 }, { "epoch": 0.04, "learning_rate": 7.458048477315104e-06, "loss": 7.8831, "step": 240 }, { "epoch": 0.05, "learning_rate": 7.768800497203232e-06, "loss": 7.8775, "step": 250 }, { "epoch": 0.05, "learning_rate": 8.079552517091361e-06, "loss": 7.8719, "step": 260 }, { "epoch": 0.05, "learning_rate": 8.39030453697949e-06, "loss": 7.8652, "step": 270 }, { "epoch": 0.05, "learning_rate": 8.70105655686762e-06, "loss": 7.8573, "step": 280 }, { "epoch": 0.05, "learning_rate": 9.01180857675575e-06, "loss": 7.8496, "step": 290 }, { "epoch": 0.06, "learning_rate": 9.322560596643879e-06, "loss": 7.8454, "step": 300 }, { "epoch": 0.06, "learning_rate": 9.633312616532007e-06, "loss": 7.8381, "step": 310 }, { "epoch": 0.06, "learning_rate": 9.944064636420138e-06, "loss": 7.8295, "step": 320 }, { "epoch": 0.06, "learning_rate": 1.0254816656308266e-05, "loss": 7.8215, "step": 330 }, { "epoch": 0.06, "learning_rate": 1.0565568676196395e-05, "loss": 7.811, "step": 340 }, { "epoch": 0.07, "learning_rate": 1.0876320696084526e-05, "loss": 7.8033, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.1187072715972654e-05, "loss": 7.7903, "step": 360 }, { "epoch": 0.07, "learning_rate": 1.1497824735860784e-05, "loss": 7.7835, "step": 370 }, { "epoch": 0.07, "learning_rate": 1.1808576755748913e-05, "loss": 7.7719, "step": 380 }, { "epoch": 0.07, "learning_rate": 1.2119328775637043e-05, "loss": 7.7589, "step": 390 }, { "epoch": 0.07, "learning_rate": 1.2430080795525172e-05, "loss": 7.7576, "step": 400 }, { "epoch": 0.08, "learning_rate": 1.27408328154133e-05, "loss": 7.7456, "step": 410 }, { "epoch": 0.08, "learning_rate": 1.3051584835301431e-05, "loss": 7.7309, "step": 420 }, { "epoch": 0.08, "learning_rate": 1.3362336855189559e-05, "loss": 7.724, "step": 430 }, { "epoch": 0.08, "learning_rate": 1.3673088875077688e-05, "loss": 7.7139, "step": 440 }, { "epoch": 0.08, "learning_rate": 1.398384089496582e-05, "loss": 7.7025, "step": 450 }, { "epoch": 0.09, "learning_rate": 1.4294592914853947e-05, "loss": 7.6857, "step": 460 }, { "epoch": 0.09, "learning_rate": 1.4605344934742077e-05, "loss": 7.6762, "step": 470 }, { "epoch": 0.09, "learning_rate": 1.4916096954630208e-05, "loss": 7.6618, "step": 480 }, { "epoch": 0.09, "learning_rate": 1.5226848974518334e-05, "loss": 7.6498, "step": 490 }, { "epoch": 0.09, "learning_rate": 1.5537600994406463e-05, "loss": 7.6415, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.5848353014294593e-05, "loss": 7.6319, "step": 510 }, { "epoch": 0.1, "learning_rate": 1.6159105034182722e-05, "loss": 7.6112, "step": 520 }, { "epoch": 0.1, "learning_rate": 1.646985705407085e-05, "loss": 7.6032, "step": 530 }, { "epoch": 0.1, "learning_rate": 1.678060907395898e-05, "loss": 7.5935, "step": 540 }, { "epoch": 0.1, "learning_rate": 1.709136109384711e-05, "loss": 7.5846, "step": 550 }, { "epoch": 0.1, "learning_rate": 1.740211311373524e-05, "loss": 7.5674, "step": 560 }, { "epoch": 0.11, "learning_rate": 1.771286513362337e-05, "loss": 7.5512, "step": 570 }, { "epoch": 0.11, "learning_rate": 1.80236171535115e-05, "loss": 7.541, "step": 580 }, { "epoch": 0.11, "learning_rate": 1.833436917339963e-05, "loss": 7.5262, "step": 590 }, { "epoch": 0.11, "learning_rate": 1.8645121193287758e-05, "loss": 7.5138, "step": 600 }, { "epoch": 0.11, "learning_rate": 1.8955873213175887e-05, "loss": 7.4991, "step": 610 }, { "epoch": 0.12, "learning_rate": 1.9266625233064014e-05, "loss": 7.4734, "step": 620 }, { "epoch": 0.12, "learning_rate": 1.9577377252952146e-05, "loss": 7.4607, "step": 630 }, { "epoch": 0.12, "learning_rate": 1.9888129272840276e-05, "loss": 7.4621, "step": 640 }, { "epoch": 0.12, "learning_rate": 2.0198881292728402e-05, "loss": 7.4499, "step": 650 }, { "epoch": 0.12, "learning_rate": 2.050963331261653e-05, "loss": 7.4228, "step": 660 }, { "epoch": 0.12, "learning_rate": 2.0820385332504664e-05, "loss": 7.4174, "step": 670 }, { "epoch": 0.13, "learning_rate": 2.113113735239279e-05, "loss": 7.3955, "step": 680 }, { "epoch": 0.13, "learning_rate": 2.144188937228092e-05, "loss": 7.387, "step": 690 }, { "epoch": 0.13, "learning_rate": 2.1752641392169053e-05, "loss": 7.3643, "step": 700 }, { "epoch": 0.13, "learning_rate": 2.206339341205718e-05, "loss": 7.3718, "step": 710 }, { "epoch": 0.13, "learning_rate": 2.2374145431945308e-05, "loss": 7.3485, "step": 720 }, { "epoch": 0.14, "learning_rate": 2.2684897451833438e-05, "loss": 7.3414, "step": 730 }, { "epoch": 0.14, "learning_rate": 2.2995649471721567e-05, "loss": 7.3117, "step": 740 }, { "epoch": 0.14, "learning_rate": 2.3306401491609697e-05, "loss": 7.2965, "step": 750 }, { "epoch": 0.14, "learning_rate": 2.3617153511497826e-05, "loss": 7.3053, "step": 760 }, { "epoch": 0.14, "learning_rate": 2.3927905531385956e-05, "loss": 7.2712, "step": 770 }, { "epoch": 0.15, "learning_rate": 2.4238657551274085e-05, "loss": 7.2515, "step": 780 }, { "epoch": 0.15, "learning_rate": 2.4549409571162214e-05, "loss": 7.2411, "step": 790 }, { "epoch": 0.15, "learning_rate": 2.4860161591050344e-05, "loss": 7.2357, "step": 800 }, { "epoch": 0.15, "learning_rate": 2.5170913610938473e-05, "loss": 7.2151, "step": 810 }, { "epoch": 0.15, "learning_rate": 2.54816656308266e-05, "loss": 7.1943, "step": 820 }, { "epoch": 0.15, "learning_rate": 2.579241765071473e-05, "loss": 7.1974, "step": 830 }, { "epoch": 0.16, "learning_rate": 2.6103169670602862e-05, "loss": 7.2105, "step": 840 }, { "epoch": 0.16, "learning_rate": 2.6413921690490988e-05, "loss": 7.156, "step": 850 }, { "epoch": 0.16, "learning_rate": 2.6724673710379117e-05, "loss": 7.1484, "step": 860 }, { "epoch": 0.16, "learning_rate": 2.703542573026725e-05, "loss": 7.1293, "step": 870 }, { "epoch": 0.16, "learning_rate": 2.7346177750155376e-05, "loss": 7.1283, "step": 880 }, { "epoch": 0.17, "learning_rate": 2.7656929770043506e-05, "loss": 7.1159, "step": 890 }, { "epoch": 0.17, "learning_rate": 2.796768178993164e-05, "loss": 7.0768, "step": 900 }, { "epoch": 0.17, "learning_rate": 2.8278433809819765e-05, "loss": 7.0734, "step": 910 }, { "epoch": 0.17, "learning_rate": 2.8589185829707894e-05, "loss": 7.068, "step": 920 }, { "epoch": 0.17, "learning_rate": 2.8899937849596027e-05, "loss": 7.0497, "step": 930 }, { "epoch": 0.18, "learning_rate": 2.9210689869484153e-05, "loss": 7.0373, "step": 940 }, { "epoch": 0.18, "learning_rate": 2.9521441889372283e-05, "loss": 7.0332, "step": 950 }, { "epoch": 0.18, "learning_rate": 2.9832193909260415e-05, "loss": 7.0304, "step": 960 }, { "epoch": 0.18, "learning_rate": 3.014294592914854e-05, "loss": 7.0186, "step": 970 }, { "epoch": 0.18, "learning_rate": 3.0453697949036668e-05, "loss": 6.9994, "step": 980 }, { "epoch": 0.18, "learning_rate": 3.0764449968924804e-05, "loss": 6.988, "step": 990 }, { "epoch": 0.19, "learning_rate": 3.1075201988812927e-05, "loss": 6.9575, "step": 1000 }, { "epoch": 0.19, "learning_rate": 3.1385954008701056e-05, "loss": 6.9407, "step": 1010 }, { "epoch": 0.19, "learning_rate": 3.1696706028589185e-05, "loss": 6.9424, "step": 1020 }, { "epoch": 0.19, "learning_rate": 3.2007458048477315e-05, "loss": 6.9267, "step": 1030 }, { "epoch": 0.19, "learning_rate": 3.2318210068365444e-05, "loss": 6.9348, "step": 1040 }, { "epoch": 0.2, "learning_rate": 3.2628962088253574e-05, "loss": 6.9101, "step": 1050 }, { "epoch": 0.2, "learning_rate": 3.29397141081417e-05, "loss": 6.8636, "step": 1060 }, { "epoch": 0.2, "learning_rate": 3.325046612802983e-05, "loss": 6.8813, "step": 1070 }, { "epoch": 0.2, "learning_rate": 3.356121814791796e-05, "loss": 6.8706, "step": 1080 }, { "epoch": 0.2, "learning_rate": 3.387197016780609e-05, "loss": 6.8722, "step": 1090 }, { "epoch": 0.21, "learning_rate": 3.418272218769422e-05, "loss": 6.8492, "step": 1100 }, { "epoch": 0.21, "learning_rate": 3.449347420758235e-05, "loss": 6.8014, "step": 1110 }, { "epoch": 0.21, "learning_rate": 3.480422622747048e-05, "loss": 6.8025, "step": 1120 }, { "epoch": 0.21, "learning_rate": 3.511497824735861e-05, "loss": 6.8108, "step": 1130 }, { "epoch": 0.21, "learning_rate": 3.542573026724674e-05, "loss": 6.8155, "step": 1140 }, { "epoch": 0.21, "learning_rate": 3.573648228713487e-05, "loss": 6.7615, "step": 1150 }, { "epoch": 0.22, "learning_rate": 3.6047234307023e-05, "loss": 6.7385, "step": 1160 }, { "epoch": 0.22, "learning_rate": 3.635798632691113e-05, "loss": 6.7358, "step": 1170 }, { "epoch": 0.22, "learning_rate": 3.666873834679926e-05, "loss": 6.7388, "step": 1180 }, { "epoch": 0.22, "learning_rate": 3.6979490366687386e-05, "loss": 6.7352, "step": 1190 }, { "epoch": 0.22, "learning_rate": 3.7290242386575516e-05, "loss": 6.7093, "step": 1200 }, { "epoch": 0.23, "learning_rate": 3.7600994406463645e-05, "loss": 6.7211, "step": 1210 }, { "epoch": 0.23, "learning_rate": 3.7911746426351775e-05, "loss": 6.6963, "step": 1220 }, { "epoch": 0.23, "learning_rate": 3.8222498446239904e-05, "loss": 6.6921, "step": 1230 }, { "epoch": 0.23, "learning_rate": 3.853325046612803e-05, "loss": 6.6363, "step": 1240 }, { "epoch": 0.23, "learning_rate": 3.884400248601616e-05, "loss": 6.6501, "step": 1250 }, { "epoch": 0.23, "learning_rate": 3.915475450590429e-05, "loss": 6.6199, "step": 1260 }, { "epoch": 0.24, "learning_rate": 3.9465506525792415e-05, "loss": 6.5996, "step": 1270 }, { "epoch": 0.24, "learning_rate": 3.977625854568055e-05, "loss": 6.6274, "step": 1280 }, { "epoch": 0.24, "learning_rate": 4.008701056556868e-05, "loss": 6.5743, "step": 1290 }, { "epoch": 0.24, "learning_rate": 4.0397762585456804e-05, "loss": 6.5633, "step": 1300 }, { "epoch": 0.24, "learning_rate": 4.070851460534494e-05, "loss": 6.5602, "step": 1310 }, { "epoch": 0.25, "learning_rate": 4.101926662523306e-05, "loss": 6.5558, "step": 1320 }, { "epoch": 0.25, "learning_rate": 4.133001864512119e-05, "loss": 6.5785, "step": 1330 }, { "epoch": 0.25, "learning_rate": 4.164077066500933e-05, "loss": 6.5247, "step": 1340 }, { "epoch": 0.25, "learning_rate": 4.195152268489745e-05, "loss": 6.5409, "step": 1350 }, { "epoch": 0.25, "learning_rate": 4.226227470478558e-05, "loss": 6.4857, "step": 1360 }, { "epoch": 0.26, "learning_rate": 4.257302672467372e-05, "loss": 6.5344, "step": 1370 }, { "epoch": 0.26, "learning_rate": 4.288377874456184e-05, "loss": 6.5113, "step": 1380 }, { "epoch": 0.26, "learning_rate": 4.319453076444997e-05, "loss": 6.4928, "step": 1390 }, { "epoch": 0.26, "learning_rate": 4.3505282784338105e-05, "loss": 6.4826, "step": 1400 }, { "epoch": 0.26, "learning_rate": 4.381603480422623e-05, "loss": 6.4621, "step": 1410 }, { "epoch": 0.26, "learning_rate": 4.412678682411436e-05, "loss": 6.4211, "step": 1420 }, { "epoch": 0.27, "learning_rate": 4.4437538844002494e-05, "loss": 6.4753, "step": 1430 }, { "epoch": 0.27, "learning_rate": 4.4748290863890616e-05, "loss": 6.4343, "step": 1440 }, { "epoch": 0.27, "learning_rate": 4.5059042883778746e-05, "loss": 6.4252, "step": 1450 }, { "epoch": 0.27, "learning_rate": 4.5369794903666875e-05, "loss": 6.3907, "step": 1460 }, { "epoch": 0.27, "learning_rate": 4.5680546923555005e-05, "loss": 6.383, "step": 1470 }, { "epoch": 0.28, "learning_rate": 4.5991298943443134e-05, "loss": 6.378, "step": 1480 }, { "epoch": 0.28, "learning_rate": 4.6302050963331264e-05, "loss": 6.3726, "step": 1490 }, { "epoch": 0.28, "learning_rate": 4.661280298321939e-05, "loss": 6.3738, "step": 1500 }, { "epoch": 0.28, "learning_rate": 4.692355500310752e-05, "loss": 6.3397, "step": 1510 }, { "epoch": 0.28, "learning_rate": 4.723430702299565e-05, "loss": 6.351, "step": 1520 }, { "epoch": 0.29, "learning_rate": 4.754505904288378e-05, "loss": 6.3207, "step": 1530 }, { "epoch": 0.29, "learning_rate": 4.785581106277191e-05, "loss": 6.3161, "step": 1540 }, { "epoch": 0.29, "learning_rate": 4.816656308266004e-05, "loss": 6.3103, "step": 1550 }, { "epoch": 0.29, "learning_rate": 4.847731510254817e-05, "loss": 6.291, "step": 1560 }, { "epoch": 0.29, "learning_rate": 4.87880671224363e-05, "loss": 6.2851, "step": 1570 }, { "epoch": 0.29, "learning_rate": 4.909881914232443e-05, "loss": 6.2605, "step": 1580 }, { "epoch": 0.3, "learning_rate": 4.940957116221256e-05, "loss": 6.2434, "step": 1590 }, { "epoch": 0.3, "learning_rate": 4.972032318210069e-05, "loss": 6.2739, "step": 1600 }, { "epoch": 0.3, "learning_rate": 4.999654624576915e-05, "loss": 6.2309, "step": 1610 }, { "epoch": 0.3, "learning_rate": 4.996200870346067e-05, "loss": 6.2764, "step": 1620 }, { "epoch": 0.3, "learning_rate": 4.992747116115217e-05, "loss": 6.1911, "step": 1630 }, { "epoch": 0.31, "learning_rate": 4.989293361884368e-05, "loss": 6.193, "step": 1640 }, { "epoch": 0.31, "learning_rate": 4.98583960765352e-05, "loss": 6.1912, "step": 1650 }, { "epoch": 0.31, "learning_rate": 4.982385853422671e-05, "loss": 6.1854, "step": 1660 }, { "epoch": 0.31, "learning_rate": 4.978932099191822e-05, "loss": 6.1694, "step": 1670 }, { "epoch": 0.31, "learning_rate": 4.9754783449609724e-05, "loss": 6.1337, "step": 1680 }, { "epoch": 0.32, "learning_rate": 4.972024590730124e-05, "loss": 6.1062, "step": 1690 }, { "epoch": 0.32, "learning_rate": 4.968570836499275e-05, "loss": 6.132, "step": 1700 }, { "epoch": 0.32, "learning_rate": 4.965117082268426e-05, "loss": 6.178, "step": 1710 }, { "epoch": 0.32, "learning_rate": 4.961663328037577e-05, "loss": 6.1392, "step": 1720 }, { "epoch": 0.32, "learning_rate": 4.958209573806728e-05, "loss": 6.1331, "step": 1730 }, { "epoch": 0.32, "learning_rate": 4.954755819575879e-05, "loss": 6.0742, "step": 1740 }, { "epoch": 0.33, "learning_rate": 4.95130206534503e-05, "loss": 6.0837, "step": 1750 }, { "epoch": 0.33, "learning_rate": 4.947848311114181e-05, "loss": 6.0774, "step": 1760 }, { "epoch": 0.33, "learning_rate": 4.944394556883332e-05, "loss": 6.0711, "step": 1770 }, { "epoch": 0.33, "learning_rate": 4.9409408026524834e-05, "loss": 6.0922, "step": 1780 }, { "epoch": 0.33, "learning_rate": 4.9374870484216344e-05, "loss": 6.041, "step": 1790 }, { "epoch": 0.34, "learning_rate": 4.9340332941907854e-05, "loss": 6.0081, "step": 1800 }, { "epoch": 0.34, "learning_rate": 4.930579539959937e-05, "loss": 5.9962, "step": 1810 }, { "epoch": 0.34, "learning_rate": 4.9271257857290875e-05, "loss": 6.0346, "step": 1820 }, { "epoch": 0.34, "learning_rate": 4.9236720314982385e-05, "loss": 6.0221, "step": 1830 }, { "epoch": 0.34, "learning_rate": 4.92021827726739e-05, "loss": 6.0185, "step": 1840 }, { "epoch": 0.34, "learning_rate": 4.916764523036541e-05, "loss": 5.9659, "step": 1850 }, { "epoch": 0.35, "learning_rate": 4.913310768805692e-05, "loss": 5.9443, "step": 1860 }, { "epoch": 0.35, "learning_rate": 4.9098570145748426e-05, "loss": 5.9759, "step": 1870 }, { "epoch": 0.35, "learning_rate": 4.9064032603439943e-05, "loss": 5.973, "step": 1880 }, { "epoch": 0.35, "learning_rate": 4.9029495061131454e-05, "loss": 5.9397, "step": 1890 }, { "epoch": 0.35, "learning_rate": 4.8994957518822964e-05, "loss": 5.9718, "step": 1900 }, { "epoch": 0.36, "learning_rate": 4.8960419976514474e-05, "loss": 5.8958, "step": 1910 }, { "epoch": 0.36, "learning_rate": 4.8925882434205985e-05, "loss": 5.9235, "step": 1920 }, { "epoch": 0.36, "learning_rate": 4.8891344891897495e-05, "loss": 5.8631, "step": 1930 }, { "epoch": 0.36, "learning_rate": 4.8856807349589005e-05, "loss": 5.8879, "step": 1940 }, { "epoch": 0.36, "learning_rate": 4.8822269807280516e-05, "loss": 5.9051, "step": 1950 }, { "epoch": 0.37, "learning_rate": 4.8787732264972026e-05, "loss": 5.8506, "step": 1960 }, { "epoch": 0.37, "learning_rate": 4.8753194722663536e-05, "loss": 5.8902, "step": 1970 }, { "epoch": 0.37, "learning_rate": 4.8718657180355047e-05, "loss": 5.8543, "step": 1980 }, { "epoch": 0.37, "learning_rate": 4.868411963804656e-05, "loss": 5.8779, "step": 1990 }, { "epoch": 0.37, "learning_rate": 4.8649582095738074e-05, "loss": 5.859, "step": 2000 }, { "epoch": 0.37, "learning_rate": 4.861504455342958e-05, "loss": 5.8649, "step": 2010 }, { "epoch": 0.38, "learning_rate": 4.858050701112109e-05, "loss": 5.8307, "step": 2020 }, { "epoch": 0.38, "learning_rate": 4.8545969468812605e-05, "loss": 5.7816, "step": 2030 }, { "epoch": 0.38, "learning_rate": 4.8511431926504115e-05, "loss": 5.7834, "step": 2040 }, { "epoch": 0.38, "learning_rate": 4.847689438419562e-05, "loss": 5.8585, "step": 2050 }, { "epoch": 0.38, "learning_rate": 4.844235684188713e-05, "loss": 5.758, "step": 2060 }, { "epoch": 0.39, "learning_rate": 4.8407819299578646e-05, "loss": 5.7945, "step": 2070 }, { "epoch": 0.39, "learning_rate": 4.8373281757270157e-05, "loss": 5.7647, "step": 2080 }, { "epoch": 0.39, "learning_rate": 4.833874421496167e-05, "loss": 5.7845, "step": 2090 }, { "epoch": 0.39, "learning_rate": 4.830420667265318e-05, "loss": 5.7382, "step": 2100 }, { "epoch": 0.39, "learning_rate": 4.826966913034469e-05, "loss": 5.7743, "step": 2110 }, { "epoch": 0.4, "learning_rate": 4.82351315880362e-05, "loss": 5.7199, "step": 2120 }, { "epoch": 0.4, "learning_rate": 4.820059404572771e-05, "loss": 5.7205, "step": 2130 }, { "epoch": 0.4, "learning_rate": 4.816605650341922e-05, "loss": 5.7001, "step": 2140 }, { "epoch": 0.4, "learning_rate": 4.813151896111073e-05, "loss": 5.7475, "step": 2150 }, { "epoch": 0.4, "learning_rate": 4.809698141880224e-05, "loss": 5.735, "step": 2160 }, { "epoch": 0.4, "learning_rate": 4.806244387649375e-05, "loss": 5.7608, "step": 2170 }, { "epoch": 0.41, "learning_rate": 4.8027906334185266e-05, "loss": 5.6663, "step": 2180 }, { "epoch": 0.41, "learning_rate": 4.799336879187678e-05, "loss": 5.6878, "step": 2190 }, { "epoch": 0.41, "learning_rate": 4.795883124956828e-05, "loss": 5.6914, "step": 2200 }, { "epoch": 0.41, "learning_rate": 4.792429370725979e-05, "loss": 5.6306, "step": 2210 }, { "epoch": 0.41, "learning_rate": 4.788975616495131e-05, "loss": 5.6178, "step": 2220 }, { "epoch": 0.42, "learning_rate": 4.785521862264282e-05, "loss": 5.679, "step": 2230 }, { "epoch": 0.42, "learning_rate": 4.782068108033432e-05, "loss": 5.6543, "step": 2240 }, { "epoch": 0.42, "learning_rate": 4.778614353802583e-05, "loss": 5.6911, "step": 2250 }, { "epoch": 0.42, "learning_rate": 4.775160599571735e-05, "loss": 5.6754, "step": 2260 }, { "epoch": 0.42, "learning_rate": 4.771706845340886e-05, "loss": 5.6252, "step": 2270 }, { "epoch": 0.43, "learning_rate": 4.768253091110037e-05, "loss": 5.6094, "step": 2280 }, { "epoch": 0.43, "learning_rate": 4.764799336879188e-05, "loss": 5.599, "step": 2290 }, { "epoch": 0.43, "learning_rate": 4.761345582648339e-05, "loss": 5.6413, "step": 2300 }, { "epoch": 0.43, "learning_rate": 4.75789182841749e-05, "loss": 5.6193, "step": 2310 }, { "epoch": 0.43, "learning_rate": 4.754438074186641e-05, "loss": 5.5898, "step": 2320 }, { "epoch": 0.43, "learning_rate": 4.750984319955792e-05, "loss": 5.572, "step": 2330 }, { "epoch": 0.44, "learning_rate": 4.747530565724943e-05, "loss": 5.6013, "step": 2340 }, { "epoch": 0.44, "learning_rate": 4.744076811494094e-05, "loss": 5.5543, "step": 2350 }, { "epoch": 0.44, "learning_rate": 4.740623057263245e-05, "loss": 5.5415, "step": 2360 }, { "epoch": 0.44, "learning_rate": 4.737169303032397e-05, "loss": 5.5246, "step": 2370 }, { "epoch": 0.44, "learning_rate": 4.733715548801548e-05, "loss": 5.5657, "step": 2380 }, { "epoch": 0.45, "learning_rate": 4.730261794570698e-05, "loss": 5.5453, "step": 2390 }, { "epoch": 0.45, "learning_rate": 4.7268080403398493e-05, "loss": 5.5467, "step": 2400 }, { "epoch": 0.45, "learning_rate": 4.723354286109001e-05, "loss": 5.5455, "step": 2410 }, { "epoch": 0.45, "learning_rate": 4.719900531878152e-05, "loss": 5.5253, "step": 2420 }, { "epoch": 0.45, "learning_rate": 4.7164467776473024e-05, "loss": 5.4831, "step": 2430 }, { "epoch": 0.45, "learning_rate": 4.7129930234164535e-05, "loss": 5.4704, "step": 2440 }, { "epoch": 0.46, "learning_rate": 4.709539269185605e-05, "loss": 5.4801, "step": 2450 }, { "epoch": 0.46, "learning_rate": 4.706085514954756e-05, "loss": 5.48, "step": 2460 }, { "epoch": 0.46, "learning_rate": 4.702631760723907e-05, "loss": 5.5388, "step": 2470 }, { "epoch": 0.46, "learning_rate": 4.699178006493058e-05, "loss": 5.4883, "step": 2480 }, { "epoch": 0.46, "learning_rate": 4.695724252262209e-05, "loss": 5.4321, "step": 2490 }, { "epoch": 0.47, "learning_rate": 4.69227049803136e-05, "loss": 5.4297, "step": 2500 }, { "epoch": 0.47, "learning_rate": 4.6888167438005114e-05, "loss": 5.4174, "step": 2510 }, { "epoch": 0.47, "learning_rate": 4.6853629895696624e-05, "loss": 5.5185, "step": 2520 }, { "epoch": 0.47, "learning_rate": 4.6819092353388134e-05, "loss": 5.4269, "step": 2530 }, { "epoch": 0.47, "learning_rate": 4.6784554811079645e-05, "loss": 5.4688, "step": 2540 }, { "epoch": 0.48, "learning_rate": 4.6750017268771155e-05, "loss": 5.384, "step": 2550 }, { "epoch": 0.48, "learning_rate": 4.671547972646267e-05, "loss": 5.4662, "step": 2560 }, { "epoch": 0.48, "learning_rate": 4.668094218415418e-05, "loss": 5.4473, "step": 2570 }, { "epoch": 0.48, "learning_rate": 4.6646404641845686e-05, "loss": 5.4024, "step": 2580 }, { "epoch": 0.48, "learning_rate": 4.6611867099537196e-05, "loss": 5.3608, "step": 2590 }, { "epoch": 0.48, "learning_rate": 4.657732955722871e-05, "loss": 5.4895, "step": 2600 }, { "epoch": 0.49, "learning_rate": 4.6542792014920224e-05, "loss": 5.3868, "step": 2610 }, { "epoch": 0.49, "learning_rate": 4.650825447261173e-05, "loss": 5.3681, "step": 2620 }, { "epoch": 0.49, "learning_rate": 4.647371693030324e-05, "loss": 5.4223, "step": 2630 }, { "epoch": 0.49, "learning_rate": 4.6439179387994755e-05, "loss": 5.412, "step": 2640 }, { "epoch": 0.49, "learning_rate": 4.6404641845686265e-05, "loss": 5.3381, "step": 2650 }, { "epoch": 0.5, "learning_rate": 4.6370104303377775e-05, "loss": 5.3195, "step": 2660 }, { "epoch": 0.5, "learning_rate": 4.6335566761069285e-05, "loss": 5.3945, "step": 2670 }, { "epoch": 0.5, "learning_rate": 4.6301029218760796e-05, "loss": 5.3316, "step": 2680 }, { "epoch": 0.5, "learning_rate": 4.6266491676452306e-05, "loss": 5.3232, "step": 2690 }, { "epoch": 0.5, "learning_rate": 4.6231954134143816e-05, "loss": 5.3246, "step": 2700 }, { "epoch": 0.51, "learning_rate": 4.619741659183533e-05, "loss": 5.3445, "step": 2710 }, { "epoch": 0.51, "learning_rate": 4.616287904952684e-05, "loss": 5.2847, "step": 2720 }, { "epoch": 0.51, "learning_rate": 4.612834150721835e-05, "loss": 5.2795, "step": 2730 }, { "epoch": 0.51, "learning_rate": 4.609380396490986e-05, "loss": 5.2559, "step": 2740 }, { "epoch": 0.51, "learning_rate": 4.6059266422601375e-05, "loss": 5.3091, "step": 2750 }, { "epoch": 0.51, "learning_rate": 4.6024728880292885e-05, "loss": 5.2441, "step": 2760 }, { "epoch": 0.52, "learning_rate": 4.599019133798439e-05, "loss": 5.2534, "step": 2770 }, { "epoch": 0.52, "learning_rate": 4.59556537956759e-05, "loss": 5.2869, "step": 2780 }, { "epoch": 0.52, "learning_rate": 4.5921116253367416e-05, "loss": 5.2629, "step": 2790 }, { "epoch": 0.52, "learning_rate": 4.5886578711058926e-05, "loss": 5.2835, "step": 2800 }, { "epoch": 0.52, "learning_rate": 4.585204116875043e-05, "loss": 5.2437, "step": 2810 }, { "epoch": 0.53, "learning_rate": 4.581750362644194e-05, "loss": 5.2736, "step": 2820 }, { "epoch": 0.53, "learning_rate": 4.578296608413346e-05, "loss": 5.2331, "step": 2830 }, { "epoch": 0.53, "learning_rate": 4.574842854182497e-05, "loss": 5.2059, "step": 2840 }, { "epoch": 0.53, "learning_rate": 4.571389099951648e-05, "loss": 5.2348, "step": 2850 }, { "epoch": 0.53, "learning_rate": 4.567935345720799e-05, "loss": 5.2183, "step": 2860 }, { "epoch": 0.54, "learning_rate": 4.56448159148995e-05, "loss": 5.1723, "step": 2870 }, { "epoch": 0.54, "learning_rate": 4.561027837259101e-05, "loss": 5.206, "step": 2880 }, { "epoch": 0.54, "learning_rate": 4.557574083028252e-05, "loss": 5.276, "step": 2890 }, { "epoch": 0.54, "learning_rate": 4.554120328797403e-05, "loss": 5.1271, "step": 2900 }, { "epoch": 0.54, "learning_rate": 4.550666574566554e-05, "loss": 5.1887, "step": 2910 }, { "epoch": 0.54, "learning_rate": 4.547212820335705e-05, "loss": 5.2678, "step": 2920 }, { "epoch": 0.55, "learning_rate": 4.543759066104856e-05, "loss": 5.2341, "step": 2930 }, { "epoch": 0.55, "learning_rate": 4.540305311874008e-05, "loss": 5.2136, "step": 2940 }, { "epoch": 0.55, "learning_rate": 4.536851557643158e-05, "loss": 5.1565, "step": 2950 }, { "epoch": 0.55, "learning_rate": 4.533397803412309e-05, "loss": 5.1883, "step": 2960 }, { "epoch": 0.55, "learning_rate": 4.52994404918146e-05, "loss": 5.187, "step": 2970 }, { "epoch": 0.56, "learning_rate": 4.526490294950612e-05, "loss": 5.145, "step": 2980 }, { "epoch": 0.56, "learning_rate": 4.523036540719763e-05, "loss": 5.1121, "step": 2990 }, { "epoch": 0.56, "learning_rate": 4.519582786488913e-05, "loss": 5.063, "step": 3000 }, { "epoch": 0.56, "learning_rate": 4.516129032258064e-05, "loss": 5.157, "step": 3010 }, { "epoch": 0.56, "learning_rate": 4.512675278027216e-05, "loss": 5.1123, "step": 3020 }, { "epoch": 0.57, "learning_rate": 4.509221523796367e-05, "loss": 5.115, "step": 3030 }, { "epoch": 0.57, "learning_rate": 4.505767769565518e-05, "loss": 5.1296, "step": 3040 }, { "epoch": 0.57, "learning_rate": 4.502314015334669e-05, "loss": 5.1624, "step": 3050 }, { "epoch": 0.57, "learning_rate": 4.49886026110382e-05, "loss": 5.0889, "step": 3060 }, { "epoch": 0.57, "learning_rate": 4.495406506872971e-05, "loss": 5.0914, "step": 3070 }, { "epoch": 0.57, "learning_rate": 4.491952752642122e-05, "loss": 5.1042, "step": 3080 }, { "epoch": 0.58, "learning_rate": 4.488498998411273e-05, "loss": 5.0769, "step": 3090 }, { "epoch": 0.58, "learning_rate": 4.485045244180424e-05, "loss": 5.0678, "step": 3100 }, { "epoch": 0.58, "learning_rate": 4.481591489949575e-05, "loss": 5.0928, "step": 3110 }, { "epoch": 0.58, "learning_rate": 4.478137735718726e-05, "loss": 5.0958, "step": 3120 }, { "epoch": 0.58, "learning_rate": 4.474683981487878e-05, "loss": 5.0713, "step": 3130 }, { "epoch": 0.59, "learning_rate": 4.4712302272570284e-05, "loss": 5.0995, "step": 3140 }, { "epoch": 0.59, "learning_rate": 4.4677764730261794e-05, "loss": 5.0845, "step": 3150 }, { "epoch": 0.59, "learning_rate": 4.4643227187953304e-05, "loss": 5.0856, "step": 3160 }, { "epoch": 0.59, "learning_rate": 4.460868964564482e-05, "loss": 5.0638, "step": 3170 }, { "epoch": 0.59, "learning_rate": 4.457415210333633e-05, "loss": 5.0594, "step": 3180 }, { "epoch": 0.59, "learning_rate": 4.4539614561027835e-05, "loss": 5.0941, "step": 3190 }, { "epoch": 0.6, "learning_rate": 4.4505077018719346e-05, "loss": 5.0457, "step": 3200 }, { "epoch": 0.6, "learning_rate": 4.447053947641086e-05, "loss": 4.9635, "step": 3210 }, { "epoch": 0.6, "learning_rate": 4.443600193410237e-05, "loss": 4.9817, "step": 3220 }, { "epoch": 0.6, "learning_rate": 4.4401464391793883e-05, "loss": 4.989, "step": 3230 }, { "epoch": 0.6, "learning_rate": 4.4366926849485394e-05, "loss": 4.9672, "step": 3240 }, { "epoch": 0.61, "learning_rate": 4.4332389307176904e-05, "loss": 5.008, "step": 3250 }, { "epoch": 0.61, "learning_rate": 4.4297851764868414e-05, "loss": 4.9655, "step": 3260 }, { "epoch": 0.61, "learning_rate": 4.4263314222559925e-05, "loss": 4.9778, "step": 3270 }, { "epoch": 0.61, "learning_rate": 4.4228776680251435e-05, "loss": 4.9593, "step": 3280 }, { "epoch": 0.61, "learning_rate": 4.4194239137942945e-05, "loss": 4.9961, "step": 3290 }, { "epoch": 0.62, "learning_rate": 4.4159701595634456e-05, "loss": 4.9818, "step": 3300 }, { "epoch": 0.62, "learning_rate": 4.4125164053325966e-05, "loss": 4.913, "step": 3310 }, { "epoch": 0.62, "learning_rate": 4.409062651101748e-05, "loss": 4.9754, "step": 3320 }, { "epoch": 0.62, "learning_rate": 4.4056088968708987e-05, "loss": 4.9515, "step": 3330 }, { "epoch": 0.62, "learning_rate": 4.40215514264005e-05, "loss": 4.9495, "step": 3340 }, { "epoch": 0.62, "learning_rate": 4.398701388409201e-05, "loss": 4.9268, "step": 3350 }, { "epoch": 0.63, "learning_rate": 4.3952476341783524e-05, "loss": 4.9985, "step": 3360 }, { "epoch": 0.63, "learning_rate": 4.3917938799475035e-05, "loss": 4.9254, "step": 3370 }, { "epoch": 0.63, "learning_rate": 4.388340125716654e-05, "loss": 4.9554, "step": 3380 }, { "epoch": 0.63, "learning_rate": 4.384886371485805e-05, "loss": 4.9328, "step": 3390 }, { "epoch": 0.63, "learning_rate": 4.3814326172549566e-05, "loss": 4.9389, "step": 3400 }, { "epoch": 0.64, "learning_rate": 4.3779788630241076e-05, "loss": 4.9226, "step": 3410 }, { "epoch": 0.64, "learning_rate": 4.3745251087932586e-05, "loss": 4.9134, "step": 3420 }, { "epoch": 0.64, "learning_rate": 4.3710713545624097e-05, "loss": 4.9435, "step": 3430 }, { "epoch": 0.64, "learning_rate": 4.367617600331561e-05, "loss": 4.9202, "step": 3440 }, { "epoch": 0.64, "learning_rate": 4.364163846100712e-05, "loss": 4.9282, "step": 3450 }, { "epoch": 0.65, "learning_rate": 4.360710091869863e-05, "loss": 4.8598, "step": 3460 }, { "epoch": 0.65, "learning_rate": 4.357256337639014e-05, "loss": 4.9474, "step": 3470 }, { "epoch": 0.65, "learning_rate": 4.353802583408165e-05, "loss": 4.8958, "step": 3480 }, { "epoch": 0.65, "learning_rate": 4.350348829177316e-05, "loss": 4.8854, "step": 3490 }, { "epoch": 0.65, "learning_rate": 4.346895074946467e-05, "loss": 4.8184, "step": 3500 }, { "epoch": 0.65, "learning_rate": 4.3434413207156186e-05, "loss": 4.9108, "step": 3510 }, { "epoch": 0.66, "learning_rate": 4.339987566484769e-05, "loss": 4.8647, "step": 3520 }, { "epoch": 0.66, "learning_rate": 4.33653381225392e-05, "loss": 4.8807, "step": 3530 }, { "epoch": 0.66, "learning_rate": 4.333080058023071e-05, "loss": 4.8766, "step": 3540 }, { "epoch": 0.66, "learning_rate": 4.329626303792223e-05, "loss": 4.8508, "step": 3550 }, { "epoch": 0.66, "learning_rate": 4.326172549561374e-05, "loss": 4.8532, "step": 3560 }, { "epoch": 0.67, "learning_rate": 4.322718795330524e-05, "loss": 4.8687, "step": 3570 }, { "epoch": 0.67, "learning_rate": 4.319265041099675e-05, "loss": 4.7944, "step": 3580 }, { "epoch": 0.67, "learning_rate": 4.315811286868827e-05, "loss": 4.7731, "step": 3590 }, { "epoch": 0.67, "learning_rate": 4.312357532637978e-05, "loss": 4.8183, "step": 3600 }, { "epoch": 0.67, "learning_rate": 4.308903778407129e-05, "loss": 4.7874, "step": 3610 }, { "epoch": 0.68, "learning_rate": 4.30545002417628e-05, "loss": 4.7327, "step": 3620 }, { "epoch": 0.68, "learning_rate": 4.301996269945431e-05, "loss": 4.8096, "step": 3630 }, { "epoch": 0.68, "learning_rate": 4.298542515714582e-05, "loss": 4.738, "step": 3640 }, { "epoch": 0.68, "learning_rate": 4.295088761483733e-05, "loss": 4.8368, "step": 3650 }, { "epoch": 0.68, "learning_rate": 4.291635007252884e-05, "loss": 4.757, "step": 3660 }, { "epoch": 0.68, "learning_rate": 4.288181253022035e-05, "loss": 4.789, "step": 3670 }, { "epoch": 0.69, "learning_rate": 4.284727498791186e-05, "loss": 4.7926, "step": 3680 }, { "epoch": 0.69, "learning_rate": 4.281273744560337e-05, "loss": 4.7196, "step": 3690 }, { "epoch": 0.69, "learning_rate": 4.277819990329489e-05, "loss": 4.7615, "step": 3700 }, { "epoch": 0.69, "learning_rate": 4.274366236098639e-05, "loss": 4.8161, "step": 3710 }, { "epoch": 0.69, "learning_rate": 4.27091248186779e-05, "loss": 4.7221, "step": 3720 }, { "epoch": 0.7, "learning_rate": 4.267458727636941e-05, "loss": 4.7104, "step": 3730 }, { "epoch": 0.7, "learning_rate": 4.264004973406093e-05, "loss": 4.6939, "step": 3740 }, { "epoch": 0.7, "learning_rate": 4.260551219175244e-05, "loss": 4.7584, "step": 3750 }, { "epoch": 0.7, "learning_rate": 4.2570974649443944e-05, "loss": 4.7131, "step": 3760 }, { "epoch": 0.7, "learning_rate": 4.2536437107135454e-05, "loss": 4.7359, "step": 3770 }, { "epoch": 0.7, "learning_rate": 4.250189956482697e-05, "loss": 4.7093, "step": 3780 }, { "epoch": 0.71, "learning_rate": 4.246736202251848e-05, "loss": 4.6965, "step": 3790 }, { "epoch": 0.71, "learning_rate": 4.243282448020999e-05, "loss": 4.7546, "step": 3800 }, { "epoch": 0.71, "learning_rate": 4.23982869379015e-05, "loss": 4.6808, "step": 3810 }, { "epoch": 0.71, "learning_rate": 4.236374939559301e-05, "loss": 4.7021, "step": 3820 }, { "epoch": 0.71, "learning_rate": 4.232921185328452e-05, "loss": 4.6742, "step": 3830 }, { "epoch": 0.72, "learning_rate": 4.229467431097603e-05, "loss": 4.7074, "step": 3840 }, { "epoch": 0.72, "learning_rate": 4.226013676866754e-05, "loss": 4.719, "step": 3850 }, { "epoch": 0.72, "learning_rate": 4.2225599226359054e-05, "loss": 4.6518, "step": 3860 }, { "epoch": 0.72, "learning_rate": 4.2191061684050564e-05, "loss": 4.6734, "step": 3870 }, { "epoch": 0.72, "learning_rate": 4.2156524141742074e-05, "loss": 4.686, "step": 3880 }, { "epoch": 0.73, "learning_rate": 4.212198659943359e-05, "loss": 4.6587, "step": 3890 }, { "epoch": 0.73, "learning_rate": 4.2087449057125095e-05, "loss": 4.6642, "step": 3900 }, { "epoch": 0.73, "learning_rate": 4.2052911514816605e-05, "loss": 4.6555, "step": 3910 }, { "epoch": 0.73, "learning_rate": 4.2018373972508116e-05, "loss": 4.6315, "step": 3920 }, { "epoch": 0.73, "learning_rate": 4.198383643019963e-05, "loss": 4.6435, "step": 3930 }, { "epoch": 0.73, "learning_rate": 4.194929888789114e-05, "loss": 4.6456, "step": 3940 }, { "epoch": 0.74, "learning_rate": 4.1914761345582646e-05, "loss": 4.5384, "step": 3950 }, { "epoch": 0.74, "learning_rate": 4.188022380327416e-05, "loss": 4.6354, "step": 3960 }, { "epoch": 0.74, "learning_rate": 4.1845686260965674e-05, "loss": 4.5797, "step": 3970 }, { "epoch": 0.74, "learning_rate": 4.1811148718657184e-05, "loss": 4.6615, "step": 3980 }, { "epoch": 0.74, "learning_rate": 4.1776611176348694e-05, "loss": 4.6493, "step": 3990 }, { "epoch": 0.75, "learning_rate": 4.1742073634040205e-05, "loss": 4.5619, "step": 4000 }, { "epoch": 0.75, "learning_rate": 4.1707536091731715e-05, "loss": 4.5834, "step": 4010 }, { "epoch": 0.75, "learning_rate": 4.1672998549423225e-05, "loss": 4.6102, "step": 4020 }, { "epoch": 0.75, "learning_rate": 4.1638461007114736e-05, "loss": 4.6063, "step": 4030 }, { "epoch": 0.75, "learning_rate": 4.1603923464806246e-05, "loss": 4.5329, "step": 4040 }, { "epoch": 0.76, "learning_rate": 4.1569385922497756e-05, "loss": 4.6316, "step": 4050 }, { "epoch": 0.76, "learning_rate": 4.153484838018927e-05, "loss": 4.6018, "step": 4060 }, { "epoch": 0.76, "learning_rate": 4.150031083788078e-05, "loss": 4.5185, "step": 4070 }, { "epoch": 0.76, "learning_rate": 4.1465773295572294e-05, "loss": 4.572, "step": 4080 }, { "epoch": 0.76, "learning_rate": 4.14312357532638e-05, "loss": 4.5646, "step": 4090 }, { "epoch": 0.76, "learning_rate": 4.139669821095531e-05, "loss": 4.603, "step": 4100 }, { "epoch": 0.77, "learning_rate": 4.136216066864682e-05, "loss": 4.5372, "step": 4110 }, { "epoch": 0.77, "learning_rate": 4.1327623126338335e-05, "loss": 4.5963, "step": 4120 }, { "epoch": 0.77, "learning_rate": 4.1293085584029846e-05, "loss": 4.5808, "step": 4130 }, { "epoch": 0.77, "learning_rate": 4.125854804172135e-05, "loss": 4.497, "step": 4140 }, { "epoch": 0.77, "learning_rate": 4.122401049941286e-05, "loss": 4.5251, "step": 4150 }, { "epoch": 0.78, "learning_rate": 4.118947295710438e-05, "loss": 4.6056, "step": 4160 }, { "epoch": 0.78, "learning_rate": 4.115493541479589e-05, "loss": 4.5351, "step": 4170 }, { "epoch": 0.78, "learning_rate": 4.11203978724874e-05, "loss": 4.5328, "step": 4180 }, { "epoch": 0.78, "learning_rate": 4.108586033017891e-05, "loss": 4.5216, "step": 4190 }, { "epoch": 0.78, "learning_rate": 4.105132278787042e-05, "loss": 4.4807, "step": 4200 }, { "epoch": 0.79, "learning_rate": 4.101678524556193e-05, "loss": 4.4105, "step": 4210 }, { "epoch": 0.79, "learning_rate": 4.098224770325344e-05, "loss": 4.5167, "step": 4220 }, { "epoch": 0.79, "learning_rate": 4.094771016094495e-05, "loss": 4.5025, "step": 4230 }, { "epoch": 0.79, "learning_rate": 4.091317261863646e-05, "loss": 4.4726, "step": 4240 }, { "epoch": 0.79, "learning_rate": 4.087863507632797e-05, "loss": 4.5453, "step": 4250 }, { "epoch": 0.79, "learning_rate": 4.084409753401948e-05, "loss": 4.5499, "step": 4260 }, { "epoch": 0.8, "learning_rate": 4.0809559991711e-05, "loss": 4.477, "step": 4270 }, { "epoch": 0.8, "learning_rate": 4.07750224494025e-05, "loss": 4.4173, "step": 4280 }, { "epoch": 0.8, "learning_rate": 4.074048490709401e-05, "loss": 4.4168, "step": 4290 }, { "epoch": 0.8, "learning_rate": 4.070594736478552e-05, "loss": 4.4963, "step": 4300 }, { "epoch": 0.8, "learning_rate": 4.067140982247704e-05, "loss": 4.4329, "step": 4310 }, { "epoch": 0.81, "learning_rate": 4.063687228016855e-05, "loss": 4.4016, "step": 4320 }, { "epoch": 0.81, "learning_rate": 4.060233473786005e-05, "loss": 4.4456, "step": 4330 }, { "epoch": 0.81, "learning_rate": 4.056779719555156e-05, "loss": 4.5099, "step": 4340 }, { "epoch": 0.81, "learning_rate": 4.053325965324308e-05, "loss": 4.5314, "step": 4350 }, { "epoch": 0.81, "learning_rate": 4.049872211093459e-05, "loss": 4.3918, "step": 4360 }, { "epoch": 0.81, "learning_rate": 4.04641845686261e-05, "loss": 4.3786, "step": 4370 }, { "epoch": 0.82, "learning_rate": 4.042964702631761e-05, "loss": 4.3641, "step": 4380 }, { "epoch": 0.82, "learning_rate": 4.039510948400912e-05, "loss": 4.3287, "step": 4390 }, { "epoch": 0.82, "learning_rate": 4.036057194170063e-05, "loss": 4.3898, "step": 4400 }, { "epoch": 0.82, "learning_rate": 4.032603439939214e-05, "loss": 4.4319, "step": 4410 }, { "epoch": 0.82, "learning_rate": 4.029149685708365e-05, "loss": 4.4166, "step": 4420 }, { "epoch": 0.83, "learning_rate": 4.025695931477516e-05, "loss": 4.4221, "step": 4430 }, { "epoch": 0.83, "learning_rate": 4.022242177246667e-05, "loss": 4.4518, "step": 4440 }, { "epoch": 0.83, "learning_rate": 4.018788423015818e-05, "loss": 4.433, "step": 4450 }, { "epoch": 0.83, "learning_rate": 4.01533466878497e-05, "loss": 4.4263, "step": 4460 }, { "epoch": 0.83, "learning_rate": 4.01188091455412e-05, "loss": 4.44, "step": 4470 }, { "epoch": 0.84, "learning_rate": 4.0084271603232713e-05, "loss": 4.3684, "step": 4480 }, { "epoch": 0.84, "learning_rate": 4.0049734060924224e-05, "loss": 4.3712, "step": 4490 }, { "epoch": 0.84, "learning_rate": 4.001519651861574e-05, "loss": 4.3684, "step": 4500 }, { "epoch": 0.84, "learning_rate": 3.998065897630725e-05, "loss": 4.3534, "step": 4510 }, { "epoch": 0.84, "learning_rate": 3.9946121433998755e-05, "loss": 4.355, "step": 4520 }, { "epoch": 0.84, "learning_rate": 3.9911583891690265e-05, "loss": 4.3857, "step": 4530 }, { "epoch": 0.85, "learning_rate": 3.987704634938178e-05, "loss": 4.3869, "step": 4540 }, { "epoch": 0.85, "learning_rate": 3.984250880707329e-05, "loss": 4.3584, "step": 4550 }, { "epoch": 0.85, "learning_rate": 3.98079712647648e-05, "loss": 4.2255, "step": 4560 }, { "epoch": 0.85, "learning_rate": 3.977343372245631e-05, "loss": 4.3284, "step": 4570 }, { "epoch": 0.85, "learning_rate": 3.9738896180147823e-05, "loss": 4.3396, "step": 4580 }, { "epoch": 0.86, "learning_rate": 3.9704358637839334e-05, "loss": 4.3761, "step": 4590 }, { "epoch": 0.86, "learning_rate": 3.9669821095530844e-05, "loss": 4.3291, "step": 4600 }, { "epoch": 0.86, "learning_rate": 3.9635283553222354e-05, "loss": 4.3493, "step": 4610 }, { "epoch": 0.86, "learning_rate": 3.9600746010913865e-05, "loss": 4.3123, "step": 4620 }, { "epoch": 0.86, "learning_rate": 3.9566208468605375e-05, "loss": 4.3874, "step": 4630 }, { "epoch": 0.87, "learning_rate": 3.9531670926296885e-05, "loss": 4.3719, "step": 4640 }, { "epoch": 0.87, "learning_rate": 3.94971333839884e-05, "loss": 4.3051, "step": 4650 }, { "epoch": 0.87, "learning_rate": 3.9462595841679906e-05, "loss": 4.3216, "step": 4660 }, { "epoch": 0.87, "learning_rate": 3.9428058299371416e-05, "loss": 4.4278, "step": 4670 }, { "epoch": 0.87, "learning_rate": 3.9393520757062927e-05, "loss": 4.3334, "step": 4680 }, { "epoch": 0.87, "learning_rate": 3.9358983214754444e-05, "loss": 4.2799, "step": 4690 }, { "epoch": 0.88, "learning_rate": 3.9324445672445954e-05, "loss": 4.3025, "step": 4700 }, { "epoch": 0.88, "learning_rate": 3.928990813013746e-05, "loss": 4.2286, "step": 4710 }, { "epoch": 0.88, "learning_rate": 3.9255370587828975e-05, "loss": 4.286, "step": 4720 }, { "epoch": 0.88, "learning_rate": 3.9220833045520485e-05, "loss": 4.2102, "step": 4730 }, { "epoch": 0.88, "learning_rate": 3.9186295503211995e-05, "loss": 4.2735, "step": 4740 }, { "epoch": 0.89, "learning_rate": 3.91517579609035e-05, "loss": 4.3194, "step": 4750 }, { "epoch": 0.89, "learning_rate": 3.9117220418595016e-05, "loss": 4.2928, "step": 4760 }, { "epoch": 0.89, "learning_rate": 3.9082682876286526e-05, "loss": 4.348, "step": 4770 }, { "epoch": 0.89, "learning_rate": 3.9048145333978037e-05, "loss": 4.3026, "step": 4780 }, { "epoch": 0.89, "learning_rate": 3.901360779166955e-05, "loss": 4.2144, "step": 4790 }, { "epoch": 0.9, "learning_rate": 3.897907024936106e-05, "loss": 4.2993, "step": 4800 }, { "epoch": 0.9, "learning_rate": 3.894453270705257e-05, "loss": 4.2313, "step": 4810 }, { "epoch": 0.9, "learning_rate": 3.890999516474408e-05, "loss": 4.2205, "step": 4820 }, { "epoch": 0.9, "learning_rate": 3.887545762243559e-05, "loss": 4.2033, "step": 4830 }, { "epoch": 0.9, "learning_rate": 3.8840920080127105e-05, "loss": 4.2286, "step": 4840 }, { "epoch": 0.9, "learning_rate": 3.880638253781861e-05, "loss": 4.2882, "step": 4850 }, { "epoch": 0.91, "learning_rate": 3.877184499551012e-05, "loss": 4.2705, "step": 4860 }, { "epoch": 0.91, "learning_rate": 3.873730745320163e-05, "loss": 4.2006, "step": 4870 }, { "epoch": 0.91, "learning_rate": 3.8702769910893146e-05, "loss": 4.2344, "step": 4880 }, { "epoch": 0.91, "learning_rate": 3.866823236858466e-05, "loss": 4.2227, "step": 4890 }, { "epoch": 0.91, "learning_rate": 3.863369482627616e-05, "loss": 4.2481, "step": 4900 }, { "epoch": 0.92, "learning_rate": 3.859915728396768e-05, "loss": 4.2321, "step": 4910 }, { "epoch": 0.92, "learning_rate": 3.856461974165919e-05, "loss": 4.2588, "step": 4920 }, { "epoch": 0.92, "learning_rate": 3.85300821993507e-05, "loss": 4.1625, "step": 4930 }, { "epoch": 0.92, "learning_rate": 3.84955446570422e-05, "loss": 4.1685, "step": 4940 }, { "epoch": 0.92, "learning_rate": 3.846100711473372e-05, "loss": 4.2126, "step": 4950 }, { "epoch": 0.92, "learning_rate": 3.842646957242523e-05, "loss": 4.1852, "step": 4960 }, { "epoch": 0.93, "learning_rate": 3.839193203011674e-05, "loss": 4.0762, "step": 4970 }, { "epoch": 0.93, "learning_rate": 3.835739448780825e-05, "loss": 4.1486, "step": 4980 }, { "epoch": 0.93, "learning_rate": 3.832285694549976e-05, "loss": 4.2086, "step": 4990 }, { "epoch": 0.93, "learning_rate": 3.828831940319127e-05, "loss": 4.1741, "step": 5000 }, { "epoch": 0.93, "learning_rate": 3.825378186088278e-05, "loss": 4.1473, "step": 5010 }, { "epoch": 0.94, "learning_rate": 3.821924431857429e-05, "loss": 4.1852, "step": 5020 }, { "epoch": 0.94, "learning_rate": 3.818470677626581e-05, "loss": 4.126, "step": 5030 }, { "epoch": 0.94, "learning_rate": 3.815016923395731e-05, "loss": 4.2465, "step": 5040 }, { "epoch": 0.94, "learning_rate": 3.811563169164882e-05, "loss": 4.0662, "step": 5050 }, { "epoch": 0.94, "learning_rate": 3.808109414934033e-05, "loss": 4.1311, "step": 5060 }, { "epoch": 0.95, "learning_rate": 3.804655660703185e-05, "loss": 4.1458, "step": 5070 }, { "epoch": 0.95, "learning_rate": 3.801201906472336e-05, "loss": 4.1106, "step": 5080 }, { "epoch": 0.95, "learning_rate": 3.797748152241486e-05, "loss": 4.1293, "step": 5090 }, { "epoch": 0.95, "learning_rate": 3.794294398010638e-05, "loss": 4.0843, "step": 5100 }, { "epoch": 0.95, "learning_rate": 3.790840643779789e-05, "loss": 4.1247, "step": 5110 }, { "epoch": 0.95, "learning_rate": 3.78738688954894e-05, "loss": 4.1126, "step": 5120 }, { "epoch": 0.96, "learning_rate": 3.7839331353180904e-05, "loss": 4.0693, "step": 5130 }, { "epoch": 0.96, "learning_rate": 3.780479381087242e-05, "loss": 4.0749, "step": 5140 }, { "epoch": 0.96, "learning_rate": 3.777025626856393e-05, "loss": 4.1138, "step": 5150 }, { "epoch": 0.96, "learning_rate": 3.773571872625544e-05, "loss": 4.1342, "step": 5160 }, { "epoch": 0.96, "learning_rate": 3.770118118394695e-05, "loss": 4.2315, "step": 5170 }, { "epoch": 0.97, "learning_rate": 3.766664364163846e-05, "loss": 4.0744, "step": 5180 }, { "epoch": 0.97, "learning_rate": 3.763210609932997e-05, "loss": 4.1098, "step": 5190 }, { "epoch": 0.97, "learning_rate": 3.759756855702148e-05, "loss": 4.1641, "step": 5200 }, { "epoch": 0.97, "learning_rate": 3.7563031014712994e-05, "loss": 4.1207, "step": 5210 }, { "epoch": 0.97, "learning_rate": 3.752849347240451e-05, "loss": 4.0232, "step": 5220 }, { "epoch": 0.98, "learning_rate": 3.7493955930096014e-05, "loss": 4.1027, "step": 5230 }, { "epoch": 0.98, "learning_rate": 3.7459418387787525e-05, "loss": 4.1394, "step": 5240 }, { "epoch": 0.98, "learning_rate": 3.7424880845479035e-05, "loss": 4.0756, "step": 5250 }, { "epoch": 0.98, "learning_rate": 3.739034330317055e-05, "loss": 4.1204, "step": 5260 }, { "epoch": 0.98, "learning_rate": 3.735580576086206e-05, "loss": 4.1218, "step": 5270 }, { "epoch": 0.98, "learning_rate": 3.7321268218553566e-05, "loss": 4.0515, "step": 5280 }, { "epoch": 0.99, "learning_rate": 3.728673067624508e-05, "loss": 4.109, "step": 5290 }, { "epoch": 0.99, "learning_rate": 3.725219313393659e-05, "loss": 3.9909, "step": 5300 }, { "epoch": 0.99, "learning_rate": 3.7217655591628104e-05, "loss": 4.0671, "step": 5310 }, { "epoch": 0.99, "learning_rate": 3.718311804931961e-05, "loss": 4.1067, "step": 5320 }, { "epoch": 0.99, "learning_rate": 3.7148580507011124e-05, "loss": 4.0092, "step": 5330 }, { "epoch": 1.0, "learning_rate": 3.7114042964702634e-05, "loss": 4.0346, "step": 5340 }, { "epoch": 1.0, "learning_rate": 3.7079505422394145e-05, "loss": 4.0381, "step": 5350 }, { "epoch": 1.0, "learning_rate": 3.7044967880085655e-05, "loss": 4.0308, "step": 5360 }, { "epoch": 1.0, "eval_accuracy": 0.24909529553679133, "eval_loss": 3.6947672367095947, "eval_runtime": 8621.8692, "eval_samples_per_second": 8.846, "eval_steps_per_second": 0.277, "step": 5362 }, { "epoch": 1.0, "learning_rate": 3.7010430337777165e-05, "loss": 4.0648, "step": 5370 }, { "epoch": 1.0, "learning_rate": 3.6975892795468676e-05, "loss": 3.9987, "step": 5380 }, { "epoch": 1.01, "learning_rate": 3.6941355253160186e-05, "loss": 3.9417, "step": 5390 }, { "epoch": 1.01, "learning_rate": 3.6906817710851696e-05, "loss": 3.9734, "step": 5400 }, { "epoch": 1.01, "learning_rate": 3.6872280168543213e-05, "loss": 3.9119, "step": 5410 }, { "epoch": 1.01, "learning_rate": 3.683774262623472e-05, "loss": 3.9818, "step": 5420 }, { "epoch": 1.01, "learning_rate": 3.680320508392623e-05, "loss": 3.9722, "step": 5430 }, { "epoch": 1.01, "learning_rate": 3.676866754161774e-05, "loss": 3.9544, "step": 5440 }, { "epoch": 1.02, "learning_rate": 3.6734129999309255e-05, "loss": 3.9963, "step": 5450 }, { "epoch": 1.02, "learning_rate": 3.6699592457000765e-05, "loss": 3.9356, "step": 5460 }, { "epoch": 1.02, "learning_rate": 3.666505491469227e-05, "loss": 3.9639, "step": 5470 }, { "epoch": 1.02, "learning_rate": 3.6630517372383786e-05, "loss": 3.9783, "step": 5480 }, { "epoch": 1.02, "learning_rate": 3.6595979830075296e-05, "loss": 3.9439, "step": 5490 }, { "epoch": 1.03, "learning_rate": 3.6561442287766806e-05, "loss": 3.9195, "step": 5500 }, { "epoch": 1.03, "learning_rate": 3.652690474545831e-05, "loss": 3.8927, "step": 5510 }, { "epoch": 1.03, "learning_rate": 3.649236720314983e-05, "loss": 3.9244, "step": 5520 }, { "epoch": 1.03, "learning_rate": 3.645782966084134e-05, "loss": 3.9266, "step": 5530 }, { "epoch": 1.03, "learning_rate": 3.642329211853285e-05, "loss": 3.9866, "step": 5540 }, { "epoch": 1.03, "learning_rate": 3.638875457622436e-05, "loss": 3.8888, "step": 5550 }, { "epoch": 1.04, "learning_rate": 3.635421703391587e-05, "loss": 3.8811, "step": 5560 }, { "epoch": 1.04, "learning_rate": 3.631967949160738e-05, "loss": 3.959, "step": 5570 }, { "epoch": 1.04, "learning_rate": 3.628514194929889e-05, "loss": 4.0576, "step": 5580 }, { "epoch": 1.04, "learning_rate": 3.62506044069904e-05, "loss": 3.9046, "step": 5590 }, { "epoch": 1.04, "learning_rate": 3.6216066864681916e-05, "loss": 3.8684, "step": 5600 }, { "epoch": 1.05, "learning_rate": 3.618152932237342e-05, "loss": 3.9167, "step": 5610 }, { "epoch": 1.05, "learning_rate": 3.614699178006493e-05, "loss": 3.8899, "step": 5620 }, { "epoch": 1.05, "learning_rate": 3.611245423775644e-05, "loss": 3.9039, "step": 5630 }, { "epoch": 1.05, "learning_rate": 3.607791669544796e-05, "loss": 3.8306, "step": 5640 }, { "epoch": 1.05, "learning_rate": 3.604337915313946e-05, "loss": 3.8872, "step": 5650 }, { "epoch": 1.06, "learning_rate": 3.600884161083097e-05, "loss": 3.8607, "step": 5660 }, { "epoch": 1.06, "learning_rate": 3.597430406852249e-05, "loss": 3.9271, "step": 5670 }, { "epoch": 1.06, "learning_rate": 3.5939766526214e-05, "loss": 4.006, "step": 5680 }, { "epoch": 1.06, "learning_rate": 3.590522898390551e-05, "loss": 3.901, "step": 5690 }, { "epoch": 1.06, "learning_rate": 3.587069144159701e-05, "loss": 3.8321, "step": 5700 }, { "epoch": 1.06, "learning_rate": 3.583615389928853e-05, "loss": 3.8744, "step": 5710 }, { "epoch": 1.07, "learning_rate": 3.580161635698004e-05, "loss": 3.9017, "step": 5720 }, { "epoch": 1.07, "learning_rate": 3.576707881467155e-05, "loss": 3.8878, "step": 5730 }, { "epoch": 1.07, "learning_rate": 3.573254127236306e-05, "loss": 3.832, "step": 5740 }, { "epoch": 1.07, "learning_rate": 3.569800373005457e-05, "loss": 3.831, "step": 5750 }, { "epoch": 1.07, "learning_rate": 3.566346618774608e-05, "loss": 3.9113, "step": 5760 }, { "epoch": 1.08, "learning_rate": 3.562892864543759e-05, "loss": 3.796, "step": 5770 }, { "epoch": 1.08, "learning_rate": 3.55943911031291e-05, "loss": 3.8043, "step": 5780 }, { "epoch": 1.08, "learning_rate": 3.555985356082062e-05, "loss": 3.9136, "step": 5790 }, { "epoch": 1.08, "learning_rate": 3.552531601851212e-05, "loss": 3.7756, "step": 5800 }, { "epoch": 1.08, "learning_rate": 3.549077847620363e-05, "loss": 3.7959, "step": 5810 }, { "epoch": 1.09, "learning_rate": 3.545624093389514e-05, "loss": 3.7942, "step": 5820 }, { "epoch": 1.09, "learning_rate": 3.542170339158666e-05, "loss": 3.8605, "step": 5830 }, { "epoch": 1.09, "learning_rate": 3.5387165849278164e-05, "loss": 3.7954, "step": 5840 }, { "epoch": 1.09, "learning_rate": 3.5352628306969674e-05, "loss": 3.8062, "step": 5850 }, { "epoch": 1.09, "learning_rate": 3.531809076466119e-05, "loss": 3.8087, "step": 5860 }, { "epoch": 1.09, "learning_rate": 3.52835532223527e-05, "loss": 3.801, "step": 5870 }, { "epoch": 1.1, "learning_rate": 3.524901568004421e-05, "loss": 3.7685, "step": 5880 }, { "epoch": 1.1, "learning_rate": 3.5214478137735715e-05, "loss": 3.8729, "step": 5890 }, { "epoch": 1.1, "learning_rate": 3.517994059542723e-05, "loss": 3.8502, "step": 5900 }, { "epoch": 1.1, "learning_rate": 3.514540305311874e-05, "loss": 3.8123, "step": 5910 }, { "epoch": 1.1, "learning_rate": 3.511086551081025e-05, "loss": 3.7957, "step": 5920 }, { "epoch": 1.11, "learning_rate": 3.5076327968501763e-05, "loss": 3.7741, "step": 5930 }, { "epoch": 1.11, "learning_rate": 3.5041790426193274e-05, "loss": 3.7132, "step": 5940 }, { "epoch": 1.11, "learning_rate": 3.5007252883884784e-05, "loss": 3.7215, "step": 5950 }, { "epoch": 1.11, "learning_rate": 3.4972715341576294e-05, "loss": 3.773, "step": 5960 }, { "epoch": 1.11, "learning_rate": 3.4938177799267805e-05, "loss": 3.7669, "step": 5970 }, { "epoch": 1.12, "learning_rate": 3.490364025695932e-05, "loss": 3.7389, "step": 5980 }, { "epoch": 1.12, "learning_rate": 3.4869102714650825e-05, "loss": 3.748, "step": 5990 }, { "epoch": 1.12, "learning_rate": 3.4834565172342336e-05, "loss": 3.7029, "step": 6000 }, { "epoch": 1.12, "learning_rate": 3.4800027630033846e-05, "loss": 3.8851, "step": 6010 }, { "epoch": 1.12, "learning_rate": 3.476549008772536e-05, "loss": 3.8138, "step": 6020 }, { "epoch": 1.12, "learning_rate": 3.4730952545416867e-05, "loss": 3.7595, "step": 6030 }, { "epoch": 1.13, "learning_rate": 3.469641500310838e-05, "loss": 3.7483, "step": 6040 }, { "epoch": 1.13, "learning_rate": 3.4661877460799894e-05, "loss": 3.7428, "step": 6050 }, { "epoch": 1.13, "learning_rate": 3.4627339918491404e-05, "loss": 3.7077, "step": 6060 }, { "epoch": 1.13, "learning_rate": 3.4592802376182915e-05, "loss": 3.7716, "step": 6070 }, { "epoch": 1.13, "learning_rate": 3.455826483387442e-05, "loss": 3.7343, "step": 6080 }, { "epoch": 1.14, "learning_rate": 3.4523727291565935e-05, "loss": 3.7992, "step": 6090 }, { "epoch": 1.14, "learning_rate": 3.4489189749257446e-05, "loss": 3.6995, "step": 6100 }, { "epoch": 1.14, "learning_rate": 3.4454652206948956e-05, "loss": 3.7807, "step": 6110 }, { "epoch": 1.14, "learning_rate": 3.4420114664640466e-05, "loss": 3.7395, "step": 6120 }, { "epoch": 1.14, "learning_rate": 3.4385577122331976e-05, "loss": 3.8304, "step": 6130 }, { "epoch": 1.14, "learning_rate": 3.435103958002349e-05, "loss": 3.6993, "step": 6140 }, { "epoch": 1.15, "learning_rate": 3.4316502037715e-05, "loss": 3.6915, "step": 6150 }, { "epoch": 1.15, "learning_rate": 3.428196449540651e-05, "loss": 3.6993, "step": 6160 }, { "epoch": 1.15, "learning_rate": 3.4247426953098025e-05, "loss": 3.7521, "step": 6170 }, { "epoch": 1.15, "learning_rate": 3.421288941078953e-05, "loss": 3.6159, "step": 6180 }, { "epoch": 1.15, "learning_rate": 3.417835186848104e-05, "loss": 3.8004, "step": 6190 }, { "epoch": 1.16, "learning_rate": 3.414381432617255e-05, "loss": 3.7197, "step": 6200 }, { "epoch": 1.16, "learning_rate": 3.4109276783864066e-05, "loss": 3.6887, "step": 6210 }, { "epoch": 1.16, "learning_rate": 3.407473924155557e-05, "loss": 3.6608, "step": 6220 }, { "epoch": 1.16, "learning_rate": 3.404020169924708e-05, "loss": 3.6339, "step": 6230 }, { "epoch": 1.16, "learning_rate": 3.40056641569386e-05, "loss": 3.7329, "step": 6240 }, { "epoch": 1.17, "learning_rate": 3.397112661463011e-05, "loss": 3.7651, "step": 6250 }, { "epoch": 1.17, "learning_rate": 3.393658907232162e-05, "loss": 3.731, "step": 6260 }, { "epoch": 1.17, "learning_rate": 3.390205153001312e-05, "loss": 3.6192, "step": 6270 }, { "epoch": 1.17, "learning_rate": 3.386751398770464e-05, "loss": 3.6153, "step": 6280 }, { "epoch": 1.17, "learning_rate": 3.383297644539615e-05, "loss": 3.6365, "step": 6290 }, { "epoch": 1.17, "learning_rate": 3.379843890308766e-05, "loss": 3.6716, "step": 6300 }, { "epoch": 1.18, "learning_rate": 3.376390136077917e-05, "loss": 3.6605, "step": 6310 }, { "epoch": 1.18, "learning_rate": 3.372936381847068e-05, "loss": 3.7046, "step": 6320 }, { "epoch": 1.18, "learning_rate": 3.369482627616219e-05, "loss": 3.6256, "step": 6330 }, { "epoch": 1.18, "learning_rate": 3.36602887338537e-05, "loss": 3.6081, "step": 6340 }, { "epoch": 1.18, "learning_rate": 3.362575119154521e-05, "loss": 3.6484, "step": 6350 }, { "epoch": 1.19, "learning_rate": 3.359121364923673e-05, "loss": 3.6968, "step": 6360 }, { "epoch": 1.19, "learning_rate": 3.355667610692823e-05, "loss": 3.5797, "step": 6370 }, { "epoch": 1.19, "learning_rate": 3.352213856461974e-05, "loss": 3.6661, "step": 6380 }, { "epoch": 1.19, "learning_rate": 3.348760102231125e-05, "loss": 3.6663, "step": 6390 }, { "epoch": 1.19, "learning_rate": 3.345306348000277e-05, "loss": 3.5707, "step": 6400 }, { "epoch": 1.2, "learning_rate": 3.341852593769427e-05, "loss": 3.5989, "step": 6410 }, { "epoch": 1.2, "learning_rate": 3.338398839538578e-05, "loss": 3.6403, "step": 6420 }, { "epoch": 1.2, "learning_rate": 3.33494508530773e-05, "loss": 3.6176, "step": 6430 }, { "epoch": 1.2, "learning_rate": 3.331491331076881e-05, "loss": 3.5832, "step": 6440 }, { "epoch": 1.2, "learning_rate": 3.328037576846032e-05, "loss": 3.6051, "step": 6450 }, { "epoch": 1.2, "learning_rate": 3.3245838226151824e-05, "loss": 3.6246, "step": 6460 }, { "epoch": 1.21, "learning_rate": 3.321130068384334e-05, "loss": 3.6141, "step": 6470 }, { "epoch": 1.21, "learning_rate": 3.317676314153485e-05, "loss": 3.5999, "step": 6480 }, { "epoch": 1.21, "learning_rate": 3.314222559922636e-05, "loss": 3.6628, "step": 6490 }, { "epoch": 1.21, "learning_rate": 3.310768805691787e-05, "loss": 3.651, "step": 6500 }, { "epoch": 1.21, "learning_rate": 3.307315051460938e-05, "loss": 3.5738, "step": 6510 }, { "epoch": 1.22, "learning_rate": 3.303861297230089e-05, "loss": 3.5479, "step": 6520 }, { "epoch": 1.22, "learning_rate": 3.30040754299924e-05, "loss": 3.5372, "step": 6530 }, { "epoch": 1.22, "learning_rate": 3.296953788768391e-05, "loss": 3.6088, "step": 6540 }, { "epoch": 1.22, "learning_rate": 3.293500034537542e-05, "loss": 3.5082, "step": 6550 }, { "epoch": 1.22, "learning_rate": 3.2900462803066934e-05, "loss": 3.5443, "step": 6560 }, { "epoch": 1.23, "learning_rate": 3.2865925260758444e-05, "loss": 3.4684, "step": 6570 }, { "epoch": 1.23, "learning_rate": 3.2831387718449954e-05, "loss": 3.5836, "step": 6580 }, { "epoch": 1.23, "learning_rate": 3.279685017614147e-05, "loss": 3.5457, "step": 6590 }, { "epoch": 1.23, "learning_rate": 3.2762312633832975e-05, "loss": 3.5694, "step": 6600 }, { "epoch": 1.23, "learning_rate": 3.2727775091524485e-05, "loss": 3.4931, "step": 6610 }, { "epoch": 1.23, "learning_rate": 3.2693237549216e-05, "loss": 3.6173, "step": 6620 }, { "epoch": 1.24, "learning_rate": 3.265870000690751e-05, "loss": 3.5387, "step": 6630 }, { "epoch": 1.24, "learning_rate": 3.262416246459902e-05, "loss": 3.552, "step": 6640 }, { "epoch": 1.24, "learning_rate": 3.2589624922290526e-05, "loss": 3.5856, "step": 6650 }, { "epoch": 1.24, "learning_rate": 3.2555087379982044e-05, "loss": 3.5191, "step": 6660 }, { "epoch": 1.24, "learning_rate": 3.2520549837673554e-05, "loss": 3.4632, "step": 6670 }, { "epoch": 1.25, "learning_rate": 3.2486012295365064e-05, "loss": 3.557, "step": 6680 }, { "epoch": 1.25, "learning_rate": 3.2451474753056574e-05, "loss": 3.4683, "step": 6690 }, { "epoch": 1.25, "learning_rate": 3.2416937210748085e-05, "loss": 3.6178, "step": 6700 }, { "epoch": 1.25, "learning_rate": 3.2382399668439595e-05, "loss": 3.509, "step": 6710 }, { "epoch": 1.25, "learning_rate": 3.2347862126131105e-05, "loss": 3.5511, "step": 6720 }, { "epoch": 1.25, "learning_rate": 3.2313324583822616e-05, "loss": 3.5375, "step": 6730 }, { "epoch": 1.26, "learning_rate": 3.2278787041514126e-05, "loss": 3.5648, "step": 6740 }, { "epoch": 1.26, "learning_rate": 3.2244249499205636e-05, "loss": 3.4806, "step": 6750 }, { "epoch": 1.26, "learning_rate": 3.220971195689715e-05, "loss": 3.5598, "step": 6760 }, { "epoch": 1.26, "learning_rate": 3.217517441458866e-05, "loss": 3.5497, "step": 6770 }, { "epoch": 1.26, "learning_rate": 3.2140636872280174e-05, "loss": 3.5869, "step": 6780 }, { "epoch": 1.27, "learning_rate": 3.210609932997168e-05, "loss": 3.46, "step": 6790 }, { "epoch": 1.27, "learning_rate": 3.207156178766319e-05, "loss": 3.4238, "step": 6800 }, { "epoch": 1.27, "learning_rate": 3.2037024245354705e-05, "loss": 3.5371, "step": 6810 }, { "epoch": 1.27, "learning_rate": 3.2002486703046215e-05, "loss": 3.5355, "step": 6820 }, { "epoch": 1.27, "learning_rate": 3.1967949160737726e-05, "loss": 3.481, "step": 6830 }, { "epoch": 1.28, "learning_rate": 3.193341161842923e-05, "loss": 3.3692, "step": 6840 }, { "epoch": 1.28, "learning_rate": 3.1898874076120746e-05, "loss": 3.4681, "step": 6850 }, { "epoch": 1.28, "learning_rate": 3.1864336533812257e-05, "loss": 3.4593, "step": 6860 }, { "epoch": 1.28, "learning_rate": 3.182979899150377e-05, "loss": 3.4291, "step": 6870 }, { "epoch": 1.28, "learning_rate": 3.179526144919528e-05, "loss": 3.452, "step": 6880 }, { "epoch": 1.28, "learning_rate": 3.176072390688679e-05, "loss": 3.4585, "step": 6890 }, { "epoch": 1.29, "learning_rate": 3.17261863645783e-05, "loss": 3.4393, "step": 6900 }, { "epoch": 1.29, "learning_rate": 3.169164882226981e-05, "loss": 3.472, "step": 6910 }, { "epoch": 1.29, "learning_rate": 3.165711127996132e-05, "loss": 3.4524, "step": 6920 }, { "epoch": 1.29, "learning_rate": 3.162257373765283e-05, "loss": 3.4865, "step": 6930 }, { "epoch": 1.29, "learning_rate": 3.158803619534434e-05, "loss": 3.442, "step": 6940 }, { "epoch": 1.3, "learning_rate": 3.155349865303585e-05, "loss": 3.4376, "step": 6950 }, { "epoch": 1.3, "learning_rate": 3.151896111072736e-05, "loss": 3.4278, "step": 6960 }, { "epoch": 1.3, "learning_rate": 3.148442356841888e-05, "loss": 3.342, "step": 6970 }, { "epoch": 1.3, "learning_rate": 3.144988602611038e-05, "loss": 3.4077, "step": 6980 }, { "epoch": 1.3, "learning_rate": 3.141534848380189e-05, "loss": 3.3748, "step": 6990 }, { "epoch": 1.31, "learning_rate": 3.138081094149341e-05, "loss": 3.3983, "step": 7000 }, { "epoch": 1.31, "learning_rate": 3.134627339918492e-05, "loss": 3.4114, "step": 7010 }, { "epoch": 1.31, "learning_rate": 3.131173585687643e-05, "loss": 3.5379, "step": 7020 }, { "epoch": 1.31, "learning_rate": 3.127719831456793e-05, "loss": 3.446, "step": 7030 }, { "epoch": 1.31, "learning_rate": 3.124266077225945e-05, "loss": 3.3867, "step": 7040 }, { "epoch": 1.31, "learning_rate": 3.120812322995096e-05, "loss": 3.434, "step": 7050 }, { "epoch": 1.32, "learning_rate": 3.117358568764247e-05, "loss": 3.425, "step": 7060 }, { "epoch": 1.32, "learning_rate": 3.113904814533398e-05, "loss": 3.4585, "step": 7070 }, { "epoch": 1.32, "learning_rate": 3.110451060302549e-05, "loss": 3.4087, "step": 7080 }, { "epoch": 1.32, "learning_rate": 3.1069973060717e-05, "loss": 3.4151, "step": 7090 }, { "epoch": 1.32, "learning_rate": 3.103543551840851e-05, "loss": 3.3507, "step": 7100 }, { "epoch": 1.33, "learning_rate": 3.100089797610002e-05, "loss": 3.5211, "step": 7110 }, { "epoch": 1.33, "learning_rate": 3.096636043379153e-05, "loss": 3.3704, "step": 7120 }, { "epoch": 1.33, "learning_rate": 3.093182289148304e-05, "loss": 3.4302, "step": 7130 }, { "epoch": 1.33, "learning_rate": 3.089728534917455e-05, "loss": 3.4675, "step": 7140 }, { "epoch": 1.33, "learning_rate": 3.086274780686606e-05, "loss": 3.4799, "step": 7150 }, { "epoch": 1.34, "learning_rate": 3.082821026455758e-05, "loss": 3.4777, "step": 7160 }, { "epoch": 1.34, "learning_rate": 3.079367272224908e-05, "loss": 3.4224, "step": 7170 }, { "epoch": 1.34, "learning_rate": 3.0759135179940593e-05, "loss": 3.3939, "step": 7180 }, { "epoch": 1.34, "learning_rate": 3.072459763763211e-05, "loss": 3.4045, "step": 7190 }, { "epoch": 1.34, "learning_rate": 3.069006009532362e-05, "loss": 3.3775, "step": 7200 }, { "epoch": 1.34, "learning_rate": 3.065552255301513e-05, "loss": 3.3832, "step": 7210 }, { "epoch": 1.35, "learning_rate": 3.0620985010706635e-05, "loss": 3.3987, "step": 7220 }, { "epoch": 1.35, "learning_rate": 3.058644746839815e-05, "loss": 3.3736, "step": 7230 }, { "epoch": 1.35, "learning_rate": 3.055190992608966e-05, "loss": 3.4009, "step": 7240 }, { "epoch": 1.35, "learning_rate": 3.0517372383781172e-05, "loss": 3.4189, "step": 7250 }, { "epoch": 1.35, "learning_rate": 3.0482834841472686e-05, "loss": 3.3918, "step": 7260 }, { "epoch": 1.36, "learning_rate": 3.044829729916419e-05, "loss": 3.38, "step": 7270 }, { "epoch": 1.36, "learning_rate": 3.0413759756855703e-05, "loss": 3.3672, "step": 7280 }, { "epoch": 1.36, "learning_rate": 3.0379222214547214e-05, "loss": 3.3829, "step": 7290 }, { "epoch": 1.36, "learning_rate": 3.0344684672238727e-05, "loss": 3.3583, "step": 7300 }, { "epoch": 1.36, "learning_rate": 3.0310147129930234e-05, "loss": 3.2762, "step": 7310 }, { "epoch": 1.36, "learning_rate": 3.0275609587621745e-05, "loss": 3.2542, "step": 7320 }, { "epoch": 1.37, "learning_rate": 3.024107204531326e-05, "loss": 3.2616, "step": 7330 }, { "epoch": 1.37, "learning_rate": 3.020653450300477e-05, "loss": 3.4247, "step": 7340 }, { "epoch": 1.37, "learning_rate": 3.017199696069628e-05, "loss": 3.2569, "step": 7350 }, { "epoch": 1.37, "learning_rate": 3.0137459418387786e-05, "loss": 3.2765, "step": 7360 }, { "epoch": 1.37, "learning_rate": 3.01029218760793e-05, "loss": 3.4377, "step": 7370 }, { "epoch": 1.38, "learning_rate": 3.006838433377081e-05, "loss": 3.289, "step": 7380 }, { "epoch": 1.38, "learning_rate": 3.0033846791462324e-05, "loss": 3.4032, "step": 7390 }, { "epoch": 1.38, "learning_rate": 2.9999309249153834e-05, "loss": 3.3145, "step": 7400 }, { "epoch": 1.38, "learning_rate": 2.996477170684534e-05, "loss": 3.3723, "step": 7410 }, { "epoch": 1.38, "learning_rate": 2.993023416453685e-05, "loss": 3.3719, "step": 7420 }, { "epoch": 1.39, "learning_rate": 2.9895696622228365e-05, "loss": 3.3268, "step": 7430 }, { "epoch": 1.39, "learning_rate": 2.9861159079919875e-05, "loss": 3.4391, "step": 7440 }, { "epoch": 1.39, "learning_rate": 2.982662153761139e-05, "loss": 3.2366, "step": 7450 }, { "epoch": 1.39, "learning_rate": 2.9792083995302892e-05, "loss": 3.3724, "step": 7460 }, { "epoch": 1.39, "learning_rate": 2.9757546452994406e-05, "loss": 3.3473, "step": 7470 }, { "epoch": 1.39, "learning_rate": 2.9723008910685916e-05, "loss": 3.2653, "step": 7480 }, { "epoch": 1.4, "learning_rate": 2.968847136837743e-05, "loss": 3.2872, "step": 7490 }, { "epoch": 1.4, "learning_rate": 2.9653933826068937e-05, "loss": 3.2932, "step": 7500 }, { "epoch": 1.4, "learning_rate": 2.9619396283760447e-05, "loss": 3.2541, "step": 7510 }, { "epoch": 1.4, "learning_rate": 2.958485874145196e-05, "loss": 3.3233, "step": 7520 }, { "epoch": 1.4, "learning_rate": 2.955032119914347e-05, "loss": 3.3025, "step": 7530 }, { "epoch": 1.41, "learning_rate": 2.9515783656834982e-05, "loss": 3.2965, "step": 7540 }, { "epoch": 1.41, "learning_rate": 2.948124611452649e-05, "loss": 3.2564, "step": 7550 }, { "epoch": 1.41, "learning_rate": 2.9446708572218002e-05, "loss": 3.3116, "step": 7560 }, { "epoch": 1.41, "learning_rate": 2.9412171029909513e-05, "loss": 3.2414, "step": 7570 }, { "epoch": 1.41, "learning_rate": 2.9377633487601026e-05, "loss": 3.3608, "step": 7580 }, { "epoch": 1.42, "learning_rate": 2.9343095945292537e-05, "loss": 3.3019, "step": 7590 }, { "epoch": 1.42, "learning_rate": 2.9308558402984044e-05, "loss": 3.3488, "step": 7600 }, { "epoch": 1.42, "learning_rate": 2.9274020860675554e-05, "loss": 3.3367, "step": 7610 }, { "epoch": 1.42, "learning_rate": 2.9239483318367068e-05, "loss": 3.2071, "step": 7620 }, { "epoch": 1.42, "learning_rate": 2.9204945776058578e-05, "loss": 3.2963, "step": 7630 }, { "epoch": 1.42, "learning_rate": 2.9170408233750085e-05, "loss": 3.2445, "step": 7640 }, { "epoch": 1.43, "learning_rate": 2.9135870691441595e-05, "loss": 3.3087, "step": 7650 }, { "epoch": 1.43, "learning_rate": 2.910133314913311e-05, "loss": 3.3239, "step": 7660 }, { "epoch": 1.43, "learning_rate": 2.906679560682462e-05, "loss": 3.2231, "step": 7670 }, { "epoch": 1.43, "learning_rate": 2.9032258064516133e-05, "loss": 3.3682, "step": 7680 }, { "epoch": 1.43, "learning_rate": 2.899772052220764e-05, "loss": 3.1886, "step": 7690 }, { "epoch": 1.44, "learning_rate": 2.896318297989915e-05, "loss": 3.285, "step": 7700 }, { "epoch": 1.44, "learning_rate": 2.8928645437590664e-05, "loss": 3.1984, "step": 7710 }, { "epoch": 1.44, "learning_rate": 2.8894107895282174e-05, "loss": 3.2885, "step": 7720 }, { "epoch": 1.44, "learning_rate": 2.8859570352973685e-05, "loss": 3.2339, "step": 7730 }, { "epoch": 1.44, "learning_rate": 2.882503281066519e-05, "loss": 3.3014, "step": 7740 }, { "epoch": 1.45, "learning_rate": 2.8790495268356705e-05, "loss": 3.3142, "step": 7750 }, { "epoch": 1.45, "learning_rate": 2.8755957726048215e-05, "loss": 3.2698, "step": 7760 }, { "epoch": 1.45, "learning_rate": 2.872142018373973e-05, "loss": 3.2825, "step": 7770 }, { "epoch": 1.45, "learning_rate": 2.868688264143124e-05, "loss": 3.2214, "step": 7780 }, { "epoch": 1.45, "learning_rate": 2.8652345099122746e-05, "loss": 3.1402, "step": 7790 }, { "epoch": 1.45, "learning_rate": 2.8617807556814257e-05, "loss": 3.2267, "step": 7800 }, { "epoch": 1.46, "learning_rate": 2.858327001450577e-05, "loss": 3.185, "step": 7810 }, { "epoch": 1.46, "learning_rate": 2.854873247219728e-05, "loss": 3.1691, "step": 7820 }, { "epoch": 1.46, "learning_rate": 2.8514194929888788e-05, "loss": 3.1357, "step": 7830 }, { "epoch": 1.46, "learning_rate": 2.8479657387580298e-05, "loss": 3.2274, "step": 7840 }, { "epoch": 1.46, "learning_rate": 2.844511984527181e-05, "loss": 3.2324, "step": 7850 }, { "epoch": 1.47, "learning_rate": 2.8410582302963322e-05, "loss": 3.2761, "step": 7860 }, { "epoch": 1.47, "learning_rate": 2.8376044760654836e-05, "loss": 3.2883, "step": 7870 }, { "epoch": 1.47, "learning_rate": 2.8341507218346343e-05, "loss": 3.2104, "step": 7880 }, { "epoch": 1.47, "learning_rate": 2.8306969676037853e-05, "loss": 3.2581, "step": 7890 }, { "epoch": 1.47, "learning_rate": 2.8272432133729367e-05, "loss": 3.265, "step": 7900 }, { "epoch": 1.47, "learning_rate": 2.8237894591420877e-05, "loss": 3.2577, "step": 7910 }, { "epoch": 1.48, "learning_rate": 2.8203357049112387e-05, "loss": 3.2683, "step": 7920 }, { "epoch": 1.48, "learning_rate": 2.8168819506803894e-05, "loss": 3.1734, "step": 7930 }, { "epoch": 1.48, "learning_rate": 2.8134281964495408e-05, "loss": 3.2041, "step": 7940 }, { "epoch": 1.48, "learning_rate": 2.8099744422186918e-05, "loss": 3.1578, "step": 7950 }, { "epoch": 1.48, "learning_rate": 2.8065206879878432e-05, "loss": 3.2432, "step": 7960 }, { "epoch": 1.49, "learning_rate": 2.8030669337569942e-05, "loss": 3.2978, "step": 7970 }, { "epoch": 1.49, "learning_rate": 2.799613179526145e-05, "loss": 3.201, "step": 7980 }, { "epoch": 1.49, "learning_rate": 2.796159425295296e-05, "loss": 3.2955, "step": 7990 }, { "epoch": 1.49, "learning_rate": 2.7927056710644473e-05, "loss": 3.206, "step": 8000 }, { "epoch": 1.49, "learning_rate": 2.7892519168335984e-05, "loss": 3.1621, "step": 8010 }, { "epoch": 1.5, "learning_rate": 2.785798162602749e-05, "loss": 3.2604, "step": 8020 }, { "epoch": 1.5, "learning_rate": 2.7823444083719004e-05, "loss": 3.2261, "step": 8030 }, { "epoch": 1.5, "learning_rate": 2.7788906541410514e-05, "loss": 3.1247, "step": 8040 }, { "epoch": 1.5, "learning_rate": 2.7754368999102025e-05, "loss": 3.1877, "step": 8050 }, { "epoch": 1.5, "learning_rate": 2.771983145679354e-05, "loss": 3.1831, "step": 8060 }, { "epoch": 1.5, "learning_rate": 2.7685293914485045e-05, "loss": 3.177, "step": 8070 }, { "epoch": 1.51, "learning_rate": 2.7650756372176556e-05, "loss": 3.1021, "step": 8080 }, { "epoch": 1.51, "learning_rate": 2.761621882986807e-05, "loss": 3.1824, "step": 8090 }, { "epoch": 1.51, "learning_rate": 2.758168128755958e-05, "loss": 3.1609, "step": 8100 }, { "epoch": 1.51, "learning_rate": 2.754714374525109e-05, "loss": 3.2185, "step": 8110 }, { "epoch": 1.51, "learning_rate": 2.7512606202942597e-05, "loss": 3.1663, "step": 8120 }, { "epoch": 1.52, "learning_rate": 2.747806866063411e-05, "loss": 3.1356, "step": 8130 }, { "epoch": 1.52, "learning_rate": 2.744353111832562e-05, "loss": 3.1245, "step": 8140 }, { "epoch": 1.52, "learning_rate": 2.7408993576017135e-05, "loss": 3.1758, "step": 8150 }, { "epoch": 1.52, "learning_rate": 2.7374456033708645e-05, "loss": 3.0987, "step": 8160 }, { "epoch": 1.52, "learning_rate": 2.7339918491400152e-05, "loss": 3.0779, "step": 8170 }, { "epoch": 1.53, "learning_rate": 2.7305380949091662e-05, "loss": 3.1655, "step": 8180 }, { "epoch": 1.53, "learning_rate": 2.7270843406783176e-05, "loss": 3.1217, "step": 8190 }, { "epoch": 1.53, "learning_rate": 2.7236305864474686e-05, "loss": 3.1073, "step": 8200 }, { "epoch": 1.53, "learning_rate": 2.7201768322166193e-05, "loss": 3.1061, "step": 8210 }, { "epoch": 1.53, "learning_rate": 2.7167230779857707e-05, "loss": 3.1527, "step": 8220 }, { "epoch": 1.53, "learning_rate": 2.7132693237549217e-05, "loss": 3.1558, "step": 8230 }, { "epoch": 1.54, "learning_rate": 2.7098155695240728e-05, "loss": 3.1518, "step": 8240 }, { "epoch": 1.54, "learning_rate": 2.706361815293224e-05, "loss": 3.1196, "step": 8250 }, { "epoch": 1.54, "learning_rate": 2.7029080610623748e-05, "loss": 3.1993, "step": 8260 }, { "epoch": 1.54, "learning_rate": 2.699454306831526e-05, "loss": 3.1277, "step": 8270 }, { "epoch": 1.54, "learning_rate": 2.6960005526006772e-05, "loss": 3.1874, "step": 8280 }, { "epoch": 1.55, "learning_rate": 2.6925467983698282e-05, "loss": 3.1914, "step": 8290 }, { "epoch": 1.55, "learning_rate": 2.6890930441389793e-05, "loss": 3.0947, "step": 8300 }, { "epoch": 1.55, "learning_rate": 2.68563928990813e-05, "loss": 3.0878, "step": 8310 }, { "epoch": 1.55, "learning_rate": 2.6821855356772813e-05, "loss": 3.1066, "step": 8320 }, { "epoch": 1.55, "learning_rate": 2.6787317814464324e-05, "loss": 3.1181, "step": 8330 }, { "epoch": 1.56, "learning_rate": 2.6752780272155837e-05, "loss": 3.1704, "step": 8340 }, { "epoch": 1.56, "learning_rate": 2.6718242729847348e-05, "loss": 3.1091, "step": 8350 }, { "epoch": 1.56, "learning_rate": 2.6683705187538855e-05, "loss": 3.0995, "step": 8360 }, { "epoch": 1.56, "learning_rate": 2.6649167645230365e-05, "loss": 3.1144, "step": 8370 }, { "epoch": 1.56, "learning_rate": 2.661463010292188e-05, "loss": 3.0199, "step": 8380 }, { "epoch": 1.56, "learning_rate": 2.658009256061339e-05, "loss": 3.118, "step": 8390 }, { "epoch": 1.57, "learning_rate": 2.6545555018304896e-05, "loss": 3.1443, "step": 8400 }, { "epoch": 1.57, "learning_rate": 2.651101747599641e-05, "loss": 3.1003, "step": 8410 }, { "epoch": 1.57, "learning_rate": 2.647647993368792e-05, "loss": 3.1032, "step": 8420 }, { "epoch": 1.57, "learning_rate": 2.644194239137943e-05, "loss": 3.0726, "step": 8430 }, { "epoch": 1.57, "learning_rate": 2.6407404849070944e-05, "loss": 3.1226, "step": 8440 }, { "epoch": 1.58, "learning_rate": 2.637286730676245e-05, "loss": 3.1293, "step": 8450 }, { "epoch": 1.58, "learning_rate": 2.633832976445396e-05, "loss": 2.9997, "step": 8460 }, { "epoch": 1.58, "learning_rate": 2.6303792222145475e-05, "loss": 3.0414, "step": 8470 }, { "epoch": 1.58, "learning_rate": 2.6269254679836985e-05, "loss": 3.11, "step": 8480 }, { "epoch": 1.58, "learning_rate": 2.6234717137528496e-05, "loss": 3.1564, "step": 8490 }, { "epoch": 1.59, "learning_rate": 2.6200179595220002e-05, "loss": 3.0275, "step": 8500 }, { "epoch": 1.59, "learning_rate": 2.6165642052911516e-05, "loss": 3.1224, "step": 8510 }, { "epoch": 1.59, "learning_rate": 2.6131104510603027e-05, "loss": 3.089, "step": 8520 }, { "epoch": 1.59, "learning_rate": 2.609656696829454e-05, "loss": 2.9951, "step": 8530 }, { "epoch": 1.59, "learning_rate": 2.6062029425986044e-05, "loss": 3.0263, "step": 8540 }, { "epoch": 1.59, "learning_rate": 2.6027491883677557e-05, "loss": 3.0849, "step": 8550 }, { "epoch": 1.6, "learning_rate": 2.5992954341369068e-05, "loss": 3.0331, "step": 8560 }, { "epoch": 1.6, "learning_rate": 2.595841679906058e-05, "loss": 3.0678, "step": 8570 }, { "epoch": 1.6, "learning_rate": 2.5923879256752092e-05, "loss": 3.0979, "step": 8580 }, { "epoch": 1.6, "learning_rate": 2.58893417144436e-05, "loss": 3.1191, "step": 8590 }, { "epoch": 1.6, "learning_rate": 2.5854804172135112e-05, "loss": 3.0135, "step": 8600 }, { "epoch": 1.61, "learning_rate": 2.5820266629826623e-05, "loss": 3.0578, "step": 8610 }, { "epoch": 1.61, "learning_rate": 2.5785729087518133e-05, "loss": 3.1627, "step": 8620 }, { "epoch": 1.61, "learning_rate": 2.5751191545209647e-05, "loss": 3.0275, "step": 8630 }, { "epoch": 1.61, "learning_rate": 2.5716654002901154e-05, "loss": 2.9919, "step": 8640 }, { "epoch": 1.61, "learning_rate": 2.5682116460592664e-05, "loss": 3.0629, "step": 8650 }, { "epoch": 1.61, "learning_rate": 2.5647578918284178e-05, "loss": 3.0972, "step": 8660 }, { "epoch": 1.62, "learning_rate": 2.5613041375975688e-05, "loss": 3.1405, "step": 8670 }, { "epoch": 1.62, "learning_rate": 2.5578503833667202e-05, "loss": 3.0783, "step": 8680 }, { "epoch": 1.62, "learning_rate": 2.5543966291358705e-05, "loss": 3.0817, "step": 8690 }, { "epoch": 1.62, "learning_rate": 2.550942874905022e-05, "loss": 3.0429, "step": 8700 }, { "epoch": 1.62, "learning_rate": 2.547489120674173e-05, "loss": 3.0268, "step": 8710 }, { "epoch": 1.63, "learning_rate": 2.5440353664433243e-05, "loss": 3.0695, "step": 8720 }, { "epoch": 1.63, "learning_rate": 2.5405816122124747e-05, "loss": 3.0673, "step": 8730 }, { "epoch": 1.63, "learning_rate": 2.537127857981626e-05, "loss": 2.9537, "step": 8740 }, { "epoch": 1.63, "learning_rate": 2.533674103750777e-05, "loss": 2.982, "step": 8750 }, { "epoch": 1.63, "learning_rate": 2.5302203495199284e-05, "loss": 3.0885, "step": 8760 }, { "epoch": 1.64, "learning_rate": 2.5267665952890795e-05, "loss": 3.0187, "step": 8770 }, { "epoch": 1.64, "learning_rate": 2.52331284105823e-05, "loss": 2.9979, "step": 8780 }, { "epoch": 1.64, "learning_rate": 2.5198590868273815e-05, "loss": 2.998, "step": 8790 }, { "epoch": 1.64, "learning_rate": 2.5164053325965326e-05, "loss": 2.9708, "step": 8800 }, { "epoch": 1.64, "learning_rate": 2.5129515783656836e-05, "loss": 2.9856, "step": 8810 }, { "epoch": 1.64, "learning_rate": 2.509497824134835e-05, "loss": 3.0117, "step": 8820 }, { "epoch": 1.65, "learning_rate": 2.5060440699039856e-05, "loss": 3.0323, "step": 8830 }, { "epoch": 1.65, "learning_rate": 2.5025903156731367e-05, "loss": 3.0227, "step": 8840 }, { "epoch": 1.65, "learning_rate": 2.499136561442288e-05, "loss": 3.0074, "step": 8850 }, { "epoch": 1.65, "learning_rate": 2.4956828072114387e-05, "loss": 3.0091, "step": 8860 }, { "epoch": 1.65, "learning_rate": 2.49222905298059e-05, "loss": 3.1571, "step": 8870 }, { "epoch": 1.66, "learning_rate": 2.488775298749741e-05, "loss": 3.0275, "step": 8880 }, { "epoch": 1.66, "learning_rate": 2.4853215445188922e-05, "loss": 3.0307, "step": 8890 }, { "epoch": 1.66, "learning_rate": 2.4818677902880432e-05, "loss": 2.9366, "step": 8900 }, { "epoch": 1.66, "learning_rate": 2.4784140360571942e-05, "loss": 2.9924, "step": 8910 }, { "epoch": 1.66, "learning_rate": 2.4749602818263453e-05, "loss": 3.0313, "step": 8920 }, { "epoch": 1.67, "learning_rate": 2.4715065275954966e-05, "loss": 2.9134, "step": 8930 }, { "epoch": 1.67, "learning_rate": 2.4680527733646473e-05, "loss": 2.9929, "step": 8940 }, { "epoch": 1.67, "learning_rate": 2.4645990191337987e-05, "loss": 2.9983, "step": 8950 }, { "epoch": 1.67, "learning_rate": 2.4611452649029494e-05, "loss": 2.9756, "step": 8960 }, { "epoch": 1.67, "learning_rate": 2.4576915106721008e-05, "loss": 2.9688, "step": 8970 }, { "epoch": 1.67, "learning_rate": 2.4542377564412518e-05, "loss": 2.9359, "step": 8980 }, { "epoch": 1.68, "learning_rate": 2.4507840022104028e-05, "loss": 3.0948, "step": 8990 }, { "epoch": 1.68, "learning_rate": 2.447330247979554e-05, "loss": 2.9085, "step": 9000 }, { "epoch": 1.68, "learning_rate": 2.443876493748705e-05, "loss": 3.053, "step": 9010 }, { "epoch": 1.68, "learning_rate": 2.4404227395178563e-05, "loss": 3.0007, "step": 9020 }, { "epoch": 1.68, "learning_rate": 2.436968985287007e-05, "loss": 2.9837, "step": 9030 }, { "epoch": 1.69, "learning_rate": 2.4335152310561583e-05, "loss": 2.9764, "step": 9040 }, { "epoch": 1.69, "learning_rate": 2.430061476825309e-05, "loss": 2.955, "step": 9050 }, { "epoch": 1.69, "learning_rate": 2.4266077225944604e-05, "loss": 2.9645, "step": 9060 }, { "epoch": 1.69, "learning_rate": 2.4231539683636114e-05, "loss": 3.0081, "step": 9070 }, { "epoch": 1.69, "learning_rate": 2.4197002141327624e-05, "loss": 3.0566, "step": 9080 }, { "epoch": 1.7, "learning_rate": 2.4162464599019135e-05, "loss": 2.8261, "step": 9090 }, { "epoch": 1.7, "learning_rate": 2.4127927056710645e-05, "loss": 2.9972, "step": 9100 }, { "epoch": 1.7, "learning_rate": 2.4093389514402155e-05, "loss": 2.9635, "step": 9110 }, { "epoch": 1.7, "learning_rate": 2.4058851972093666e-05, "loss": 2.9686, "step": 9120 }, { "epoch": 1.7, "learning_rate": 2.4024314429785176e-05, "loss": 2.952, "step": 9130 }, { "epoch": 1.7, "learning_rate": 2.398977688747669e-05, "loss": 2.9526, "step": 9140 }, { "epoch": 1.71, "learning_rate": 2.3955239345168197e-05, "loss": 3.0147, "step": 9150 }, { "epoch": 1.71, "learning_rate": 2.392070180285971e-05, "loss": 2.8927, "step": 9160 }, { "epoch": 1.71, "learning_rate": 2.388616426055122e-05, "loss": 2.8888, "step": 9170 }, { "epoch": 1.71, "learning_rate": 2.385162671824273e-05, "loss": 2.9943, "step": 9180 }, { "epoch": 1.71, "learning_rate": 2.381708917593424e-05, "loss": 2.8836, "step": 9190 }, { "epoch": 1.72, "learning_rate": 2.378255163362575e-05, "loss": 3.0489, "step": 9200 }, { "epoch": 1.72, "learning_rate": 2.3748014091317265e-05, "loss": 3.009, "step": 9210 }, { "epoch": 1.72, "learning_rate": 2.3713476549008772e-05, "loss": 2.8603, "step": 9220 }, { "epoch": 1.72, "learning_rate": 2.3678939006700286e-05, "loss": 2.9036, "step": 9230 }, { "epoch": 1.72, "learning_rate": 2.3644401464391793e-05, "loss": 2.9626, "step": 9240 }, { "epoch": 1.72, "learning_rate": 2.3609863922083307e-05, "loss": 2.9827, "step": 9250 }, { "epoch": 1.73, "learning_rate": 2.3575326379774817e-05, "loss": 3.0024, "step": 9260 }, { "epoch": 1.73, "learning_rate": 2.3540788837466327e-05, "loss": 2.9592, "step": 9270 }, { "epoch": 1.73, "learning_rate": 2.3506251295157838e-05, "loss": 2.9028, "step": 9280 }, { "epoch": 1.73, "learning_rate": 2.3471713752849348e-05, "loss": 2.8719, "step": 9290 }, { "epoch": 1.73, "learning_rate": 2.3437176210540858e-05, "loss": 2.9314, "step": 9300 }, { "epoch": 1.74, "learning_rate": 2.340263866823237e-05, "loss": 3.0222, "step": 9310 }, { "epoch": 1.74, "learning_rate": 2.336810112592388e-05, "loss": 2.9664, "step": 9320 }, { "epoch": 1.74, "learning_rate": 2.3333563583615393e-05, "loss": 2.8377, "step": 9330 }, { "epoch": 1.74, "learning_rate": 2.32990260413069e-05, "loss": 2.9357, "step": 9340 }, { "epoch": 1.74, "learning_rate": 2.3264488498998413e-05, "loss": 2.8697, "step": 9350 }, { "epoch": 1.75, "learning_rate": 2.3229950956689923e-05, "loss": 2.909, "step": 9360 }, { "epoch": 1.75, "learning_rate": 2.3195413414381434e-05, "loss": 2.9791, "step": 9370 }, { "epoch": 1.75, "learning_rate": 2.3160875872072944e-05, "loss": 2.9093, "step": 9380 }, { "epoch": 1.75, "learning_rate": 2.3126338329764454e-05, "loss": 2.843, "step": 9390 }, { "epoch": 1.75, "learning_rate": 2.3091800787455968e-05, "loss": 2.889, "step": 9400 }, { "epoch": 1.75, "learning_rate": 2.3057263245147475e-05, "loss": 2.8633, "step": 9410 }, { "epoch": 1.76, "learning_rate": 2.302272570283899e-05, "loss": 2.9043, "step": 9420 }, { "epoch": 1.76, "learning_rate": 2.2988188160530496e-05, "loss": 2.8618, "step": 9430 }, { "epoch": 1.76, "learning_rate": 2.295365061822201e-05, "loss": 2.8755, "step": 9440 }, { "epoch": 1.76, "learning_rate": 2.291911307591352e-05, "loss": 2.8721, "step": 9450 }, { "epoch": 1.76, "learning_rate": 2.288457553360503e-05, "loss": 2.926, "step": 9460 }, { "epoch": 1.77, "learning_rate": 2.285003799129654e-05, "loss": 2.8687, "step": 9470 }, { "epoch": 1.77, "learning_rate": 2.281550044898805e-05, "loss": 2.9855, "step": 9480 }, { "epoch": 1.77, "learning_rate": 2.278096290667956e-05, "loss": 2.7932, "step": 9490 }, { "epoch": 1.77, "learning_rate": 2.274642536437107e-05, "loss": 2.8963, "step": 9500 }, { "epoch": 1.77, "learning_rate": 2.271188782206258e-05, "loss": 2.886, "step": 9510 }, { "epoch": 1.78, "learning_rate": 2.2677350279754095e-05, "loss": 2.8421, "step": 9520 }, { "epoch": 1.78, "learning_rate": 2.2642812737445606e-05, "loss": 2.9048, "step": 9530 }, { "epoch": 1.78, "learning_rate": 2.2608275195137116e-05, "loss": 2.8581, "step": 9540 }, { "epoch": 1.78, "learning_rate": 2.2573737652828626e-05, "loss": 2.8165, "step": 9550 }, { "epoch": 1.78, "learning_rate": 2.2539200110520137e-05, "loss": 2.8677, "step": 9560 }, { "epoch": 1.78, "learning_rate": 2.2504662568211647e-05, "loss": 2.7541, "step": 9570 }, { "epoch": 1.79, "learning_rate": 2.2470125025903157e-05, "loss": 2.907, "step": 9580 }, { "epoch": 1.79, "learning_rate": 2.243558748359467e-05, "loss": 2.9712, "step": 9590 }, { "epoch": 1.79, "learning_rate": 2.2401049941286178e-05, "loss": 2.945, "step": 9600 }, { "epoch": 1.79, "learning_rate": 2.236651239897769e-05, "loss": 2.9733, "step": 9610 }, { "epoch": 1.79, "learning_rate": 2.23319748566692e-05, "loss": 2.7933, "step": 9620 }, { "epoch": 1.8, "learning_rate": 2.2297437314360712e-05, "loss": 2.9037, "step": 9630 }, { "epoch": 1.8, "learning_rate": 2.2262899772052222e-05, "loss": 2.8159, "step": 9640 }, { "epoch": 1.8, "learning_rate": 2.2228362229743733e-05, "loss": 2.9867, "step": 9650 }, { "epoch": 1.8, "learning_rate": 2.2193824687435243e-05, "loss": 2.7537, "step": 9660 }, { "epoch": 1.8, "learning_rate": 2.2159287145126753e-05, "loss": 2.859, "step": 9670 }, { "epoch": 1.81, "learning_rate": 2.2124749602818264e-05, "loss": 2.8736, "step": 9680 }, { "epoch": 1.81, "learning_rate": 2.2090212060509774e-05, "loss": 2.8086, "step": 9690 }, { "epoch": 1.81, "learning_rate": 2.2055674518201284e-05, "loss": 2.913, "step": 9700 }, { "epoch": 1.81, "learning_rate": 2.2021136975892798e-05, "loss": 2.7818, "step": 9710 }, { "epoch": 1.81, "learning_rate": 2.198659943358431e-05, "loss": 2.8732, "step": 9720 }, { "epoch": 1.81, "learning_rate": 2.195206189127582e-05, "loss": 2.8173, "step": 9730 }, { "epoch": 1.82, "learning_rate": 2.191752434896733e-05, "loss": 2.8845, "step": 9740 }, { "epoch": 1.82, "learning_rate": 2.188298680665884e-05, "loss": 2.8686, "step": 9750 }, { "epoch": 1.82, "learning_rate": 2.184844926435035e-05, "loss": 2.9331, "step": 9760 }, { "epoch": 1.82, "learning_rate": 2.181391172204186e-05, "loss": 2.8525, "step": 9770 }, { "epoch": 1.82, "learning_rate": 2.1779374179733374e-05, "loss": 2.8304, "step": 9780 }, { "epoch": 1.83, "learning_rate": 2.174483663742488e-05, "loss": 2.7739, "step": 9790 }, { "epoch": 1.83, "learning_rate": 2.1710299095116394e-05, "loss": 2.9298, "step": 9800 }, { "epoch": 1.83, "learning_rate": 2.16757615528079e-05, "loss": 2.824, "step": 9810 }, { "epoch": 1.83, "learning_rate": 2.1641224010499415e-05, "loss": 2.7872, "step": 9820 }, { "epoch": 1.83, "learning_rate": 2.1606686468190925e-05, "loss": 2.7732, "step": 9830 }, { "epoch": 1.83, "learning_rate": 2.1572148925882436e-05, "loss": 2.7426, "step": 9840 }, { "epoch": 1.84, "learning_rate": 2.1537611383573946e-05, "loss": 2.82, "step": 9850 }, { "epoch": 1.84, "learning_rate": 2.1503073841265456e-05, "loss": 2.7538, "step": 9860 }, { "epoch": 1.84, "learning_rate": 2.1468536298956967e-05, "loss": 2.7856, "step": 9870 }, { "epoch": 1.84, "learning_rate": 2.1433998756648477e-05, "loss": 2.7411, "step": 9880 }, { "epoch": 1.84, "learning_rate": 2.1399461214339987e-05, "loss": 2.7934, "step": 9890 }, { "epoch": 1.85, "learning_rate": 2.13649236720315e-05, "loss": 2.8427, "step": 9900 }, { "epoch": 1.85, "learning_rate": 2.133038612972301e-05, "loss": 2.7685, "step": 9910 }, { "epoch": 1.85, "learning_rate": 2.129584858741452e-05, "loss": 2.7284, "step": 9920 }, { "epoch": 1.85, "learning_rate": 2.1261311045106032e-05, "loss": 2.8034, "step": 9930 }, { "epoch": 1.85, "learning_rate": 2.1226773502797542e-05, "loss": 2.8175, "step": 9940 }, { "epoch": 1.86, "learning_rate": 2.1192235960489052e-05, "loss": 2.7895, "step": 9950 }, { "epoch": 1.86, "learning_rate": 2.1157698418180563e-05, "loss": 2.8621, "step": 9960 }, { "epoch": 1.86, "learning_rate": 2.1123160875872076e-05, "loss": 2.7962, "step": 9970 }, { "epoch": 1.86, "learning_rate": 2.1088623333563583e-05, "loss": 2.7863, "step": 9980 }, { "epoch": 1.86, "learning_rate": 2.1054085791255097e-05, "loss": 2.7884, "step": 9990 }, { "epoch": 1.86, "learning_rate": 2.1019548248946604e-05, "loss": 2.7566, "step": 10000 }, { "epoch": 1.87, "learning_rate": 2.0985010706638118e-05, "loss": 2.8012, "step": 10010 }, { "epoch": 1.87, "learning_rate": 2.0950473164329625e-05, "loss": 2.8358, "step": 10020 }, { "epoch": 1.87, "learning_rate": 2.091593562202114e-05, "loss": 2.8367, "step": 10030 }, { "epoch": 1.87, "learning_rate": 2.088139807971265e-05, "loss": 2.7646, "step": 10040 }, { "epoch": 1.87, "learning_rate": 2.084686053740416e-05, "loss": 2.8934, "step": 10050 }, { "epoch": 1.88, "learning_rate": 2.081232299509567e-05, "loss": 2.8152, "step": 10060 }, { "epoch": 1.88, "learning_rate": 2.077778545278718e-05, "loss": 2.7449, "step": 10070 }, { "epoch": 1.88, "learning_rate": 2.074324791047869e-05, "loss": 2.7978, "step": 10080 }, { "epoch": 1.88, "learning_rate": 2.0708710368170204e-05, "loss": 2.7717, "step": 10090 }, { "epoch": 1.88, "learning_rate": 2.0674172825861714e-05, "loss": 2.8873, "step": 10100 }, { "epoch": 1.89, "learning_rate": 2.0639635283553224e-05, "loss": 2.8055, "step": 10110 }, { "epoch": 1.89, "learning_rate": 2.0605097741244735e-05, "loss": 2.6969, "step": 10120 }, { "epoch": 1.89, "learning_rate": 2.0570560198936245e-05, "loss": 2.7373, "step": 10130 }, { "epoch": 1.89, "learning_rate": 2.0536022656627755e-05, "loss": 2.7297, "step": 10140 }, { "epoch": 1.89, "learning_rate": 2.0501485114319265e-05, "loss": 2.824, "step": 10150 }, { "epoch": 1.89, "learning_rate": 2.046694757201078e-05, "loss": 2.6395, "step": 10160 }, { "epoch": 1.9, "learning_rate": 2.0432410029702286e-05, "loss": 2.7764, "step": 10170 }, { "epoch": 1.9, "learning_rate": 2.03978724873938e-05, "loss": 2.7993, "step": 10180 }, { "epoch": 1.9, "learning_rate": 2.0363334945085307e-05, "loss": 2.7092, "step": 10190 }, { "epoch": 1.9, "learning_rate": 2.032879740277682e-05, "loss": 2.8668, "step": 10200 }, { "epoch": 1.9, "learning_rate": 2.0294259860468327e-05, "loss": 2.7189, "step": 10210 }, { "epoch": 1.91, "learning_rate": 2.025972231815984e-05, "loss": 2.6979, "step": 10220 }, { "epoch": 1.91, "learning_rate": 2.022518477585135e-05, "loss": 2.7583, "step": 10230 }, { "epoch": 1.91, "learning_rate": 2.0190647233542862e-05, "loss": 2.8106, "step": 10240 }, { "epoch": 1.91, "learning_rate": 2.0156109691234372e-05, "loss": 2.7257, "step": 10250 }, { "epoch": 1.91, "learning_rate": 2.0121572148925882e-05, "loss": 2.6996, "step": 10260 }, { "epoch": 1.92, "learning_rate": 2.0087034606617393e-05, "loss": 2.6771, "step": 10270 }, { "epoch": 1.92, "learning_rate": 2.0052497064308906e-05, "loss": 2.8394, "step": 10280 }, { "epoch": 1.92, "learning_rate": 2.0017959522000417e-05, "loss": 2.7741, "step": 10290 }, { "epoch": 1.92, "learning_rate": 1.9983421979691927e-05, "loss": 2.7705, "step": 10300 }, { "epoch": 1.92, "learning_rate": 1.9948884437383437e-05, "loss": 2.7212, "step": 10310 }, { "epoch": 1.92, "learning_rate": 1.9914346895074948e-05, "loss": 2.773, "step": 10320 }, { "epoch": 1.93, "learning_rate": 1.9879809352766458e-05, "loss": 2.7534, "step": 10330 }, { "epoch": 1.93, "learning_rate": 1.9845271810457968e-05, "loss": 2.7153, "step": 10340 }, { "epoch": 1.93, "learning_rate": 1.9810734268149482e-05, "loss": 2.7342, "step": 10350 }, { "epoch": 1.93, "learning_rate": 1.977619672584099e-05, "loss": 2.7553, "step": 10360 }, { "epoch": 1.93, "learning_rate": 1.9741659183532503e-05, "loss": 2.7939, "step": 10370 }, { "epoch": 1.94, "learning_rate": 1.970712164122401e-05, "loss": 2.7801, "step": 10380 }, { "epoch": 1.94, "learning_rate": 1.9672584098915523e-05, "loss": 2.786, "step": 10390 }, { "epoch": 1.94, "learning_rate": 1.963804655660703e-05, "loss": 2.7158, "step": 10400 }, { "epoch": 1.94, "learning_rate": 1.9603509014298544e-05, "loss": 2.7806, "step": 10410 }, { "epoch": 1.94, "learning_rate": 1.9568971471990054e-05, "loss": 2.7648, "step": 10420 }, { "epoch": 1.94, "learning_rate": 1.9534433929681564e-05, "loss": 2.8309, "step": 10430 }, { "epoch": 1.95, "learning_rate": 1.9499896387373075e-05, "loss": 2.6813, "step": 10440 }, { "epoch": 1.95, "learning_rate": 1.9465358845064585e-05, "loss": 2.6845, "step": 10450 }, { "epoch": 1.95, "learning_rate": 1.9430821302756095e-05, "loss": 2.7858, "step": 10460 }, { "epoch": 1.95, "learning_rate": 1.939628376044761e-05, "loss": 2.8503, "step": 10470 }, { "epoch": 1.95, "learning_rate": 1.936174621813912e-05, "loss": 2.7545, "step": 10480 }, { "epoch": 1.96, "learning_rate": 1.932720867583063e-05, "loss": 2.6919, "step": 10490 }, { "epoch": 1.96, "learning_rate": 1.929267113352214e-05, "loss": 2.8175, "step": 10500 }, { "epoch": 1.96, "learning_rate": 1.925813359121365e-05, "loss": 2.8181, "step": 10510 }, { "epoch": 1.96, "learning_rate": 1.922359604890516e-05, "loss": 2.7865, "step": 10520 }, { "epoch": 1.96, "learning_rate": 1.918905850659667e-05, "loss": 2.8738, "step": 10530 }, { "epoch": 1.97, "learning_rate": 1.9154520964288185e-05, "loss": 2.7771, "step": 10540 }, { "epoch": 1.97, "learning_rate": 1.911998342197969e-05, "loss": 2.6842, "step": 10550 }, { "epoch": 1.97, "learning_rate": 1.9085445879671205e-05, "loss": 2.7889, "step": 10560 }, { "epoch": 1.97, "learning_rate": 1.9050908337362712e-05, "loss": 2.7784, "step": 10570 }, { "epoch": 1.97, "learning_rate": 1.9016370795054226e-05, "loss": 2.746, "step": 10580 }, { "epoch": 1.97, "learning_rate": 1.8981833252745733e-05, "loss": 2.733, "step": 10590 }, { "epoch": 1.98, "learning_rate": 1.8947295710437247e-05, "loss": 2.8084, "step": 10600 }, { "epoch": 1.98, "learning_rate": 1.8912758168128757e-05, "loss": 2.6748, "step": 10610 }, { "epoch": 1.98, "learning_rate": 1.8878220625820267e-05, "loss": 2.6682, "step": 10620 }, { "epoch": 1.98, "learning_rate": 1.8843683083511778e-05, "loss": 2.7535, "step": 10630 }, { "epoch": 1.98, "learning_rate": 1.8809145541203288e-05, "loss": 2.8174, "step": 10640 }, { "epoch": 1.99, "learning_rate": 1.8774607998894798e-05, "loss": 2.7326, "step": 10650 }, { "epoch": 1.99, "learning_rate": 1.874007045658631e-05, "loss": 2.6306, "step": 10660 }, { "epoch": 1.99, "learning_rate": 1.8705532914277822e-05, "loss": 2.6619, "step": 10670 }, { "epoch": 1.99, "learning_rate": 1.8670995371969333e-05, "loss": 2.6543, "step": 10680 }, { "epoch": 1.99, "learning_rate": 1.8636457829660843e-05, "loss": 2.7638, "step": 10690 }, { "epoch": 2.0, "learning_rate": 1.8601920287352353e-05, "loss": 2.7623, "step": 10700 }, { "epoch": 2.0, "learning_rate": 1.8567382745043863e-05, "loss": 2.7837, "step": 10710 }, { "epoch": 2.0, "learning_rate": 1.8532845202735374e-05, "loss": 2.694, "step": 10720 }, { "epoch": 2.0, "eval_accuracy": 0.5199166098494782, "eval_loss": 2.258553981781006, "eval_runtime": 8350.0446, "eval_samples_per_second": 9.134, "eval_steps_per_second": 0.286, "step": 10725 }, { "epoch": 2.0, "learning_rate": 1.8498307660426887e-05, "loss": 2.7086, "step": 10730 }, { "epoch": 2.0, "learning_rate": 1.8463770118118394e-05, "loss": 2.5868, "step": 10740 }, { "epoch": 2.0, "learning_rate": 1.8429232575809908e-05, "loss": 2.6158, "step": 10750 }, { "epoch": 2.01, "learning_rate": 1.8394695033501415e-05, "loss": 2.5816, "step": 10760 }, { "epoch": 2.01, "learning_rate": 1.836015749119293e-05, "loss": 2.6675, "step": 10770 }, { "epoch": 2.01, "learning_rate": 1.8325619948884436e-05, "loss": 2.6007, "step": 10780 }, { "epoch": 2.01, "learning_rate": 1.829108240657595e-05, "loss": 2.7189, "step": 10790 }, { "epoch": 2.01, "learning_rate": 1.825654486426746e-05, "loss": 2.6269, "step": 10800 }, { "epoch": 2.02, "learning_rate": 1.822200732195897e-05, "loss": 2.6914, "step": 10810 }, { "epoch": 2.02, "learning_rate": 1.818746977965048e-05, "loss": 2.6807, "step": 10820 }, { "epoch": 2.02, "learning_rate": 1.815293223734199e-05, "loss": 2.6789, "step": 10830 }, { "epoch": 2.02, "learning_rate": 1.81183946950335e-05, "loss": 2.6979, "step": 10840 }, { "epoch": 2.02, "learning_rate": 1.808385715272501e-05, "loss": 2.7043, "step": 10850 }, { "epoch": 2.03, "learning_rate": 1.8049319610416525e-05, "loss": 2.5784, "step": 10860 }, { "epoch": 2.03, "learning_rate": 1.8014782068108035e-05, "loss": 2.765, "step": 10870 }, { "epoch": 2.03, "learning_rate": 1.7980244525799546e-05, "loss": 2.7079, "step": 10880 }, { "epoch": 2.03, "learning_rate": 1.7945706983491056e-05, "loss": 2.5952, "step": 10890 }, { "epoch": 2.03, "learning_rate": 1.7911169441182566e-05, "loss": 2.5811, "step": 10900 }, { "epoch": 2.03, "learning_rate": 1.7876631898874077e-05, "loss": 2.5979, "step": 10910 }, { "epoch": 2.04, "learning_rate": 1.784209435656559e-05, "loss": 2.5244, "step": 10920 }, { "epoch": 2.04, "learning_rate": 1.7807556814257097e-05, "loss": 2.5945, "step": 10930 }, { "epoch": 2.04, "learning_rate": 1.777301927194861e-05, "loss": 2.6323, "step": 10940 }, { "epoch": 2.04, "learning_rate": 1.7738481729640118e-05, "loss": 2.7229, "step": 10950 }, { "epoch": 2.04, "learning_rate": 1.770394418733163e-05, "loss": 2.6336, "step": 10960 }, { "epoch": 2.05, "learning_rate": 1.766940664502314e-05, "loss": 2.7559, "step": 10970 }, { "epoch": 2.05, "learning_rate": 1.7634869102714652e-05, "loss": 2.5932, "step": 10980 }, { "epoch": 2.05, "learning_rate": 1.7600331560406162e-05, "loss": 2.6705, "step": 10990 }, { "epoch": 2.05, "learning_rate": 1.7565794018097673e-05, "loss": 2.6559, "step": 11000 }, { "epoch": 2.05, "learning_rate": 1.7531256475789183e-05, "loss": 2.6464, "step": 11010 }, { "epoch": 2.05, "learning_rate": 1.7496718933480693e-05, "loss": 2.612, "step": 11020 }, { "epoch": 2.06, "learning_rate": 1.7462181391172207e-05, "loss": 2.6383, "step": 11030 }, { "epoch": 2.06, "learning_rate": 1.7427643848863714e-05, "loss": 2.6744, "step": 11040 }, { "epoch": 2.06, "learning_rate": 1.7393106306555228e-05, "loss": 2.6917, "step": 11050 }, { "epoch": 2.06, "learning_rate": 1.7358568764246738e-05, "loss": 2.6124, "step": 11060 }, { "epoch": 2.06, "learning_rate": 1.732403122193825e-05, "loss": 2.7056, "step": 11070 }, { "epoch": 2.07, "learning_rate": 1.728949367962976e-05, "loss": 2.666, "step": 11080 }, { "epoch": 2.07, "learning_rate": 1.725495613732127e-05, "loss": 2.5992, "step": 11090 }, { "epoch": 2.07, "learning_rate": 1.722041859501278e-05, "loss": 2.6046, "step": 11100 }, { "epoch": 2.07, "learning_rate": 1.718588105270429e-05, "loss": 2.5876, "step": 11110 }, { "epoch": 2.07, "learning_rate": 1.71513435103958e-05, "loss": 2.5939, "step": 11120 }, { "epoch": 2.08, "learning_rate": 1.7116805968087314e-05, "loss": 2.6956, "step": 11130 }, { "epoch": 2.08, "learning_rate": 1.708226842577882e-05, "loss": 2.6503, "step": 11140 }, { "epoch": 2.08, "learning_rate": 1.7047730883470334e-05, "loss": 2.606, "step": 11150 }, { "epoch": 2.08, "learning_rate": 1.701319334116184e-05, "loss": 2.6121, "step": 11160 }, { "epoch": 2.08, "learning_rate": 1.6978655798853355e-05, "loss": 2.5824, "step": 11170 }, { "epoch": 2.08, "learning_rate": 1.6944118256544865e-05, "loss": 2.5693, "step": 11180 }, { "epoch": 2.09, "learning_rate": 1.6909580714236376e-05, "loss": 2.6992, "step": 11190 }, { "epoch": 2.09, "learning_rate": 1.6875043171927886e-05, "loss": 2.6964, "step": 11200 }, { "epoch": 2.09, "learning_rate": 1.6840505629619396e-05, "loss": 2.5641, "step": 11210 }, { "epoch": 2.09, "learning_rate": 1.680596808731091e-05, "loss": 2.687, "step": 11220 }, { "epoch": 2.09, "learning_rate": 1.6771430545002417e-05, "loss": 2.7424, "step": 11230 }, { "epoch": 2.1, "learning_rate": 1.673689300269393e-05, "loss": 2.5703, "step": 11240 }, { "epoch": 2.1, "learning_rate": 1.670235546038544e-05, "loss": 2.6801, "step": 11250 }, { "epoch": 2.1, "learning_rate": 1.666781791807695e-05, "loss": 2.6199, "step": 11260 }, { "epoch": 2.1, "learning_rate": 1.663328037576846e-05, "loss": 2.5185, "step": 11270 }, { "epoch": 2.1, "learning_rate": 1.6598742833459972e-05, "loss": 2.6805, "step": 11280 }, { "epoch": 2.11, "learning_rate": 1.6564205291151482e-05, "loss": 2.6054, "step": 11290 }, { "epoch": 2.11, "learning_rate": 1.6529667748842992e-05, "loss": 2.5373, "step": 11300 }, { "epoch": 2.11, "learning_rate": 1.6495130206534503e-05, "loss": 2.6074, "step": 11310 }, { "epoch": 2.11, "learning_rate": 1.6460592664226016e-05, "loss": 2.5797, "step": 11320 }, { "epoch": 2.11, "learning_rate": 1.6426055121917523e-05, "loss": 2.6493, "step": 11330 }, { "epoch": 2.11, "learning_rate": 1.6391517579609037e-05, "loss": 2.5966, "step": 11340 }, { "epoch": 2.12, "learning_rate": 1.6356980037300544e-05, "loss": 2.6321, "step": 11350 }, { "epoch": 2.12, "learning_rate": 1.6322442494992058e-05, "loss": 2.6245, "step": 11360 }, { "epoch": 2.12, "learning_rate": 1.6287904952683568e-05, "loss": 2.6626, "step": 11370 }, { "epoch": 2.12, "learning_rate": 1.6253367410375078e-05, "loss": 2.6061, "step": 11380 }, { "epoch": 2.12, "learning_rate": 1.621882986806659e-05, "loss": 2.5854, "step": 11390 }, { "epoch": 2.13, "learning_rate": 1.61842923257581e-05, "loss": 2.6, "step": 11400 }, { "epoch": 2.13, "learning_rate": 1.6149754783449613e-05, "loss": 2.5424, "step": 11410 }, { "epoch": 2.13, "learning_rate": 1.611521724114112e-05, "loss": 2.6849, "step": 11420 }, { "epoch": 2.13, "learning_rate": 1.6080679698832633e-05, "loss": 2.5638, "step": 11430 }, { "epoch": 2.13, "learning_rate": 1.6046142156524144e-05, "loss": 2.6181, "step": 11440 }, { "epoch": 2.14, "learning_rate": 1.6011604614215654e-05, "loss": 2.5725, "step": 11450 }, { "epoch": 2.14, "learning_rate": 1.5977067071907164e-05, "loss": 2.5491, "step": 11460 }, { "epoch": 2.14, "learning_rate": 1.5942529529598675e-05, "loss": 2.6698, "step": 11470 }, { "epoch": 2.14, "learning_rate": 1.5907991987290185e-05, "loss": 2.6591, "step": 11480 }, { "epoch": 2.14, "learning_rate": 1.5873454444981695e-05, "loss": 2.669, "step": 11490 }, { "epoch": 2.14, "learning_rate": 1.5838916902673205e-05, "loss": 2.6449, "step": 11500 }, { "epoch": 2.15, "learning_rate": 1.580437936036472e-05, "loss": 2.6878, "step": 11510 }, { "epoch": 2.15, "learning_rate": 1.5769841818056226e-05, "loss": 2.5862, "step": 11520 }, { "epoch": 2.15, "learning_rate": 1.573530427574774e-05, "loss": 2.6372, "step": 11530 }, { "epoch": 2.15, "learning_rate": 1.5700766733439247e-05, "loss": 2.5786, "step": 11540 }, { "epoch": 2.15, "learning_rate": 1.566622919113076e-05, "loss": 2.525, "step": 11550 }, { "epoch": 2.16, "learning_rate": 1.563169164882227e-05, "loss": 2.5454, "step": 11560 }, { "epoch": 2.16, "learning_rate": 1.559715410651378e-05, "loss": 2.5995, "step": 11570 }, { "epoch": 2.16, "learning_rate": 1.556261656420529e-05, "loss": 2.5934, "step": 11580 }, { "epoch": 2.16, "learning_rate": 1.5528079021896802e-05, "loss": 2.5854, "step": 11590 }, { "epoch": 2.16, "learning_rate": 1.5493541479588315e-05, "loss": 2.4892, "step": 11600 }, { "epoch": 2.16, "learning_rate": 1.5459003937279822e-05, "loss": 2.5199, "step": 11610 }, { "epoch": 2.17, "learning_rate": 1.5424466394971336e-05, "loss": 2.6195, "step": 11620 }, { "epoch": 2.17, "learning_rate": 1.5389928852662846e-05, "loss": 2.5539, "step": 11630 }, { "epoch": 2.17, "learning_rate": 1.5355391310354357e-05, "loss": 2.4715, "step": 11640 }, { "epoch": 2.17, "learning_rate": 1.5320853768045867e-05, "loss": 2.636, "step": 11650 }, { "epoch": 2.17, "learning_rate": 1.5286316225737377e-05, "loss": 2.6113, "step": 11660 }, { "epoch": 2.18, "learning_rate": 1.5251778683428888e-05, "loss": 2.6125, "step": 11670 }, { "epoch": 2.18, "learning_rate": 1.5217241141120398e-05, "loss": 2.6328, "step": 11680 }, { "epoch": 2.18, "learning_rate": 1.518270359881191e-05, "loss": 2.6832, "step": 11690 }, { "epoch": 2.18, "learning_rate": 1.5148166056503422e-05, "loss": 2.6303, "step": 11700 }, { "epoch": 2.18, "learning_rate": 1.511362851419493e-05, "loss": 2.5381, "step": 11710 }, { "epoch": 2.19, "learning_rate": 1.5079090971886443e-05, "loss": 2.5419, "step": 11720 }, { "epoch": 2.19, "learning_rate": 1.5044553429577951e-05, "loss": 2.5489, "step": 11730 }, { "epoch": 2.19, "learning_rate": 1.5010015887269463e-05, "loss": 2.5821, "step": 11740 }, { "epoch": 2.19, "learning_rate": 1.4975478344960972e-05, "loss": 2.5817, "step": 11750 }, { "epoch": 2.19, "learning_rate": 1.4940940802652484e-05, "loss": 2.5954, "step": 11760 }, { "epoch": 2.19, "learning_rate": 1.4906403260343996e-05, "loss": 2.5759, "step": 11770 }, { "epoch": 2.2, "learning_rate": 1.4871865718035504e-05, "loss": 2.4825, "step": 11780 }, { "epoch": 2.2, "learning_rate": 1.4837328175727016e-05, "loss": 2.6348, "step": 11790 }, { "epoch": 2.2, "learning_rate": 1.4802790633418525e-05, "loss": 2.5029, "step": 11800 }, { "epoch": 2.2, "learning_rate": 1.4768253091110037e-05, "loss": 2.5852, "step": 11810 }, { "epoch": 2.2, "learning_rate": 1.4733715548801549e-05, "loss": 2.5978, "step": 11820 }, { "epoch": 2.21, "learning_rate": 1.4699178006493058e-05, "loss": 2.5382, "step": 11830 }, { "epoch": 2.21, "learning_rate": 1.466464046418457e-05, "loss": 2.5815, "step": 11840 }, { "epoch": 2.21, "learning_rate": 1.463010292187608e-05, "loss": 2.5992, "step": 11850 }, { "epoch": 2.21, "learning_rate": 1.459556537956759e-05, "loss": 2.5691, "step": 11860 }, { "epoch": 2.21, "learning_rate": 1.45610278372591e-05, "loss": 2.6351, "step": 11870 }, { "epoch": 2.22, "learning_rate": 1.4526490294950613e-05, "loss": 2.572, "step": 11880 }, { "epoch": 2.22, "learning_rate": 1.4491952752642125e-05, "loss": 2.5904, "step": 11890 }, { "epoch": 2.22, "learning_rate": 1.4457415210333633e-05, "loss": 2.5471, "step": 11900 }, { "epoch": 2.22, "learning_rate": 1.4422877668025145e-05, "loss": 2.4937, "step": 11910 }, { "epoch": 2.22, "learning_rate": 1.4388340125716654e-05, "loss": 2.4822, "step": 11920 }, { "epoch": 2.22, "learning_rate": 1.4353802583408166e-05, "loss": 2.5416, "step": 11930 }, { "epoch": 2.23, "learning_rate": 1.4319265041099675e-05, "loss": 2.4873, "step": 11940 }, { "epoch": 2.23, "learning_rate": 1.4284727498791187e-05, "loss": 2.4911, "step": 11950 }, { "epoch": 2.23, "learning_rate": 1.4250189956482699e-05, "loss": 2.4459, "step": 11960 }, { "epoch": 2.23, "learning_rate": 1.4215652414174207e-05, "loss": 2.499, "step": 11970 }, { "epoch": 2.23, "learning_rate": 1.418111487186572e-05, "loss": 2.4586, "step": 11980 }, { "epoch": 2.24, "learning_rate": 1.4146577329557228e-05, "loss": 2.5339, "step": 11990 }, { "epoch": 2.24, "learning_rate": 1.411203978724874e-05, "loss": 2.5919, "step": 12000 }, { "epoch": 2.24, "learning_rate": 1.407750224494025e-05, "loss": 2.5695, "step": 12010 }, { "epoch": 2.24, "learning_rate": 1.404296470263176e-05, "loss": 2.5624, "step": 12020 }, { "epoch": 2.24, "learning_rate": 1.4008427160323273e-05, "loss": 2.5149, "step": 12030 }, { "epoch": 2.25, "learning_rate": 1.3973889618014783e-05, "loss": 2.6278, "step": 12040 }, { "epoch": 2.25, "learning_rate": 1.3939352075706293e-05, "loss": 2.539, "step": 12050 }, { "epoch": 2.25, "learning_rate": 1.3904814533397803e-05, "loss": 2.4993, "step": 12060 }, { "epoch": 2.25, "learning_rate": 1.3870276991089315e-05, "loss": 2.576, "step": 12070 }, { "epoch": 2.25, "learning_rate": 1.3835739448780827e-05, "loss": 2.5318, "step": 12080 }, { "epoch": 2.25, "learning_rate": 1.3801201906472336e-05, "loss": 2.5886, "step": 12090 }, { "epoch": 2.26, "learning_rate": 1.3766664364163848e-05, "loss": 2.4935, "step": 12100 }, { "epoch": 2.26, "learning_rate": 1.3732126821855357e-05, "loss": 2.4726, "step": 12110 }, { "epoch": 2.26, "learning_rate": 1.3697589279546869e-05, "loss": 2.5471, "step": 12120 }, { "epoch": 2.26, "learning_rate": 1.3663051737238377e-05, "loss": 2.5019, "step": 12130 }, { "epoch": 2.26, "learning_rate": 1.362851419492989e-05, "loss": 2.5326, "step": 12140 }, { "epoch": 2.27, "learning_rate": 1.3593976652621401e-05, "loss": 2.5554, "step": 12150 }, { "epoch": 2.27, "learning_rate": 1.355943911031291e-05, "loss": 2.5263, "step": 12160 }, { "epoch": 2.27, "learning_rate": 1.3524901568004422e-05, "loss": 2.4685, "step": 12170 }, { "epoch": 2.27, "learning_rate": 1.349036402569593e-05, "loss": 2.5834, "step": 12180 }, { "epoch": 2.27, "learning_rate": 1.3455826483387443e-05, "loss": 2.4834, "step": 12190 }, { "epoch": 2.27, "learning_rate": 1.3421288941078953e-05, "loss": 2.5581, "step": 12200 }, { "epoch": 2.28, "learning_rate": 1.3386751398770463e-05, "loss": 2.4237, "step": 12210 }, { "epoch": 2.28, "learning_rate": 1.3352213856461975e-05, "loss": 2.5372, "step": 12220 }, { "epoch": 2.28, "learning_rate": 1.3317676314153486e-05, "loss": 2.5476, "step": 12230 }, { "epoch": 2.28, "learning_rate": 1.3283138771844998e-05, "loss": 2.5046, "step": 12240 }, { "epoch": 2.28, "learning_rate": 1.3248601229536506e-05, "loss": 2.5236, "step": 12250 }, { "epoch": 2.29, "learning_rate": 1.3214063687228018e-05, "loss": 2.4282, "step": 12260 }, { "epoch": 2.29, "learning_rate": 1.317952614491953e-05, "loss": 2.5041, "step": 12270 }, { "epoch": 2.29, "learning_rate": 1.3144988602611039e-05, "loss": 2.5305, "step": 12280 }, { "epoch": 2.29, "learning_rate": 1.3110451060302551e-05, "loss": 2.5322, "step": 12290 }, { "epoch": 2.29, "learning_rate": 1.307591351799406e-05, "loss": 2.443, "step": 12300 }, { "epoch": 2.3, "learning_rate": 1.3041375975685571e-05, "loss": 2.5025, "step": 12310 }, { "epoch": 2.3, "learning_rate": 1.300683843337708e-05, "loss": 2.6194, "step": 12320 }, { "epoch": 2.3, "learning_rate": 1.2972300891068592e-05, "loss": 2.5422, "step": 12330 }, { "epoch": 2.3, "learning_rate": 1.2937763348760104e-05, "loss": 2.487, "step": 12340 }, { "epoch": 2.3, "learning_rate": 1.2903225806451613e-05, "loss": 2.5176, "step": 12350 }, { "epoch": 2.3, "learning_rate": 1.2868688264143125e-05, "loss": 2.4831, "step": 12360 }, { "epoch": 2.31, "learning_rate": 1.2834150721834633e-05, "loss": 2.4596, "step": 12370 }, { "epoch": 2.31, "learning_rate": 1.2799613179526145e-05, "loss": 2.5229, "step": 12380 }, { "epoch": 2.31, "learning_rate": 1.2765075637217656e-05, "loss": 2.4673, "step": 12390 }, { "epoch": 2.31, "learning_rate": 1.2730538094909166e-05, "loss": 2.4803, "step": 12400 }, { "epoch": 2.31, "learning_rate": 1.2696000552600678e-05, "loss": 2.4534, "step": 12410 }, { "epoch": 2.32, "learning_rate": 1.2661463010292188e-05, "loss": 2.4959, "step": 12420 }, { "epoch": 2.32, "learning_rate": 1.26269254679837e-05, "loss": 2.523, "step": 12430 }, { "epoch": 2.32, "learning_rate": 1.2592387925675209e-05, "loss": 2.5406, "step": 12440 }, { "epoch": 2.32, "learning_rate": 1.2557850383366721e-05, "loss": 2.4748, "step": 12450 }, { "epoch": 2.32, "learning_rate": 1.252331284105823e-05, "loss": 2.5003, "step": 12460 }, { "epoch": 2.33, "learning_rate": 1.2488775298749742e-05, "loss": 2.5308, "step": 12470 }, { "epoch": 2.33, "learning_rate": 1.2454237756441252e-05, "loss": 2.5162, "step": 12480 }, { "epoch": 2.33, "learning_rate": 1.2419700214132762e-05, "loss": 2.4429, "step": 12490 }, { "epoch": 2.33, "learning_rate": 1.2385162671824274e-05, "loss": 2.5054, "step": 12500 }, { "epoch": 2.33, "learning_rate": 1.2350625129515785e-05, "loss": 2.427, "step": 12510 }, { "epoch": 2.33, "learning_rate": 1.2316087587207295e-05, "loss": 2.4848, "step": 12520 }, { "epoch": 2.34, "learning_rate": 1.2281550044898805e-05, "loss": 2.4365, "step": 12530 }, { "epoch": 2.34, "learning_rate": 1.2247012502590316e-05, "loss": 2.5547, "step": 12540 }, { "epoch": 2.34, "learning_rate": 1.2212474960281826e-05, "loss": 2.5358, "step": 12550 }, { "epoch": 2.34, "learning_rate": 1.2177937417973338e-05, "loss": 2.4882, "step": 12560 }, { "epoch": 2.34, "learning_rate": 1.2143399875664848e-05, "loss": 2.5398, "step": 12570 }, { "epoch": 2.35, "learning_rate": 1.2108862333356358e-05, "loss": 2.426, "step": 12580 }, { "epoch": 2.35, "learning_rate": 1.2074324791047869e-05, "loss": 2.5536, "step": 12590 }, { "epoch": 2.35, "learning_rate": 1.203978724873938e-05, "loss": 2.4513, "step": 12600 }, { "epoch": 2.35, "learning_rate": 1.2005249706430891e-05, "loss": 2.5256, "step": 12610 }, { "epoch": 2.35, "learning_rate": 1.1970712164122401e-05, "loss": 2.5244, "step": 12620 }, { "epoch": 2.36, "learning_rate": 1.1936174621813913e-05, "loss": 2.5293, "step": 12630 }, { "epoch": 2.36, "learning_rate": 1.1901637079505424e-05, "loss": 2.4588, "step": 12640 }, { "epoch": 2.36, "learning_rate": 1.1867099537196934e-05, "loss": 2.4325, "step": 12650 }, { "epoch": 2.36, "learning_rate": 1.1832561994888444e-05, "loss": 2.4101, "step": 12660 }, { "epoch": 2.36, "learning_rate": 1.1798024452579955e-05, "loss": 2.4884, "step": 12670 }, { "epoch": 2.36, "learning_rate": 1.1763486910271465e-05, "loss": 2.5393, "step": 12680 }, { "epoch": 2.37, "learning_rate": 1.1728949367962977e-05, "loss": 2.527, "step": 12690 }, { "epoch": 2.37, "learning_rate": 1.1694411825654487e-05, "loss": 2.5476, "step": 12700 }, { "epoch": 2.37, "learning_rate": 1.1659874283345998e-05, "loss": 2.5215, "step": 12710 }, { "epoch": 2.37, "learning_rate": 1.1625336741037508e-05, "loss": 2.4547, "step": 12720 }, { "epoch": 2.37, "learning_rate": 1.1590799198729018e-05, "loss": 2.3536, "step": 12730 }, { "epoch": 2.38, "learning_rate": 1.1556261656420529e-05, "loss": 2.4102, "step": 12740 }, { "epoch": 2.38, "learning_rate": 1.1521724114112039e-05, "loss": 2.5389, "step": 12750 }, { "epoch": 2.38, "learning_rate": 1.1487186571803551e-05, "loss": 2.4363, "step": 12760 }, { "epoch": 2.38, "learning_rate": 1.1452649029495061e-05, "loss": 2.5441, "step": 12770 }, { "epoch": 2.38, "learning_rate": 1.1418111487186573e-05, "loss": 2.4774, "step": 12780 }, { "epoch": 2.38, "learning_rate": 1.1383573944878084e-05, "loss": 2.4745, "step": 12790 }, { "epoch": 2.39, "learning_rate": 1.1349036402569594e-05, "loss": 2.4728, "step": 12800 }, { "epoch": 2.39, "learning_rate": 1.1314498860261104e-05, "loss": 2.5576, "step": 12810 }, { "epoch": 2.39, "learning_rate": 1.1279961317952616e-05, "loss": 2.5435, "step": 12820 }, { "epoch": 2.39, "learning_rate": 1.1245423775644127e-05, "loss": 2.4142, "step": 12830 }, { "epoch": 2.39, "learning_rate": 1.1210886233335637e-05, "loss": 2.4979, "step": 12840 }, { "epoch": 2.4, "learning_rate": 1.1176348691027147e-05, "loss": 2.582, "step": 12850 }, { "epoch": 2.4, "learning_rate": 1.1141811148718657e-05, "loss": 2.5088, "step": 12860 }, { "epoch": 2.4, "learning_rate": 1.1107273606410168e-05, "loss": 2.4736, "step": 12870 }, { "epoch": 2.4, "learning_rate": 1.107273606410168e-05, "loss": 2.5025, "step": 12880 }, { "epoch": 2.4, "learning_rate": 1.103819852179319e-05, "loss": 2.3659, "step": 12890 }, { "epoch": 2.41, "learning_rate": 1.10036609794847e-05, "loss": 2.4489, "step": 12900 }, { "epoch": 2.41, "learning_rate": 1.096912343717621e-05, "loss": 2.4468, "step": 12910 }, { "epoch": 2.41, "learning_rate": 1.0934585894867721e-05, "loss": 2.4301, "step": 12920 }, { "epoch": 2.41, "learning_rate": 1.0900048352559231e-05, "loss": 2.5407, "step": 12930 }, { "epoch": 2.41, "learning_rate": 1.0865510810250742e-05, "loss": 2.5414, "step": 12940 }, { "epoch": 2.41, "learning_rate": 1.0830973267942254e-05, "loss": 2.5377, "step": 12950 }, { "epoch": 2.42, "learning_rate": 1.0796435725633764e-05, "loss": 2.3887, "step": 12960 }, { "epoch": 2.42, "learning_rate": 1.0761898183325276e-05, "loss": 2.4716, "step": 12970 }, { "epoch": 2.42, "learning_rate": 1.0727360641016786e-05, "loss": 2.3875, "step": 12980 }, { "epoch": 2.42, "learning_rate": 1.0692823098708297e-05, "loss": 2.4645, "step": 12990 }, { "epoch": 2.42, "learning_rate": 1.0658285556399807e-05, "loss": 2.4588, "step": 13000 }, { "epoch": 2.43, "learning_rate": 1.0623748014091319e-05, "loss": 2.4394, "step": 13010 }, { "epoch": 2.43, "learning_rate": 1.058921047178283e-05, "loss": 2.4167, "step": 13020 }, { "epoch": 2.43, "learning_rate": 1.055467292947434e-05, "loss": 2.4317, "step": 13030 }, { "epoch": 2.43, "learning_rate": 1.052013538716585e-05, "loss": 2.46, "step": 13040 }, { "epoch": 2.43, "learning_rate": 1.048559784485736e-05, "loss": 2.5175, "step": 13050 }, { "epoch": 2.44, "learning_rate": 1.045106030254887e-05, "loss": 2.3986, "step": 13060 }, { "epoch": 2.44, "learning_rate": 1.0416522760240381e-05, "loss": 2.4969, "step": 13070 }, { "epoch": 2.44, "learning_rate": 1.0381985217931893e-05, "loss": 2.3781, "step": 13080 }, { "epoch": 2.44, "learning_rate": 1.0347447675623403e-05, "loss": 2.4569, "step": 13090 }, { "epoch": 2.44, "learning_rate": 1.0312910133314914e-05, "loss": 2.3831, "step": 13100 }, { "epoch": 2.44, "learning_rate": 1.0278372591006424e-05, "loss": 2.4431, "step": 13110 }, { "epoch": 2.45, "learning_rate": 1.0243835048697934e-05, "loss": 2.5117, "step": 13120 }, { "epoch": 2.45, "learning_rate": 1.0209297506389444e-05, "loss": 2.5629, "step": 13130 }, { "epoch": 2.45, "learning_rate": 1.0174759964080956e-05, "loss": 2.437, "step": 13140 }, { "epoch": 2.45, "learning_rate": 1.0140222421772467e-05, "loss": 2.5089, "step": 13150 }, { "epoch": 2.45, "learning_rate": 1.0105684879463979e-05, "loss": 2.5891, "step": 13160 }, { "epoch": 2.46, "learning_rate": 1.0071147337155489e-05, "loss": 2.4592, "step": 13170 }, { "epoch": 2.46, "learning_rate": 1.0036609794847e-05, "loss": 2.4947, "step": 13180 }, { "epoch": 2.46, "learning_rate": 1.000207225253851e-05, "loss": 2.3656, "step": 13190 }, { "epoch": 2.46, "learning_rate": 9.96753471023002e-06, "loss": 2.3021, "step": 13200 }, { "epoch": 2.46, "learning_rate": 9.932997167921532e-06, "loss": 2.5248, "step": 13210 }, { "epoch": 2.47, "learning_rate": 9.898459625613042e-06, "loss": 2.3746, "step": 13220 }, { "epoch": 2.47, "learning_rate": 9.863922083304553e-06, "loss": 2.4837, "step": 13230 }, { "epoch": 2.47, "learning_rate": 9.829384540996063e-06, "loss": 2.3482, "step": 13240 }, { "epoch": 2.47, "learning_rate": 9.794846998687573e-06, "loss": 2.3631, "step": 13250 }, { "epoch": 2.47, "learning_rate": 9.760309456379084e-06, "loss": 2.3923, "step": 13260 }, { "epoch": 2.47, "learning_rate": 9.725771914070596e-06, "loss": 2.465, "step": 13270 }, { "epoch": 2.48, "learning_rate": 9.691234371762106e-06, "loss": 2.5215, "step": 13280 }, { "epoch": 2.48, "learning_rate": 9.656696829453616e-06, "loss": 2.4189, "step": 13290 }, { "epoch": 2.48, "learning_rate": 9.622159287145127e-06, "loss": 2.4592, "step": 13300 }, { "epoch": 2.48, "learning_rate": 9.587621744836637e-06, "loss": 2.5317, "step": 13310 }, { "epoch": 2.48, "learning_rate": 9.553084202528149e-06, "loss": 2.5719, "step": 13320 }, { "epoch": 2.49, "learning_rate": 9.51854666021966e-06, "loss": 2.4286, "step": 13330 }, { "epoch": 2.49, "learning_rate": 9.48400911791117e-06, "loss": 2.3491, "step": 13340 }, { "epoch": 2.49, "learning_rate": 9.449471575602682e-06, "loss": 2.5172, "step": 13350 }, { "epoch": 2.49, "learning_rate": 9.414934033294192e-06, "loss": 2.4466, "step": 13360 }, { "epoch": 2.49, "learning_rate": 9.380396490985702e-06, "loss": 2.4197, "step": 13370 }, { "epoch": 2.49, "learning_rate": 9.345858948677212e-06, "loss": 2.5182, "step": 13380 }, { "epoch": 2.5, "learning_rate": 9.311321406368723e-06, "loss": 2.5291, "step": 13390 }, { "epoch": 2.5, "learning_rate": 9.276783864060235e-06, "loss": 2.5132, "step": 13400 }, { "epoch": 2.5, "learning_rate": 9.242246321751745e-06, "loss": 2.4365, "step": 13410 }, { "epoch": 2.5, "learning_rate": 9.207708779443255e-06, "loss": 2.5625, "step": 13420 }, { "epoch": 2.5, "learning_rate": 9.173171237134766e-06, "loss": 2.4663, "step": 13430 }, { "epoch": 2.51, "learning_rate": 9.138633694826276e-06, "loss": 2.5276, "step": 13440 }, { "epoch": 2.51, "learning_rate": 9.104096152517786e-06, "loss": 2.4061, "step": 13450 }, { "epoch": 2.51, "learning_rate": 9.069558610209298e-06, "loss": 2.4691, "step": 13460 }, { "epoch": 2.51, "learning_rate": 9.035021067900809e-06, "loss": 2.4038, "step": 13470 }, { "epoch": 2.51, "learning_rate": 9.000483525592319e-06, "loss": 2.4281, "step": 13480 }, { "epoch": 2.52, "learning_rate": 8.96594598328383e-06, "loss": 2.4752, "step": 13490 }, { "epoch": 2.52, "learning_rate": 8.93140844097534e-06, "loss": 2.3682, "step": 13500 }, { "epoch": 2.52, "learning_rate": 8.896870898666852e-06, "loss": 2.4398, "step": 13510 }, { "epoch": 2.52, "learning_rate": 8.862333356358362e-06, "loss": 2.4714, "step": 13520 }, { "epoch": 2.52, "learning_rate": 8.827795814049874e-06, "loss": 2.4158, "step": 13530 }, { "epoch": 2.52, "learning_rate": 8.793258271741384e-06, "loss": 2.4668, "step": 13540 }, { "epoch": 2.53, "learning_rate": 8.758720729432895e-06, "loss": 2.3691, "step": 13550 }, { "epoch": 2.53, "learning_rate": 8.724183187124405e-06, "loss": 2.3835, "step": 13560 }, { "epoch": 2.53, "learning_rate": 8.689645644815915e-06, "loss": 2.4404, "step": 13570 }, { "epoch": 2.53, "learning_rate": 8.655108102507426e-06, "loss": 2.4065, "step": 13580 }, { "epoch": 2.53, "learning_rate": 8.620570560198938e-06, "loss": 2.4134, "step": 13590 }, { "epoch": 2.54, "learning_rate": 8.586033017890448e-06, "loss": 2.3334, "step": 13600 }, { "epoch": 2.54, "learning_rate": 8.551495475581958e-06, "loss": 2.4672, "step": 13610 }, { "epoch": 2.54, "learning_rate": 8.516957933273469e-06, "loss": 2.3859, "step": 13620 }, { "epoch": 2.54, "learning_rate": 8.482420390964979e-06, "loss": 2.4169, "step": 13630 }, { "epoch": 2.54, "learning_rate": 8.44788284865649e-06, "loss": 2.4103, "step": 13640 }, { "epoch": 2.55, "learning_rate": 8.413345306348e-06, "loss": 2.492, "step": 13650 }, { "epoch": 2.55, "learning_rate": 8.378807764039511e-06, "loss": 2.4176, "step": 13660 }, { "epoch": 2.55, "learning_rate": 8.344270221731022e-06, "loss": 2.4864, "step": 13670 }, { "epoch": 2.55, "learning_rate": 8.309732679422532e-06, "loss": 2.412, "step": 13680 }, { "epoch": 2.55, "learning_rate": 8.275195137114042e-06, "loss": 2.2933, "step": 13690 }, { "epoch": 2.55, "learning_rate": 8.240657594805554e-06, "loss": 2.3668, "step": 13700 }, { "epoch": 2.56, "learning_rate": 8.206120052497065e-06, "loss": 2.4555, "step": 13710 }, { "epoch": 2.56, "learning_rate": 8.171582510188577e-06, "loss": 2.3584, "step": 13720 }, { "epoch": 2.56, "learning_rate": 8.137044967880087e-06, "loss": 2.3607, "step": 13730 }, { "epoch": 2.56, "learning_rate": 8.102507425571597e-06, "loss": 2.4085, "step": 13740 }, { "epoch": 2.56, "learning_rate": 8.067969883263108e-06, "loss": 2.3381, "step": 13750 }, { "epoch": 2.57, "learning_rate": 8.033432340954618e-06, "loss": 2.4618, "step": 13760 }, { "epoch": 2.57, "learning_rate": 7.998894798646128e-06, "loss": 2.3399, "step": 13770 }, { "epoch": 2.57, "learning_rate": 7.96435725633764e-06, "loss": 2.4523, "step": 13780 }, { "epoch": 2.57, "learning_rate": 7.92981971402915e-06, "loss": 2.387, "step": 13790 }, { "epoch": 2.57, "learning_rate": 7.895282171720661e-06, "loss": 2.3244, "step": 13800 }, { "epoch": 2.58, "learning_rate": 7.860744629412171e-06, "loss": 2.4542, "step": 13810 }, { "epoch": 2.58, "learning_rate": 7.826207087103682e-06, "loss": 2.4163, "step": 13820 }, { "epoch": 2.58, "learning_rate": 7.791669544795192e-06, "loss": 2.4133, "step": 13830 }, { "epoch": 2.58, "learning_rate": 7.757132002486702e-06, "loss": 2.5119, "step": 13840 }, { "epoch": 2.58, "learning_rate": 7.722594460178214e-06, "loss": 2.398, "step": 13850 }, { "epoch": 2.58, "learning_rate": 7.688056917869725e-06, "loss": 2.3494, "step": 13860 }, { "epoch": 2.59, "learning_rate": 7.653519375561235e-06, "loss": 2.3333, "step": 13870 }, { "epoch": 2.59, "learning_rate": 7.618981833252746e-06, "loss": 2.3259, "step": 13880 }, { "epoch": 2.59, "learning_rate": 7.584444290944256e-06, "loss": 2.3831, "step": 13890 }, { "epoch": 2.59, "learning_rate": 7.549906748635767e-06, "loss": 2.3466, "step": 13900 }, { "epoch": 2.59, "learning_rate": 7.515369206327279e-06, "loss": 2.3265, "step": 13910 }, { "epoch": 2.6, "learning_rate": 7.480831664018789e-06, "loss": 2.5178, "step": 13920 }, { "epoch": 2.6, "learning_rate": 7.446294121710299e-06, "loss": 2.434, "step": 13930 }, { "epoch": 2.6, "learning_rate": 7.4117565794018105e-06, "loss": 2.373, "step": 13940 }, { "epoch": 2.6, "learning_rate": 7.377219037093321e-06, "loss": 2.4323, "step": 13950 }, { "epoch": 2.6, "learning_rate": 7.342681494784831e-06, "loss": 2.4116, "step": 13960 }, { "epoch": 2.61, "learning_rate": 7.308143952476341e-06, "loss": 2.4483, "step": 13970 }, { "epoch": 2.61, "learning_rate": 7.273606410167853e-06, "loss": 2.316, "step": 13980 }, { "epoch": 2.61, "learning_rate": 7.239068867859364e-06, "loss": 2.4437, "step": 13990 }, { "epoch": 2.61, "learning_rate": 7.204531325550874e-06, "loss": 2.2788, "step": 14000 }, { "epoch": 2.61, "learning_rate": 7.169993783242384e-06, "loss": 2.3947, "step": 14010 }, { "epoch": 2.61, "learning_rate": 7.1354562409338955e-06, "loss": 2.3904, "step": 14020 }, { "epoch": 2.62, "learning_rate": 7.100918698625406e-06, "loss": 2.3955, "step": 14030 }, { "epoch": 2.62, "learning_rate": 7.066381156316918e-06, "loss": 2.3703, "step": 14040 }, { "epoch": 2.62, "learning_rate": 7.031843614008428e-06, "loss": 2.3459, "step": 14050 }, { "epoch": 2.62, "learning_rate": 6.9973060716999385e-06, "loss": 2.3317, "step": 14060 }, { "epoch": 2.62, "learning_rate": 6.962768529391449e-06, "loss": 2.4926, "step": 14070 }, { "epoch": 2.63, "learning_rate": 6.928230987082959e-06, "loss": 2.3021, "step": 14080 }, { "epoch": 2.63, "learning_rate": 6.8936934447744694e-06, "loss": 2.4277, "step": 14090 }, { "epoch": 2.63, "learning_rate": 6.85915590246598e-06, "loss": 2.4472, "step": 14100 }, { "epoch": 2.63, "learning_rate": 6.824618360157492e-06, "loss": 2.3815, "step": 14110 }, { "epoch": 2.63, "learning_rate": 6.790080817849002e-06, "loss": 2.4714, "step": 14120 }, { "epoch": 2.63, "learning_rate": 6.755543275540513e-06, "loss": 2.3671, "step": 14130 }, { "epoch": 2.64, "learning_rate": 6.7210057332320235e-06, "loss": 2.3541, "step": 14140 }, { "epoch": 2.64, "learning_rate": 6.686468190923534e-06, "loss": 2.3399, "step": 14150 }, { "epoch": 2.64, "learning_rate": 6.651930648615044e-06, "loss": 2.4081, "step": 14160 }, { "epoch": 2.64, "learning_rate": 6.617393106306556e-06, "loss": 2.4018, "step": 14170 }, { "epoch": 2.64, "learning_rate": 6.5828555639980665e-06, "loss": 2.4288, "step": 14180 }, { "epoch": 2.65, "learning_rate": 6.548318021689577e-06, "loss": 2.3876, "step": 14190 }, { "epoch": 2.65, "learning_rate": 6.513780479381087e-06, "loss": 2.3145, "step": 14200 }, { "epoch": 2.65, "learning_rate": 6.479242937072598e-06, "loss": 2.4065, "step": 14210 }, { "epoch": 2.65, "learning_rate": 6.444705394764109e-06, "loss": 2.4055, "step": 14220 }, { "epoch": 2.65, "learning_rate": 6.410167852455621e-06, "loss": 2.3118, "step": 14230 }, { "epoch": 2.66, "learning_rate": 6.375630310147131e-06, "loss": 2.36, "step": 14240 }, { "epoch": 2.66, "learning_rate": 6.341092767838641e-06, "loss": 2.3578, "step": 14250 }, { "epoch": 2.66, "learning_rate": 6.3065552255301516e-06, "loss": 2.3756, "step": 14260 }, { "epoch": 2.66, "learning_rate": 6.272017683221662e-06, "loss": 2.4196, "step": 14270 }, { "epoch": 2.66, "learning_rate": 6.237480140913173e-06, "loss": 2.4409, "step": 14280 }, { "epoch": 2.66, "learning_rate": 6.202942598604683e-06, "loss": 2.3316, "step": 14290 }, { "epoch": 2.67, "learning_rate": 6.1684050562961945e-06, "loss": 2.306, "step": 14300 }, { "epoch": 2.67, "learning_rate": 6.133867513987706e-06, "loss": 2.3856, "step": 14310 }, { "epoch": 2.67, "learning_rate": 6.099329971679216e-06, "loss": 2.3944, "step": 14320 }, { "epoch": 2.67, "learning_rate": 6.064792429370726e-06, "loss": 2.4568, "step": 14330 }, { "epoch": 2.67, "learning_rate": 6.030254887062237e-06, "loss": 2.3327, "step": 14340 }, { "epoch": 2.68, "learning_rate": 5.995717344753748e-06, "loss": 2.3498, "step": 14350 }, { "epoch": 2.68, "learning_rate": 5.961179802445258e-06, "loss": 2.4181, "step": 14360 }, { "epoch": 2.68, "learning_rate": 5.926642260136768e-06, "loss": 2.398, "step": 14370 }, { "epoch": 2.68, "learning_rate": 5.89210471782828e-06, "loss": 2.4317, "step": 14380 }, { "epoch": 2.68, "learning_rate": 5.85756717551979e-06, "loss": 2.4137, "step": 14390 }, { "epoch": 2.69, "learning_rate": 5.823029633211301e-06, "loss": 2.4382, "step": 14400 }, { "epoch": 2.69, "learning_rate": 5.788492090902812e-06, "loss": 2.3696, "step": 14410 }, { "epoch": 2.69, "learning_rate": 5.7539545485943225e-06, "loss": 2.4141, "step": 14420 }, { "epoch": 2.69, "learning_rate": 5.719417006285833e-06, "loss": 2.3535, "step": 14430 }, { "epoch": 2.69, "learning_rate": 5.684879463977344e-06, "loss": 2.408, "step": 14440 }, { "epoch": 2.69, "learning_rate": 5.650341921668854e-06, "loss": 2.354, "step": 14450 }, { "epoch": 2.7, "learning_rate": 5.615804379360365e-06, "loss": 2.4954, "step": 14460 }, { "epoch": 2.7, "learning_rate": 5.581266837051876e-06, "loss": 2.4316, "step": 14470 }, { "epoch": 2.7, "learning_rate": 5.546729294743386e-06, "loss": 2.3022, "step": 14480 }, { "epoch": 2.7, "learning_rate": 5.512191752434897e-06, "loss": 2.2843, "step": 14490 }, { "epoch": 2.7, "learning_rate": 5.477654210126408e-06, "loss": 2.4304, "step": 14500 }, { "epoch": 2.71, "learning_rate": 5.443116667817919e-06, "loss": 2.2816, "step": 14510 }, { "epoch": 2.71, "learning_rate": 5.408579125509429e-06, "loss": 2.3768, "step": 14520 }, { "epoch": 2.71, "learning_rate": 5.374041583200939e-06, "loss": 2.4476, "step": 14530 }, { "epoch": 2.71, "learning_rate": 5.3395040408924506e-06, "loss": 2.439, "step": 14540 }, { "epoch": 2.71, "learning_rate": 5.304966498583961e-06, "loss": 2.3349, "step": 14550 }, { "epoch": 2.72, "learning_rate": 5.270428956275471e-06, "loss": 2.542, "step": 14560 }, { "epoch": 2.72, "learning_rate": 5.235891413966982e-06, "loss": 2.3421, "step": 14570 }, { "epoch": 2.72, "learning_rate": 5.201353871658493e-06, "loss": 2.3747, "step": 14580 }, { "epoch": 2.72, "learning_rate": 5.166816329350004e-06, "loss": 2.3729, "step": 14590 }, { "epoch": 2.72, "learning_rate": 5.132278787041515e-06, "loss": 2.3492, "step": 14600 }, { "epoch": 2.72, "learning_rate": 5.097741244733025e-06, "loss": 2.2954, "step": 14610 }, { "epoch": 2.73, "learning_rate": 5.063203702424536e-06, "loss": 2.3548, "step": 14620 }, { "epoch": 2.73, "learning_rate": 5.028666160116046e-06, "loss": 2.2816, "step": 14630 }, { "epoch": 2.73, "learning_rate": 4.994128617807557e-06, "loss": 2.4028, "step": 14640 }, { "epoch": 2.73, "learning_rate": 4.959591075499067e-06, "loss": 2.3788, "step": 14650 }, { "epoch": 2.73, "learning_rate": 4.925053533190578e-06, "loss": 2.4394, "step": 14660 }, { "epoch": 2.74, "learning_rate": 4.890515990882089e-06, "loss": 2.2951, "step": 14670 }, { "epoch": 2.74, "learning_rate": 4.8559784485736e-06, "loss": 2.2777, "step": 14680 }, { "epoch": 2.74, "learning_rate": 4.82144090626511e-06, "loss": 2.3185, "step": 14690 }, { "epoch": 2.74, "learning_rate": 4.7869033639566215e-06, "loss": 2.4007, "step": 14700 }, { "epoch": 2.74, "learning_rate": 4.752365821648132e-06, "loss": 2.3751, "step": 14710 }, { "epoch": 2.74, "learning_rate": 4.717828279339642e-06, "loss": 2.3223, "step": 14720 }, { "epoch": 2.75, "learning_rate": 4.683290737031153e-06, "loss": 2.3298, "step": 14730 }, { "epoch": 2.75, "learning_rate": 4.648753194722664e-06, "loss": 2.3654, "step": 14740 }, { "epoch": 2.75, "learning_rate": 4.614215652414174e-06, "loss": 2.4975, "step": 14750 }, { "epoch": 2.75, "learning_rate": 4.579678110105685e-06, "loss": 2.4081, "step": 14760 }, { "epoch": 2.75, "learning_rate": 4.545140567797196e-06, "loss": 2.3517, "step": 14770 }, { "epoch": 2.76, "learning_rate": 4.510603025488707e-06, "loss": 2.4114, "step": 14780 }, { "epoch": 2.76, "learning_rate": 4.476065483180217e-06, "loss": 2.2955, "step": 14790 }, { "epoch": 2.76, "learning_rate": 4.441527940871728e-06, "loss": 2.2924, "step": 14800 }, { "epoch": 2.76, "learning_rate": 4.406990398563238e-06, "loss": 2.3289, "step": 14810 }, { "epoch": 2.76, "learning_rate": 4.372452856254749e-06, "loss": 2.4274, "step": 14820 }, { "epoch": 2.77, "learning_rate": 4.33791531394626e-06, "loss": 2.2682, "step": 14830 }, { "epoch": 2.77, "learning_rate": 4.30337777163777e-06, "loss": 2.3307, "step": 14840 }, { "epoch": 2.77, "learning_rate": 4.2688402293292805e-06, "loss": 2.3643, "step": 14850 }, { "epoch": 2.77, "learning_rate": 4.234302687020792e-06, "loss": 2.4233, "step": 14860 }, { "epoch": 2.77, "learning_rate": 4.199765144712303e-06, "loss": 2.4175, "step": 14870 }, { "epoch": 2.77, "learning_rate": 4.165227602403813e-06, "loss": 2.3321, "step": 14880 }, { "epoch": 2.78, "learning_rate": 4.130690060095324e-06, "loss": 2.4373, "step": 14890 }, { "epoch": 2.78, "learning_rate": 4.096152517786835e-06, "loss": 2.3562, "step": 14900 }, { "epoch": 2.78, "learning_rate": 4.061614975478345e-06, "loss": 2.3348, "step": 14910 }, { "epoch": 2.78, "learning_rate": 4.027077433169856e-06, "loss": 2.3721, "step": 14920 }, { "epoch": 2.78, "learning_rate": 3.992539890861366e-06, "loss": 2.3379, "step": 14930 }, { "epoch": 2.79, "learning_rate": 3.958002348552877e-06, "loss": 2.3698, "step": 14940 }, { "epoch": 2.79, "learning_rate": 3.923464806244388e-06, "loss": 2.4562, "step": 14950 }, { "epoch": 2.79, "learning_rate": 3.888927263935899e-06, "loss": 2.4839, "step": 14960 }, { "epoch": 2.79, "learning_rate": 3.854389721627409e-06, "loss": 2.3623, "step": 14970 }, { "epoch": 2.79, "learning_rate": 3.81985217931892e-06, "loss": 2.3421, "step": 14980 }, { "epoch": 2.8, "learning_rate": 3.785314637010431e-06, "loss": 2.3138, "step": 14990 }, { "epoch": 2.8, "learning_rate": 3.750777094701941e-06, "loss": 2.3843, "step": 15000 }, { "epoch": 2.8, "learning_rate": 3.7162395523934515e-06, "loss": 2.3591, "step": 15010 }, { "epoch": 2.8, "learning_rate": 3.6817020100849626e-06, "loss": 2.3636, "step": 15020 }, { "epoch": 2.8, "learning_rate": 3.6471644677764734e-06, "loss": 2.4267, "step": 15030 }, { "epoch": 2.8, "learning_rate": 3.6126269254679837e-06, "loss": 2.373, "step": 15040 }, { "epoch": 2.81, "learning_rate": 3.578089383159495e-06, "loss": 2.5028, "step": 15050 }, { "epoch": 2.81, "learning_rate": 3.543551840851005e-06, "loss": 2.3799, "step": 15060 }, { "epoch": 2.81, "learning_rate": 3.509014298542516e-06, "loss": 2.4805, "step": 15070 }, { "epoch": 2.81, "learning_rate": 3.474476756234026e-06, "loss": 2.2899, "step": 15080 }, { "epoch": 2.81, "learning_rate": 3.4399392139255374e-06, "loss": 2.3221, "step": 15090 }, { "epoch": 2.82, "learning_rate": 3.4054016716170477e-06, "loss": 2.3465, "step": 15100 }, { "epoch": 2.82, "learning_rate": 3.3708641293085584e-06, "loss": 2.3942, "step": 15110 }, { "epoch": 2.82, "learning_rate": 3.3363265870000696e-06, "loss": 2.4674, "step": 15120 }, { "epoch": 2.82, "learning_rate": 3.30178904469158e-06, "loss": 2.4301, "step": 15130 }, { "epoch": 2.82, "learning_rate": 3.2672515023830902e-06, "loss": 2.4088, "step": 15140 }, { "epoch": 2.83, "learning_rate": 3.2327139600746014e-06, "loss": 2.3602, "step": 15150 }, { "epoch": 2.83, "learning_rate": 3.198176417766112e-06, "loss": 2.3013, "step": 15160 }, { "epoch": 2.83, "learning_rate": 3.1636388754576224e-06, "loss": 2.3958, "step": 15170 }, { "epoch": 2.83, "learning_rate": 3.1291013331491336e-06, "loss": 2.3123, "step": 15180 }, { "epoch": 2.83, "learning_rate": 3.094563790840644e-06, "loss": 2.3162, "step": 15190 }, { "epoch": 2.83, "learning_rate": 3.0600262485321547e-06, "loss": 2.3614, "step": 15200 }, { "epoch": 2.84, "learning_rate": 3.0254887062236654e-06, "loss": 2.3247, "step": 15210 }, { "epoch": 2.84, "learning_rate": 2.990951163915176e-06, "loss": 2.3145, "step": 15220 }, { "epoch": 2.84, "learning_rate": 2.9564136216066864e-06, "loss": 2.3681, "step": 15230 }, { "epoch": 2.84, "learning_rate": 2.921876079298197e-06, "loss": 2.2981, "step": 15240 }, { "epoch": 2.84, "learning_rate": 2.887338536989708e-06, "loss": 2.346, "step": 15250 }, { "epoch": 2.85, "learning_rate": 2.8528009946812187e-06, "loss": 2.4164, "step": 15260 }, { "epoch": 2.85, "learning_rate": 2.8182634523727294e-06, "loss": 2.4174, "step": 15270 }, { "epoch": 2.85, "learning_rate": 2.7837259100642397e-06, "loss": 2.3599, "step": 15280 }, { "epoch": 2.85, "learning_rate": 2.7491883677557505e-06, "loss": 2.278, "step": 15290 }, { "epoch": 2.85, "learning_rate": 2.7146508254472616e-06, "loss": 2.3583, "step": 15300 }, { "epoch": 2.85, "learning_rate": 2.680113283138772e-06, "loss": 2.4094, "step": 15310 }, { "epoch": 2.86, "learning_rate": 2.6455757408302827e-06, "loss": 2.4016, "step": 15320 }, { "epoch": 2.86, "learning_rate": 2.6110381985217934e-06, "loss": 2.3733, "step": 15330 }, { "epoch": 2.86, "learning_rate": 2.5765006562133037e-06, "loss": 2.3657, "step": 15340 }, { "epoch": 2.86, "learning_rate": 2.541963113904815e-06, "loss": 2.3585, "step": 15350 }, { "epoch": 2.86, "learning_rate": 2.507425571596325e-06, "loss": 2.3034, "step": 15360 }, { "epoch": 2.87, "learning_rate": 2.472888029287836e-06, "loss": 2.399, "step": 15370 }, { "epoch": 2.87, "learning_rate": 2.4383504869793467e-06, "loss": 2.3786, "step": 15380 }, { "epoch": 2.87, "learning_rate": 2.4038129446708574e-06, "loss": 2.3629, "step": 15390 }, { "epoch": 2.87, "learning_rate": 2.369275402362368e-06, "loss": 2.3671, "step": 15400 }, { "epoch": 2.87, "learning_rate": 2.334737860053879e-06, "loss": 2.2268, "step": 15410 }, { "epoch": 2.88, "learning_rate": 2.300200317745389e-06, "loss": 2.3121, "step": 15420 }, { "epoch": 2.88, "learning_rate": 2.2656627754369e-06, "loss": 2.3288, "step": 15430 }, { "epoch": 2.88, "learning_rate": 2.2311252331284107e-06, "loss": 2.3856, "step": 15440 }, { "epoch": 2.88, "learning_rate": 2.1965876908199214e-06, "loss": 2.3113, "step": 15450 }, { "epoch": 2.88, "learning_rate": 2.162050148511432e-06, "loss": 2.2406, "step": 15460 }, { "epoch": 2.88, "learning_rate": 2.1275126062029425e-06, "loss": 2.353, "step": 15470 }, { "epoch": 2.89, "learning_rate": 2.0929750638944532e-06, "loss": 2.3918, "step": 15480 }, { "epoch": 2.89, "learning_rate": 2.058437521585964e-06, "loss": 2.4184, "step": 15490 }, { "epoch": 2.89, "learning_rate": 2.0238999792774747e-06, "loss": 2.3828, "step": 15500 }, { "epoch": 2.89, "learning_rate": 1.9893624369689854e-06, "loss": 2.4124, "step": 15510 }, { "epoch": 2.89, "learning_rate": 1.9548248946604957e-06, "loss": 2.3288, "step": 15520 }, { "epoch": 2.9, "learning_rate": 1.920287352352007e-06, "loss": 2.2583, "step": 15530 }, { "epoch": 2.9, "learning_rate": 1.8857498100435174e-06, "loss": 2.4088, "step": 15540 }, { "epoch": 2.9, "learning_rate": 1.851212267735028e-06, "loss": 2.2754, "step": 15550 }, { "epoch": 2.9, "learning_rate": 1.8166747254265387e-06, "loss": 2.3621, "step": 15560 }, { "epoch": 2.9, "learning_rate": 1.7821371831180492e-06, "loss": 2.3717, "step": 15570 }, { "epoch": 2.91, "learning_rate": 1.74759964080956e-06, "loss": 2.4536, "step": 15580 }, { "epoch": 2.91, "learning_rate": 1.713062098501071e-06, "loss": 2.3785, "step": 15590 }, { "epoch": 2.91, "learning_rate": 1.6785245561925812e-06, "loss": 2.2907, "step": 15600 }, { "epoch": 2.91, "learning_rate": 1.6439870138840922e-06, "loss": 2.3086, "step": 15610 }, { "epoch": 2.91, "learning_rate": 1.609449471575603e-06, "loss": 2.358, "step": 15620 }, { "epoch": 2.91, "learning_rate": 1.5749119292671134e-06, "loss": 2.3363, "step": 15630 }, { "epoch": 2.92, "learning_rate": 1.5403743869586242e-06, "loss": 2.3688, "step": 15640 }, { "epoch": 2.92, "learning_rate": 1.5058368446501347e-06, "loss": 2.3412, "step": 15650 }, { "epoch": 2.92, "learning_rate": 1.4712993023416455e-06, "loss": 2.3828, "step": 15660 }, { "epoch": 2.92, "learning_rate": 1.436761760033156e-06, "loss": 2.2863, "step": 15670 }, { "epoch": 2.92, "learning_rate": 1.402224217724667e-06, "loss": 2.5111, "step": 15680 }, { "epoch": 2.93, "learning_rate": 1.3676866754161775e-06, "loss": 2.4224, "step": 15690 }, { "epoch": 2.93, "learning_rate": 1.3331491331076882e-06, "loss": 2.4222, "step": 15700 }, { "epoch": 2.93, "learning_rate": 1.2986115907991987e-06, "loss": 2.3345, "step": 15710 }, { "epoch": 2.93, "learning_rate": 1.2640740484907095e-06, "loss": 2.2492, "step": 15720 }, { "epoch": 2.93, "learning_rate": 1.2295365061822202e-06, "loss": 2.4186, "step": 15730 }, { "epoch": 2.94, "learning_rate": 1.1949989638737307e-06, "loss": 2.328, "step": 15740 }, { "epoch": 2.94, "learning_rate": 1.1604614215652415e-06, "loss": 2.3137, "step": 15750 }, { "epoch": 2.94, "learning_rate": 1.1259238792567522e-06, "loss": 2.3326, "step": 15760 }, { "epoch": 2.94, "learning_rate": 1.091386336948263e-06, "loss": 2.3862, "step": 15770 }, { "epoch": 2.94, "learning_rate": 1.0568487946397735e-06, "loss": 2.3108, "step": 15780 }, { "epoch": 2.94, "learning_rate": 1.022311252331284e-06, "loss": 2.26, "step": 15790 }, { "epoch": 2.95, "learning_rate": 9.87773710022795e-07, "loss": 2.4293, "step": 15800 }, { "epoch": 2.95, "learning_rate": 9.532361677143055e-07, "loss": 2.3243, "step": 15810 }, { "epoch": 2.95, "learning_rate": 9.186986254058161e-07, "loss": 2.3935, "step": 15820 }, { "epoch": 2.95, "learning_rate": 8.841610830973267e-07, "loss": 2.3129, "step": 15830 }, { "epoch": 2.95, "learning_rate": 8.496235407888376e-07, "loss": 2.3353, "step": 15840 }, { "epoch": 2.96, "learning_rate": 8.150859984803482e-07, "loss": 2.2725, "step": 15850 }, { "epoch": 2.96, "learning_rate": 7.805484561718589e-07, "loss": 2.378, "step": 15860 }, { "epoch": 2.96, "learning_rate": 7.460109138633696e-07, "loss": 2.3231, "step": 15870 }, { "epoch": 2.96, "learning_rate": 7.114733715548802e-07, "loss": 2.3203, "step": 15880 }, { "epoch": 2.96, "learning_rate": 6.769358292463909e-07, "loss": 2.3593, "step": 15890 }, { "epoch": 2.96, "learning_rate": 6.423982869379015e-07, "loss": 2.3439, "step": 15900 }, { "epoch": 2.97, "learning_rate": 6.078607446294122e-07, "loss": 2.3775, "step": 15910 }, { "epoch": 2.97, "learning_rate": 5.733232023209229e-07, "loss": 2.3511, "step": 15920 }, { "epoch": 2.97, "learning_rate": 5.387856600124336e-07, "loss": 2.3449, "step": 15930 }, { "epoch": 2.97, "learning_rate": 5.042481177039442e-07, "loss": 2.3427, "step": 15940 }, { "epoch": 2.97, "learning_rate": 4.697105753954549e-07, "loss": 2.2693, "step": 15950 }, { "epoch": 2.98, "learning_rate": 4.3517303308696555e-07, "loss": 2.3398, "step": 15960 }, { "epoch": 2.98, "learning_rate": 4.006354907784763e-07, "loss": 2.2743, "step": 15970 }, { "epoch": 2.98, "learning_rate": 3.6609794846998687e-07, "loss": 2.2086, "step": 15980 }, { "epoch": 2.98, "learning_rate": 3.3156040616149755e-07, "loss": 2.3393, "step": 15990 }, { "epoch": 2.98, "learning_rate": 2.9702286385300824e-07, "loss": 2.3861, "step": 16000 }, { "epoch": 2.99, "learning_rate": 2.624853215445189e-07, "loss": 2.2871, "step": 16010 }, { "epoch": 2.99, "learning_rate": 2.2794777923602959e-07, "loss": 2.3119, "step": 16020 }, { "epoch": 2.99, "learning_rate": 1.9341023692754024e-07, "loss": 2.385, "step": 16030 }, { "epoch": 2.99, "learning_rate": 1.5887269461905093e-07, "loss": 2.2872, "step": 16040 }, { "epoch": 2.99, "learning_rate": 1.243351523105616e-07, "loss": 2.3681, "step": 16050 }, { "epoch": 2.99, "learning_rate": 8.979761000207225e-08, "loss": 2.3491, "step": 16060 }, { "epoch": 3.0, "learning_rate": 5.526006769358292e-08, "loss": 2.3968, "step": 16070 }, { "epoch": 3.0, "learning_rate": 2.0722525385093597e-08, "loss": 2.4475, "step": 16080 }, { "epoch": 3.0, "eval_accuracy": 0.5851995594482614, "eval_loss": 1.9156352281570435, "eval_runtime": 8294.1564, "eval_samples_per_second": 9.195, "eval_steps_per_second": 0.287, "step": 16086 }, { "epoch": 3.0, "step": 16086, "total_flos": 1.6352487334240263e+20, "train_loss": 3.7719367721047283, "train_runtime": 628821.6026, "train_samples_per_second": 3.275, "train_steps_per_second": 0.026 } ], "logging_steps": 10, "max_steps": 16086, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.6352487334240263e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }