Caracam / trainer_state.json
Takekazuchi's picture
End of training
d904e61 verified
{
"best_metric": 0.5851995594482614,
"best_model_checkpoint": "vit-base-patch16-224-vit-base-patch16\\checkpoint-16086",
"epoch": 2.9995804391403667,
"eval_steps": 500,
"global_step": 16086,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.107520198881293e-07,
"loss": 7.9302,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 6.215040397762586e-07,
"loss": 7.9301,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 9.32256059664388e-07,
"loss": 7.93,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 1.2430080795525172e-06,
"loss": 7.9296,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.5537600994406465e-06,
"loss": 7.9293,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.864512119328776e-06,
"loss": 7.9286,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 2.175264139216905e-06,
"loss": 7.9281,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 2.4860161591050345e-06,
"loss": 7.9278,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 2.7967681789931635e-06,
"loss": 7.926,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 3.107520198881293e-06,
"loss": 7.9253,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 3.418272218769422e-06,
"loss": 7.9242,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 3.729024238657552e-06,
"loss": 7.9225,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 4.0397762585456806e-06,
"loss": 7.9207,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 4.35052827843381e-06,
"loss": 7.9191,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 4.6612802983219395e-06,
"loss": 7.917,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 4.972032318210069e-06,
"loss": 7.9149,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 5.282784338098198e-06,
"loss": 7.9116,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 5.593536357986327e-06,
"loss": 7.9083,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 5.9042883778744565e-06,
"loss": 7.9065,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 6.215040397762586e-06,
"loss": 7.9019,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 6.5257924176507155e-06,
"loss": 7.8957,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 6.836544437538844e-06,
"loss": 7.8927,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 7.1472964574269735e-06,
"loss": 7.8868,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 7.458048477315104e-06,
"loss": 7.8831,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 7.768800497203232e-06,
"loss": 7.8775,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 8.079552517091361e-06,
"loss": 7.8719,
"step": 260
},
{
"epoch": 0.05,
"learning_rate": 8.39030453697949e-06,
"loss": 7.8652,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 8.70105655686762e-06,
"loss": 7.8573,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 9.01180857675575e-06,
"loss": 7.8496,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 9.322560596643879e-06,
"loss": 7.8454,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 9.633312616532007e-06,
"loss": 7.8381,
"step": 310
},
{
"epoch": 0.06,
"learning_rate": 9.944064636420138e-06,
"loss": 7.8295,
"step": 320
},
{
"epoch": 0.06,
"learning_rate": 1.0254816656308266e-05,
"loss": 7.8215,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 1.0565568676196395e-05,
"loss": 7.811,
"step": 340
},
{
"epoch": 0.07,
"learning_rate": 1.0876320696084526e-05,
"loss": 7.8033,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 1.1187072715972654e-05,
"loss": 7.7903,
"step": 360
},
{
"epoch": 0.07,
"learning_rate": 1.1497824735860784e-05,
"loss": 7.7835,
"step": 370
},
{
"epoch": 0.07,
"learning_rate": 1.1808576755748913e-05,
"loss": 7.7719,
"step": 380
},
{
"epoch": 0.07,
"learning_rate": 1.2119328775637043e-05,
"loss": 7.7589,
"step": 390
},
{
"epoch": 0.07,
"learning_rate": 1.2430080795525172e-05,
"loss": 7.7576,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 1.27408328154133e-05,
"loss": 7.7456,
"step": 410
},
{
"epoch": 0.08,
"learning_rate": 1.3051584835301431e-05,
"loss": 7.7309,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 1.3362336855189559e-05,
"loss": 7.724,
"step": 430
},
{
"epoch": 0.08,
"learning_rate": 1.3673088875077688e-05,
"loss": 7.7139,
"step": 440
},
{
"epoch": 0.08,
"learning_rate": 1.398384089496582e-05,
"loss": 7.7025,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 1.4294592914853947e-05,
"loss": 7.6857,
"step": 460
},
{
"epoch": 0.09,
"learning_rate": 1.4605344934742077e-05,
"loss": 7.6762,
"step": 470
},
{
"epoch": 0.09,
"learning_rate": 1.4916096954630208e-05,
"loss": 7.6618,
"step": 480
},
{
"epoch": 0.09,
"learning_rate": 1.5226848974518334e-05,
"loss": 7.6498,
"step": 490
},
{
"epoch": 0.09,
"learning_rate": 1.5537600994406463e-05,
"loss": 7.6415,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 1.5848353014294593e-05,
"loss": 7.6319,
"step": 510
},
{
"epoch": 0.1,
"learning_rate": 1.6159105034182722e-05,
"loss": 7.6112,
"step": 520
},
{
"epoch": 0.1,
"learning_rate": 1.646985705407085e-05,
"loss": 7.6032,
"step": 530
},
{
"epoch": 0.1,
"learning_rate": 1.678060907395898e-05,
"loss": 7.5935,
"step": 540
},
{
"epoch": 0.1,
"learning_rate": 1.709136109384711e-05,
"loss": 7.5846,
"step": 550
},
{
"epoch": 0.1,
"learning_rate": 1.740211311373524e-05,
"loss": 7.5674,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 1.771286513362337e-05,
"loss": 7.5512,
"step": 570
},
{
"epoch": 0.11,
"learning_rate": 1.80236171535115e-05,
"loss": 7.541,
"step": 580
},
{
"epoch": 0.11,
"learning_rate": 1.833436917339963e-05,
"loss": 7.5262,
"step": 590
},
{
"epoch": 0.11,
"learning_rate": 1.8645121193287758e-05,
"loss": 7.5138,
"step": 600
},
{
"epoch": 0.11,
"learning_rate": 1.8955873213175887e-05,
"loss": 7.4991,
"step": 610
},
{
"epoch": 0.12,
"learning_rate": 1.9266625233064014e-05,
"loss": 7.4734,
"step": 620
},
{
"epoch": 0.12,
"learning_rate": 1.9577377252952146e-05,
"loss": 7.4607,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 1.9888129272840276e-05,
"loss": 7.4621,
"step": 640
},
{
"epoch": 0.12,
"learning_rate": 2.0198881292728402e-05,
"loss": 7.4499,
"step": 650
},
{
"epoch": 0.12,
"learning_rate": 2.050963331261653e-05,
"loss": 7.4228,
"step": 660
},
{
"epoch": 0.12,
"learning_rate": 2.0820385332504664e-05,
"loss": 7.4174,
"step": 670
},
{
"epoch": 0.13,
"learning_rate": 2.113113735239279e-05,
"loss": 7.3955,
"step": 680
},
{
"epoch": 0.13,
"learning_rate": 2.144188937228092e-05,
"loss": 7.387,
"step": 690
},
{
"epoch": 0.13,
"learning_rate": 2.1752641392169053e-05,
"loss": 7.3643,
"step": 700
},
{
"epoch": 0.13,
"learning_rate": 2.206339341205718e-05,
"loss": 7.3718,
"step": 710
},
{
"epoch": 0.13,
"learning_rate": 2.2374145431945308e-05,
"loss": 7.3485,
"step": 720
},
{
"epoch": 0.14,
"learning_rate": 2.2684897451833438e-05,
"loss": 7.3414,
"step": 730
},
{
"epoch": 0.14,
"learning_rate": 2.2995649471721567e-05,
"loss": 7.3117,
"step": 740
},
{
"epoch": 0.14,
"learning_rate": 2.3306401491609697e-05,
"loss": 7.2965,
"step": 750
},
{
"epoch": 0.14,
"learning_rate": 2.3617153511497826e-05,
"loss": 7.3053,
"step": 760
},
{
"epoch": 0.14,
"learning_rate": 2.3927905531385956e-05,
"loss": 7.2712,
"step": 770
},
{
"epoch": 0.15,
"learning_rate": 2.4238657551274085e-05,
"loss": 7.2515,
"step": 780
},
{
"epoch": 0.15,
"learning_rate": 2.4549409571162214e-05,
"loss": 7.2411,
"step": 790
},
{
"epoch": 0.15,
"learning_rate": 2.4860161591050344e-05,
"loss": 7.2357,
"step": 800
},
{
"epoch": 0.15,
"learning_rate": 2.5170913610938473e-05,
"loss": 7.2151,
"step": 810
},
{
"epoch": 0.15,
"learning_rate": 2.54816656308266e-05,
"loss": 7.1943,
"step": 820
},
{
"epoch": 0.15,
"learning_rate": 2.579241765071473e-05,
"loss": 7.1974,
"step": 830
},
{
"epoch": 0.16,
"learning_rate": 2.6103169670602862e-05,
"loss": 7.2105,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 2.6413921690490988e-05,
"loss": 7.156,
"step": 850
},
{
"epoch": 0.16,
"learning_rate": 2.6724673710379117e-05,
"loss": 7.1484,
"step": 860
},
{
"epoch": 0.16,
"learning_rate": 2.703542573026725e-05,
"loss": 7.1293,
"step": 870
},
{
"epoch": 0.16,
"learning_rate": 2.7346177750155376e-05,
"loss": 7.1283,
"step": 880
},
{
"epoch": 0.17,
"learning_rate": 2.7656929770043506e-05,
"loss": 7.1159,
"step": 890
},
{
"epoch": 0.17,
"learning_rate": 2.796768178993164e-05,
"loss": 7.0768,
"step": 900
},
{
"epoch": 0.17,
"learning_rate": 2.8278433809819765e-05,
"loss": 7.0734,
"step": 910
},
{
"epoch": 0.17,
"learning_rate": 2.8589185829707894e-05,
"loss": 7.068,
"step": 920
},
{
"epoch": 0.17,
"learning_rate": 2.8899937849596027e-05,
"loss": 7.0497,
"step": 930
},
{
"epoch": 0.18,
"learning_rate": 2.9210689869484153e-05,
"loss": 7.0373,
"step": 940
},
{
"epoch": 0.18,
"learning_rate": 2.9521441889372283e-05,
"loss": 7.0332,
"step": 950
},
{
"epoch": 0.18,
"learning_rate": 2.9832193909260415e-05,
"loss": 7.0304,
"step": 960
},
{
"epoch": 0.18,
"learning_rate": 3.014294592914854e-05,
"loss": 7.0186,
"step": 970
},
{
"epoch": 0.18,
"learning_rate": 3.0453697949036668e-05,
"loss": 6.9994,
"step": 980
},
{
"epoch": 0.18,
"learning_rate": 3.0764449968924804e-05,
"loss": 6.988,
"step": 990
},
{
"epoch": 0.19,
"learning_rate": 3.1075201988812927e-05,
"loss": 6.9575,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 3.1385954008701056e-05,
"loss": 6.9407,
"step": 1010
},
{
"epoch": 0.19,
"learning_rate": 3.1696706028589185e-05,
"loss": 6.9424,
"step": 1020
},
{
"epoch": 0.19,
"learning_rate": 3.2007458048477315e-05,
"loss": 6.9267,
"step": 1030
},
{
"epoch": 0.19,
"learning_rate": 3.2318210068365444e-05,
"loss": 6.9348,
"step": 1040
},
{
"epoch": 0.2,
"learning_rate": 3.2628962088253574e-05,
"loss": 6.9101,
"step": 1050
},
{
"epoch": 0.2,
"learning_rate": 3.29397141081417e-05,
"loss": 6.8636,
"step": 1060
},
{
"epoch": 0.2,
"learning_rate": 3.325046612802983e-05,
"loss": 6.8813,
"step": 1070
},
{
"epoch": 0.2,
"learning_rate": 3.356121814791796e-05,
"loss": 6.8706,
"step": 1080
},
{
"epoch": 0.2,
"learning_rate": 3.387197016780609e-05,
"loss": 6.8722,
"step": 1090
},
{
"epoch": 0.21,
"learning_rate": 3.418272218769422e-05,
"loss": 6.8492,
"step": 1100
},
{
"epoch": 0.21,
"learning_rate": 3.449347420758235e-05,
"loss": 6.8014,
"step": 1110
},
{
"epoch": 0.21,
"learning_rate": 3.480422622747048e-05,
"loss": 6.8025,
"step": 1120
},
{
"epoch": 0.21,
"learning_rate": 3.511497824735861e-05,
"loss": 6.8108,
"step": 1130
},
{
"epoch": 0.21,
"learning_rate": 3.542573026724674e-05,
"loss": 6.8155,
"step": 1140
},
{
"epoch": 0.21,
"learning_rate": 3.573648228713487e-05,
"loss": 6.7615,
"step": 1150
},
{
"epoch": 0.22,
"learning_rate": 3.6047234307023e-05,
"loss": 6.7385,
"step": 1160
},
{
"epoch": 0.22,
"learning_rate": 3.635798632691113e-05,
"loss": 6.7358,
"step": 1170
},
{
"epoch": 0.22,
"learning_rate": 3.666873834679926e-05,
"loss": 6.7388,
"step": 1180
},
{
"epoch": 0.22,
"learning_rate": 3.6979490366687386e-05,
"loss": 6.7352,
"step": 1190
},
{
"epoch": 0.22,
"learning_rate": 3.7290242386575516e-05,
"loss": 6.7093,
"step": 1200
},
{
"epoch": 0.23,
"learning_rate": 3.7600994406463645e-05,
"loss": 6.7211,
"step": 1210
},
{
"epoch": 0.23,
"learning_rate": 3.7911746426351775e-05,
"loss": 6.6963,
"step": 1220
},
{
"epoch": 0.23,
"learning_rate": 3.8222498446239904e-05,
"loss": 6.6921,
"step": 1230
},
{
"epoch": 0.23,
"learning_rate": 3.853325046612803e-05,
"loss": 6.6363,
"step": 1240
},
{
"epoch": 0.23,
"learning_rate": 3.884400248601616e-05,
"loss": 6.6501,
"step": 1250
},
{
"epoch": 0.23,
"learning_rate": 3.915475450590429e-05,
"loss": 6.6199,
"step": 1260
},
{
"epoch": 0.24,
"learning_rate": 3.9465506525792415e-05,
"loss": 6.5996,
"step": 1270
},
{
"epoch": 0.24,
"learning_rate": 3.977625854568055e-05,
"loss": 6.6274,
"step": 1280
},
{
"epoch": 0.24,
"learning_rate": 4.008701056556868e-05,
"loss": 6.5743,
"step": 1290
},
{
"epoch": 0.24,
"learning_rate": 4.0397762585456804e-05,
"loss": 6.5633,
"step": 1300
},
{
"epoch": 0.24,
"learning_rate": 4.070851460534494e-05,
"loss": 6.5602,
"step": 1310
},
{
"epoch": 0.25,
"learning_rate": 4.101926662523306e-05,
"loss": 6.5558,
"step": 1320
},
{
"epoch": 0.25,
"learning_rate": 4.133001864512119e-05,
"loss": 6.5785,
"step": 1330
},
{
"epoch": 0.25,
"learning_rate": 4.164077066500933e-05,
"loss": 6.5247,
"step": 1340
},
{
"epoch": 0.25,
"learning_rate": 4.195152268489745e-05,
"loss": 6.5409,
"step": 1350
},
{
"epoch": 0.25,
"learning_rate": 4.226227470478558e-05,
"loss": 6.4857,
"step": 1360
},
{
"epoch": 0.26,
"learning_rate": 4.257302672467372e-05,
"loss": 6.5344,
"step": 1370
},
{
"epoch": 0.26,
"learning_rate": 4.288377874456184e-05,
"loss": 6.5113,
"step": 1380
},
{
"epoch": 0.26,
"learning_rate": 4.319453076444997e-05,
"loss": 6.4928,
"step": 1390
},
{
"epoch": 0.26,
"learning_rate": 4.3505282784338105e-05,
"loss": 6.4826,
"step": 1400
},
{
"epoch": 0.26,
"learning_rate": 4.381603480422623e-05,
"loss": 6.4621,
"step": 1410
},
{
"epoch": 0.26,
"learning_rate": 4.412678682411436e-05,
"loss": 6.4211,
"step": 1420
},
{
"epoch": 0.27,
"learning_rate": 4.4437538844002494e-05,
"loss": 6.4753,
"step": 1430
},
{
"epoch": 0.27,
"learning_rate": 4.4748290863890616e-05,
"loss": 6.4343,
"step": 1440
},
{
"epoch": 0.27,
"learning_rate": 4.5059042883778746e-05,
"loss": 6.4252,
"step": 1450
},
{
"epoch": 0.27,
"learning_rate": 4.5369794903666875e-05,
"loss": 6.3907,
"step": 1460
},
{
"epoch": 0.27,
"learning_rate": 4.5680546923555005e-05,
"loss": 6.383,
"step": 1470
},
{
"epoch": 0.28,
"learning_rate": 4.5991298943443134e-05,
"loss": 6.378,
"step": 1480
},
{
"epoch": 0.28,
"learning_rate": 4.6302050963331264e-05,
"loss": 6.3726,
"step": 1490
},
{
"epoch": 0.28,
"learning_rate": 4.661280298321939e-05,
"loss": 6.3738,
"step": 1500
},
{
"epoch": 0.28,
"learning_rate": 4.692355500310752e-05,
"loss": 6.3397,
"step": 1510
},
{
"epoch": 0.28,
"learning_rate": 4.723430702299565e-05,
"loss": 6.351,
"step": 1520
},
{
"epoch": 0.29,
"learning_rate": 4.754505904288378e-05,
"loss": 6.3207,
"step": 1530
},
{
"epoch": 0.29,
"learning_rate": 4.785581106277191e-05,
"loss": 6.3161,
"step": 1540
},
{
"epoch": 0.29,
"learning_rate": 4.816656308266004e-05,
"loss": 6.3103,
"step": 1550
},
{
"epoch": 0.29,
"learning_rate": 4.847731510254817e-05,
"loss": 6.291,
"step": 1560
},
{
"epoch": 0.29,
"learning_rate": 4.87880671224363e-05,
"loss": 6.2851,
"step": 1570
},
{
"epoch": 0.29,
"learning_rate": 4.909881914232443e-05,
"loss": 6.2605,
"step": 1580
},
{
"epoch": 0.3,
"learning_rate": 4.940957116221256e-05,
"loss": 6.2434,
"step": 1590
},
{
"epoch": 0.3,
"learning_rate": 4.972032318210069e-05,
"loss": 6.2739,
"step": 1600
},
{
"epoch": 0.3,
"learning_rate": 4.999654624576915e-05,
"loss": 6.2309,
"step": 1610
},
{
"epoch": 0.3,
"learning_rate": 4.996200870346067e-05,
"loss": 6.2764,
"step": 1620
},
{
"epoch": 0.3,
"learning_rate": 4.992747116115217e-05,
"loss": 6.1911,
"step": 1630
},
{
"epoch": 0.31,
"learning_rate": 4.989293361884368e-05,
"loss": 6.193,
"step": 1640
},
{
"epoch": 0.31,
"learning_rate": 4.98583960765352e-05,
"loss": 6.1912,
"step": 1650
},
{
"epoch": 0.31,
"learning_rate": 4.982385853422671e-05,
"loss": 6.1854,
"step": 1660
},
{
"epoch": 0.31,
"learning_rate": 4.978932099191822e-05,
"loss": 6.1694,
"step": 1670
},
{
"epoch": 0.31,
"learning_rate": 4.9754783449609724e-05,
"loss": 6.1337,
"step": 1680
},
{
"epoch": 0.32,
"learning_rate": 4.972024590730124e-05,
"loss": 6.1062,
"step": 1690
},
{
"epoch": 0.32,
"learning_rate": 4.968570836499275e-05,
"loss": 6.132,
"step": 1700
},
{
"epoch": 0.32,
"learning_rate": 4.965117082268426e-05,
"loss": 6.178,
"step": 1710
},
{
"epoch": 0.32,
"learning_rate": 4.961663328037577e-05,
"loss": 6.1392,
"step": 1720
},
{
"epoch": 0.32,
"learning_rate": 4.958209573806728e-05,
"loss": 6.1331,
"step": 1730
},
{
"epoch": 0.32,
"learning_rate": 4.954755819575879e-05,
"loss": 6.0742,
"step": 1740
},
{
"epoch": 0.33,
"learning_rate": 4.95130206534503e-05,
"loss": 6.0837,
"step": 1750
},
{
"epoch": 0.33,
"learning_rate": 4.947848311114181e-05,
"loss": 6.0774,
"step": 1760
},
{
"epoch": 0.33,
"learning_rate": 4.944394556883332e-05,
"loss": 6.0711,
"step": 1770
},
{
"epoch": 0.33,
"learning_rate": 4.9409408026524834e-05,
"loss": 6.0922,
"step": 1780
},
{
"epoch": 0.33,
"learning_rate": 4.9374870484216344e-05,
"loss": 6.041,
"step": 1790
},
{
"epoch": 0.34,
"learning_rate": 4.9340332941907854e-05,
"loss": 6.0081,
"step": 1800
},
{
"epoch": 0.34,
"learning_rate": 4.930579539959937e-05,
"loss": 5.9962,
"step": 1810
},
{
"epoch": 0.34,
"learning_rate": 4.9271257857290875e-05,
"loss": 6.0346,
"step": 1820
},
{
"epoch": 0.34,
"learning_rate": 4.9236720314982385e-05,
"loss": 6.0221,
"step": 1830
},
{
"epoch": 0.34,
"learning_rate": 4.92021827726739e-05,
"loss": 6.0185,
"step": 1840
},
{
"epoch": 0.34,
"learning_rate": 4.916764523036541e-05,
"loss": 5.9659,
"step": 1850
},
{
"epoch": 0.35,
"learning_rate": 4.913310768805692e-05,
"loss": 5.9443,
"step": 1860
},
{
"epoch": 0.35,
"learning_rate": 4.9098570145748426e-05,
"loss": 5.9759,
"step": 1870
},
{
"epoch": 0.35,
"learning_rate": 4.9064032603439943e-05,
"loss": 5.973,
"step": 1880
},
{
"epoch": 0.35,
"learning_rate": 4.9029495061131454e-05,
"loss": 5.9397,
"step": 1890
},
{
"epoch": 0.35,
"learning_rate": 4.8994957518822964e-05,
"loss": 5.9718,
"step": 1900
},
{
"epoch": 0.36,
"learning_rate": 4.8960419976514474e-05,
"loss": 5.8958,
"step": 1910
},
{
"epoch": 0.36,
"learning_rate": 4.8925882434205985e-05,
"loss": 5.9235,
"step": 1920
},
{
"epoch": 0.36,
"learning_rate": 4.8891344891897495e-05,
"loss": 5.8631,
"step": 1930
},
{
"epoch": 0.36,
"learning_rate": 4.8856807349589005e-05,
"loss": 5.8879,
"step": 1940
},
{
"epoch": 0.36,
"learning_rate": 4.8822269807280516e-05,
"loss": 5.9051,
"step": 1950
},
{
"epoch": 0.37,
"learning_rate": 4.8787732264972026e-05,
"loss": 5.8506,
"step": 1960
},
{
"epoch": 0.37,
"learning_rate": 4.8753194722663536e-05,
"loss": 5.8902,
"step": 1970
},
{
"epoch": 0.37,
"learning_rate": 4.8718657180355047e-05,
"loss": 5.8543,
"step": 1980
},
{
"epoch": 0.37,
"learning_rate": 4.868411963804656e-05,
"loss": 5.8779,
"step": 1990
},
{
"epoch": 0.37,
"learning_rate": 4.8649582095738074e-05,
"loss": 5.859,
"step": 2000
},
{
"epoch": 0.37,
"learning_rate": 4.861504455342958e-05,
"loss": 5.8649,
"step": 2010
},
{
"epoch": 0.38,
"learning_rate": 4.858050701112109e-05,
"loss": 5.8307,
"step": 2020
},
{
"epoch": 0.38,
"learning_rate": 4.8545969468812605e-05,
"loss": 5.7816,
"step": 2030
},
{
"epoch": 0.38,
"learning_rate": 4.8511431926504115e-05,
"loss": 5.7834,
"step": 2040
},
{
"epoch": 0.38,
"learning_rate": 4.847689438419562e-05,
"loss": 5.8585,
"step": 2050
},
{
"epoch": 0.38,
"learning_rate": 4.844235684188713e-05,
"loss": 5.758,
"step": 2060
},
{
"epoch": 0.39,
"learning_rate": 4.8407819299578646e-05,
"loss": 5.7945,
"step": 2070
},
{
"epoch": 0.39,
"learning_rate": 4.8373281757270157e-05,
"loss": 5.7647,
"step": 2080
},
{
"epoch": 0.39,
"learning_rate": 4.833874421496167e-05,
"loss": 5.7845,
"step": 2090
},
{
"epoch": 0.39,
"learning_rate": 4.830420667265318e-05,
"loss": 5.7382,
"step": 2100
},
{
"epoch": 0.39,
"learning_rate": 4.826966913034469e-05,
"loss": 5.7743,
"step": 2110
},
{
"epoch": 0.4,
"learning_rate": 4.82351315880362e-05,
"loss": 5.7199,
"step": 2120
},
{
"epoch": 0.4,
"learning_rate": 4.820059404572771e-05,
"loss": 5.7205,
"step": 2130
},
{
"epoch": 0.4,
"learning_rate": 4.816605650341922e-05,
"loss": 5.7001,
"step": 2140
},
{
"epoch": 0.4,
"learning_rate": 4.813151896111073e-05,
"loss": 5.7475,
"step": 2150
},
{
"epoch": 0.4,
"learning_rate": 4.809698141880224e-05,
"loss": 5.735,
"step": 2160
},
{
"epoch": 0.4,
"learning_rate": 4.806244387649375e-05,
"loss": 5.7608,
"step": 2170
},
{
"epoch": 0.41,
"learning_rate": 4.8027906334185266e-05,
"loss": 5.6663,
"step": 2180
},
{
"epoch": 0.41,
"learning_rate": 4.799336879187678e-05,
"loss": 5.6878,
"step": 2190
},
{
"epoch": 0.41,
"learning_rate": 4.795883124956828e-05,
"loss": 5.6914,
"step": 2200
},
{
"epoch": 0.41,
"learning_rate": 4.792429370725979e-05,
"loss": 5.6306,
"step": 2210
},
{
"epoch": 0.41,
"learning_rate": 4.788975616495131e-05,
"loss": 5.6178,
"step": 2220
},
{
"epoch": 0.42,
"learning_rate": 4.785521862264282e-05,
"loss": 5.679,
"step": 2230
},
{
"epoch": 0.42,
"learning_rate": 4.782068108033432e-05,
"loss": 5.6543,
"step": 2240
},
{
"epoch": 0.42,
"learning_rate": 4.778614353802583e-05,
"loss": 5.6911,
"step": 2250
},
{
"epoch": 0.42,
"learning_rate": 4.775160599571735e-05,
"loss": 5.6754,
"step": 2260
},
{
"epoch": 0.42,
"learning_rate": 4.771706845340886e-05,
"loss": 5.6252,
"step": 2270
},
{
"epoch": 0.43,
"learning_rate": 4.768253091110037e-05,
"loss": 5.6094,
"step": 2280
},
{
"epoch": 0.43,
"learning_rate": 4.764799336879188e-05,
"loss": 5.599,
"step": 2290
},
{
"epoch": 0.43,
"learning_rate": 4.761345582648339e-05,
"loss": 5.6413,
"step": 2300
},
{
"epoch": 0.43,
"learning_rate": 4.75789182841749e-05,
"loss": 5.6193,
"step": 2310
},
{
"epoch": 0.43,
"learning_rate": 4.754438074186641e-05,
"loss": 5.5898,
"step": 2320
},
{
"epoch": 0.43,
"learning_rate": 4.750984319955792e-05,
"loss": 5.572,
"step": 2330
},
{
"epoch": 0.44,
"learning_rate": 4.747530565724943e-05,
"loss": 5.6013,
"step": 2340
},
{
"epoch": 0.44,
"learning_rate": 4.744076811494094e-05,
"loss": 5.5543,
"step": 2350
},
{
"epoch": 0.44,
"learning_rate": 4.740623057263245e-05,
"loss": 5.5415,
"step": 2360
},
{
"epoch": 0.44,
"learning_rate": 4.737169303032397e-05,
"loss": 5.5246,
"step": 2370
},
{
"epoch": 0.44,
"learning_rate": 4.733715548801548e-05,
"loss": 5.5657,
"step": 2380
},
{
"epoch": 0.45,
"learning_rate": 4.730261794570698e-05,
"loss": 5.5453,
"step": 2390
},
{
"epoch": 0.45,
"learning_rate": 4.7268080403398493e-05,
"loss": 5.5467,
"step": 2400
},
{
"epoch": 0.45,
"learning_rate": 4.723354286109001e-05,
"loss": 5.5455,
"step": 2410
},
{
"epoch": 0.45,
"learning_rate": 4.719900531878152e-05,
"loss": 5.5253,
"step": 2420
},
{
"epoch": 0.45,
"learning_rate": 4.7164467776473024e-05,
"loss": 5.4831,
"step": 2430
},
{
"epoch": 0.45,
"learning_rate": 4.7129930234164535e-05,
"loss": 5.4704,
"step": 2440
},
{
"epoch": 0.46,
"learning_rate": 4.709539269185605e-05,
"loss": 5.4801,
"step": 2450
},
{
"epoch": 0.46,
"learning_rate": 4.706085514954756e-05,
"loss": 5.48,
"step": 2460
},
{
"epoch": 0.46,
"learning_rate": 4.702631760723907e-05,
"loss": 5.5388,
"step": 2470
},
{
"epoch": 0.46,
"learning_rate": 4.699178006493058e-05,
"loss": 5.4883,
"step": 2480
},
{
"epoch": 0.46,
"learning_rate": 4.695724252262209e-05,
"loss": 5.4321,
"step": 2490
},
{
"epoch": 0.47,
"learning_rate": 4.69227049803136e-05,
"loss": 5.4297,
"step": 2500
},
{
"epoch": 0.47,
"learning_rate": 4.6888167438005114e-05,
"loss": 5.4174,
"step": 2510
},
{
"epoch": 0.47,
"learning_rate": 4.6853629895696624e-05,
"loss": 5.5185,
"step": 2520
},
{
"epoch": 0.47,
"learning_rate": 4.6819092353388134e-05,
"loss": 5.4269,
"step": 2530
},
{
"epoch": 0.47,
"learning_rate": 4.6784554811079645e-05,
"loss": 5.4688,
"step": 2540
},
{
"epoch": 0.48,
"learning_rate": 4.6750017268771155e-05,
"loss": 5.384,
"step": 2550
},
{
"epoch": 0.48,
"learning_rate": 4.671547972646267e-05,
"loss": 5.4662,
"step": 2560
},
{
"epoch": 0.48,
"learning_rate": 4.668094218415418e-05,
"loss": 5.4473,
"step": 2570
},
{
"epoch": 0.48,
"learning_rate": 4.6646404641845686e-05,
"loss": 5.4024,
"step": 2580
},
{
"epoch": 0.48,
"learning_rate": 4.6611867099537196e-05,
"loss": 5.3608,
"step": 2590
},
{
"epoch": 0.48,
"learning_rate": 4.657732955722871e-05,
"loss": 5.4895,
"step": 2600
},
{
"epoch": 0.49,
"learning_rate": 4.6542792014920224e-05,
"loss": 5.3868,
"step": 2610
},
{
"epoch": 0.49,
"learning_rate": 4.650825447261173e-05,
"loss": 5.3681,
"step": 2620
},
{
"epoch": 0.49,
"learning_rate": 4.647371693030324e-05,
"loss": 5.4223,
"step": 2630
},
{
"epoch": 0.49,
"learning_rate": 4.6439179387994755e-05,
"loss": 5.412,
"step": 2640
},
{
"epoch": 0.49,
"learning_rate": 4.6404641845686265e-05,
"loss": 5.3381,
"step": 2650
},
{
"epoch": 0.5,
"learning_rate": 4.6370104303377775e-05,
"loss": 5.3195,
"step": 2660
},
{
"epoch": 0.5,
"learning_rate": 4.6335566761069285e-05,
"loss": 5.3945,
"step": 2670
},
{
"epoch": 0.5,
"learning_rate": 4.6301029218760796e-05,
"loss": 5.3316,
"step": 2680
},
{
"epoch": 0.5,
"learning_rate": 4.6266491676452306e-05,
"loss": 5.3232,
"step": 2690
},
{
"epoch": 0.5,
"learning_rate": 4.6231954134143816e-05,
"loss": 5.3246,
"step": 2700
},
{
"epoch": 0.51,
"learning_rate": 4.619741659183533e-05,
"loss": 5.3445,
"step": 2710
},
{
"epoch": 0.51,
"learning_rate": 4.616287904952684e-05,
"loss": 5.2847,
"step": 2720
},
{
"epoch": 0.51,
"learning_rate": 4.612834150721835e-05,
"loss": 5.2795,
"step": 2730
},
{
"epoch": 0.51,
"learning_rate": 4.609380396490986e-05,
"loss": 5.2559,
"step": 2740
},
{
"epoch": 0.51,
"learning_rate": 4.6059266422601375e-05,
"loss": 5.3091,
"step": 2750
},
{
"epoch": 0.51,
"learning_rate": 4.6024728880292885e-05,
"loss": 5.2441,
"step": 2760
},
{
"epoch": 0.52,
"learning_rate": 4.599019133798439e-05,
"loss": 5.2534,
"step": 2770
},
{
"epoch": 0.52,
"learning_rate": 4.59556537956759e-05,
"loss": 5.2869,
"step": 2780
},
{
"epoch": 0.52,
"learning_rate": 4.5921116253367416e-05,
"loss": 5.2629,
"step": 2790
},
{
"epoch": 0.52,
"learning_rate": 4.5886578711058926e-05,
"loss": 5.2835,
"step": 2800
},
{
"epoch": 0.52,
"learning_rate": 4.585204116875043e-05,
"loss": 5.2437,
"step": 2810
},
{
"epoch": 0.53,
"learning_rate": 4.581750362644194e-05,
"loss": 5.2736,
"step": 2820
},
{
"epoch": 0.53,
"learning_rate": 4.578296608413346e-05,
"loss": 5.2331,
"step": 2830
},
{
"epoch": 0.53,
"learning_rate": 4.574842854182497e-05,
"loss": 5.2059,
"step": 2840
},
{
"epoch": 0.53,
"learning_rate": 4.571389099951648e-05,
"loss": 5.2348,
"step": 2850
},
{
"epoch": 0.53,
"learning_rate": 4.567935345720799e-05,
"loss": 5.2183,
"step": 2860
},
{
"epoch": 0.54,
"learning_rate": 4.56448159148995e-05,
"loss": 5.1723,
"step": 2870
},
{
"epoch": 0.54,
"learning_rate": 4.561027837259101e-05,
"loss": 5.206,
"step": 2880
},
{
"epoch": 0.54,
"learning_rate": 4.557574083028252e-05,
"loss": 5.276,
"step": 2890
},
{
"epoch": 0.54,
"learning_rate": 4.554120328797403e-05,
"loss": 5.1271,
"step": 2900
},
{
"epoch": 0.54,
"learning_rate": 4.550666574566554e-05,
"loss": 5.1887,
"step": 2910
},
{
"epoch": 0.54,
"learning_rate": 4.547212820335705e-05,
"loss": 5.2678,
"step": 2920
},
{
"epoch": 0.55,
"learning_rate": 4.543759066104856e-05,
"loss": 5.2341,
"step": 2930
},
{
"epoch": 0.55,
"learning_rate": 4.540305311874008e-05,
"loss": 5.2136,
"step": 2940
},
{
"epoch": 0.55,
"learning_rate": 4.536851557643158e-05,
"loss": 5.1565,
"step": 2950
},
{
"epoch": 0.55,
"learning_rate": 4.533397803412309e-05,
"loss": 5.1883,
"step": 2960
},
{
"epoch": 0.55,
"learning_rate": 4.52994404918146e-05,
"loss": 5.187,
"step": 2970
},
{
"epoch": 0.56,
"learning_rate": 4.526490294950612e-05,
"loss": 5.145,
"step": 2980
},
{
"epoch": 0.56,
"learning_rate": 4.523036540719763e-05,
"loss": 5.1121,
"step": 2990
},
{
"epoch": 0.56,
"learning_rate": 4.519582786488913e-05,
"loss": 5.063,
"step": 3000
},
{
"epoch": 0.56,
"learning_rate": 4.516129032258064e-05,
"loss": 5.157,
"step": 3010
},
{
"epoch": 0.56,
"learning_rate": 4.512675278027216e-05,
"loss": 5.1123,
"step": 3020
},
{
"epoch": 0.57,
"learning_rate": 4.509221523796367e-05,
"loss": 5.115,
"step": 3030
},
{
"epoch": 0.57,
"learning_rate": 4.505767769565518e-05,
"loss": 5.1296,
"step": 3040
},
{
"epoch": 0.57,
"learning_rate": 4.502314015334669e-05,
"loss": 5.1624,
"step": 3050
},
{
"epoch": 0.57,
"learning_rate": 4.49886026110382e-05,
"loss": 5.0889,
"step": 3060
},
{
"epoch": 0.57,
"learning_rate": 4.495406506872971e-05,
"loss": 5.0914,
"step": 3070
},
{
"epoch": 0.57,
"learning_rate": 4.491952752642122e-05,
"loss": 5.1042,
"step": 3080
},
{
"epoch": 0.58,
"learning_rate": 4.488498998411273e-05,
"loss": 5.0769,
"step": 3090
},
{
"epoch": 0.58,
"learning_rate": 4.485045244180424e-05,
"loss": 5.0678,
"step": 3100
},
{
"epoch": 0.58,
"learning_rate": 4.481591489949575e-05,
"loss": 5.0928,
"step": 3110
},
{
"epoch": 0.58,
"learning_rate": 4.478137735718726e-05,
"loss": 5.0958,
"step": 3120
},
{
"epoch": 0.58,
"learning_rate": 4.474683981487878e-05,
"loss": 5.0713,
"step": 3130
},
{
"epoch": 0.59,
"learning_rate": 4.4712302272570284e-05,
"loss": 5.0995,
"step": 3140
},
{
"epoch": 0.59,
"learning_rate": 4.4677764730261794e-05,
"loss": 5.0845,
"step": 3150
},
{
"epoch": 0.59,
"learning_rate": 4.4643227187953304e-05,
"loss": 5.0856,
"step": 3160
},
{
"epoch": 0.59,
"learning_rate": 4.460868964564482e-05,
"loss": 5.0638,
"step": 3170
},
{
"epoch": 0.59,
"learning_rate": 4.457415210333633e-05,
"loss": 5.0594,
"step": 3180
},
{
"epoch": 0.59,
"learning_rate": 4.4539614561027835e-05,
"loss": 5.0941,
"step": 3190
},
{
"epoch": 0.6,
"learning_rate": 4.4505077018719346e-05,
"loss": 5.0457,
"step": 3200
},
{
"epoch": 0.6,
"learning_rate": 4.447053947641086e-05,
"loss": 4.9635,
"step": 3210
},
{
"epoch": 0.6,
"learning_rate": 4.443600193410237e-05,
"loss": 4.9817,
"step": 3220
},
{
"epoch": 0.6,
"learning_rate": 4.4401464391793883e-05,
"loss": 4.989,
"step": 3230
},
{
"epoch": 0.6,
"learning_rate": 4.4366926849485394e-05,
"loss": 4.9672,
"step": 3240
},
{
"epoch": 0.61,
"learning_rate": 4.4332389307176904e-05,
"loss": 5.008,
"step": 3250
},
{
"epoch": 0.61,
"learning_rate": 4.4297851764868414e-05,
"loss": 4.9655,
"step": 3260
},
{
"epoch": 0.61,
"learning_rate": 4.4263314222559925e-05,
"loss": 4.9778,
"step": 3270
},
{
"epoch": 0.61,
"learning_rate": 4.4228776680251435e-05,
"loss": 4.9593,
"step": 3280
},
{
"epoch": 0.61,
"learning_rate": 4.4194239137942945e-05,
"loss": 4.9961,
"step": 3290
},
{
"epoch": 0.62,
"learning_rate": 4.4159701595634456e-05,
"loss": 4.9818,
"step": 3300
},
{
"epoch": 0.62,
"learning_rate": 4.4125164053325966e-05,
"loss": 4.913,
"step": 3310
},
{
"epoch": 0.62,
"learning_rate": 4.409062651101748e-05,
"loss": 4.9754,
"step": 3320
},
{
"epoch": 0.62,
"learning_rate": 4.4056088968708987e-05,
"loss": 4.9515,
"step": 3330
},
{
"epoch": 0.62,
"learning_rate": 4.40215514264005e-05,
"loss": 4.9495,
"step": 3340
},
{
"epoch": 0.62,
"learning_rate": 4.398701388409201e-05,
"loss": 4.9268,
"step": 3350
},
{
"epoch": 0.63,
"learning_rate": 4.3952476341783524e-05,
"loss": 4.9985,
"step": 3360
},
{
"epoch": 0.63,
"learning_rate": 4.3917938799475035e-05,
"loss": 4.9254,
"step": 3370
},
{
"epoch": 0.63,
"learning_rate": 4.388340125716654e-05,
"loss": 4.9554,
"step": 3380
},
{
"epoch": 0.63,
"learning_rate": 4.384886371485805e-05,
"loss": 4.9328,
"step": 3390
},
{
"epoch": 0.63,
"learning_rate": 4.3814326172549566e-05,
"loss": 4.9389,
"step": 3400
},
{
"epoch": 0.64,
"learning_rate": 4.3779788630241076e-05,
"loss": 4.9226,
"step": 3410
},
{
"epoch": 0.64,
"learning_rate": 4.3745251087932586e-05,
"loss": 4.9134,
"step": 3420
},
{
"epoch": 0.64,
"learning_rate": 4.3710713545624097e-05,
"loss": 4.9435,
"step": 3430
},
{
"epoch": 0.64,
"learning_rate": 4.367617600331561e-05,
"loss": 4.9202,
"step": 3440
},
{
"epoch": 0.64,
"learning_rate": 4.364163846100712e-05,
"loss": 4.9282,
"step": 3450
},
{
"epoch": 0.65,
"learning_rate": 4.360710091869863e-05,
"loss": 4.8598,
"step": 3460
},
{
"epoch": 0.65,
"learning_rate": 4.357256337639014e-05,
"loss": 4.9474,
"step": 3470
},
{
"epoch": 0.65,
"learning_rate": 4.353802583408165e-05,
"loss": 4.8958,
"step": 3480
},
{
"epoch": 0.65,
"learning_rate": 4.350348829177316e-05,
"loss": 4.8854,
"step": 3490
},
{
"epoch": 0.65,
"learning_rate": 4.346895074946467e-05,
"loss": 4.8184,
"step": 3500
},
{
"epoch": 0.65,
"learning_rate": 4.3434413207156186e-05,
"loss": 4.9108,
"step": 3510
},
{
"epoch": 0.66,
"learning_rate": 4.339987566484769e-05,
"loss": 4.8647,
"step": 3520
},
{
"epoch": 0.66,
"learning_rate": 4.33653381225392e-05,
"loss": 4.8807,
"step": 3530
},
{
"epoch": 0.66,
"learning_rate": 4.333080058023071e-05,
"loss": 4.8766,
"step": 3540
},
{
"epoch": 0.66,
"learning_rate": 4.329626303792223e-05,
"loss": 4.8508,
"step": 3550
},
{
"epoch": 0.66,
"learning_rate": 4.326172549561374e-05,
"loss": 4.8532,
"step": 3560
},
{
"epoch": 0.67,
"learning_rate": 4.322718795330524e-05,
"loss": 4.8687,
"step": 3570
},
{
"epoch": 0.67,
"learning_rate": 4.319265041099675e-05,
"loss": 4.7944,
"step": 3580
},
{
"epoch": 0.67,
"learning_rate": 4.315811286868827e-05,
"loss": 4.7731,
"step": 3590
},
{
"epoch": 0.67,
"learning_rate": 4.312357532637978e-05,
"loss": 4.8183,
"step": 3600
},
{
"epoch": 0.67,
"learning_rate": 4.308903778407129e-05,
"loss": 4.7874,
"step": 3610
},
{
"epoch": 0.68,
"learning_rate": 4.30545002417628e-05,
"loss": 4.7327,
"step": 3620
},
{
"epoch": 0.68,
"learning_rate": 4.301996269945431e-05,
"loss": 4.8096,
"step": 3630
},
{
"epoch": 0.68,
"learning_rate": 4.298542515714582e-05,
"loss": 4.738,
"step": 3640
},
{
"epoch": 0.68,
"learning_rate": 4.295088761483733e-05,
"loss": 4.8368,
"step": 3650
},
{
"epoch": 0.68,
"learning_rate": 4.291635007252884e-05,
"loss": 4.757,
"step": 3660
},
{
"epoch": 0.68,
"learning_rate": 4.288181253022035e-05,
"loss": 4.789,
"step": 3670
},
{
"epoch": 0.69,
"learning_rate": 4.284727498791186e-05,
"loss": 4.7926,
"step": 3680
},
{
"epoch": 0.69,
"learning_rate": 4.281273744560337e-05,
"loss": 4.7196,
"step": 3690
},
{
"epoch": 0.69,
"learning_rate": 4.277819990329489e-05,
"loss": 4.7615,
"step": 3700
},
{
"epoch": 0.69,
"learning_rate": 4.274366236098639e-05,
"loss": 4.8161,
"step": 3710
},
{
"epoch": 0.69,
"learning_rate": 4.27091248186779e-05,
"loss": 4.7221,
"step": 3720
},
{
"epoch": 0.7,
"learning_rate": 4.267458727636941e-05,
"loss": 4.7104,
"step": 3730
},
{
"epoch": 0.7,
"learning_rate": 4.264004973406093e-05,
"loss": 4.6939,
"step": 3740
},
{
"epoch": 0.7,
"learning_rate": 4.260551219175244e-05,
"loss": 4.7584,
"step": 3750
},
{
"epoch": 0.7,
"learning_rate": 4.2570974649443944e-05,
"loss": 4.7131,
"step": 3760
},
{
"epoch": 0.7,
"learning_rate": 4.2536437107135454e-05,
"loss": 4.7359,
"step": 3770
},
{
"epoch": 0.7,
"learning_rate": 4.250189956482697e-05,
"loss": 4.7093,
"step": 3780
},
{
"epoch": 0.71,
"learning_rate": 4.246736202251848e-05,
"loss": 4.6965,
"step": 3790
},
{
"epoch": 0.71,
"learning_rate": 4.243282448020999e-05,
"loss": 4.7546,
"step": 3800
},
{
"epoch": 0.71,
"learning_rate": 4.23982869379015e-05,
"loss": 4.6808,
"step": 3810
},
{
"epoch": 0.71,
"learning_rate": 4.236374939559301e-05,
"loss": 4.7021,
"step": 3820
},
{
"epoch": 0.71,
"learning_rate": 4.232921185328452e-05,
"loss": 4.6742,
"step": 3830
},
{
"epoch": 0.72,
"learning_rate": 4.229467431097603e-05,
"loss": 4.7074,
"step": 3840
},
{
"epoch": 0.72,
"learning_rate": 4.226013676866754e-05,
"loss": 4.719,
"step": 3850
},
{
"epoch": 0.72,
"learning_rate": 4.2225599226359054e-05,
"loss": 4.6518,
"step": 3860
},
{
"epoch": 0.72,
"learning_rate": 4.2191061684050564e-05,
"loss": 4.6734,
"step": 3870
},
{
"epoch": 0.72,
"learning_rate": 4.2156524141742074e-05,
"loss": 4.686,
"step": 3880
},
{
"epoch": 0.73,
"learning_rate": 4.212198659943359e-05,
"loss": 4.6587,
"step": 3890
},
{
"epoch": 0.73,
"learning_rate": 4.2087449057125095e-05,
"loss": 4.6642,
"step": 3900
},
{
"epoch": 0.73,
"learning_rate": 4.2052911514816605e-05,
"loss": 4.6555,
"step": 3910
},
{
"epoch": 0.73,
"learning_rate": 4.2018373972508116e-05,
"loss": 4.6315,
"step": 3920
},
{
"epoch": 0.73,
"learning_rate": 4.198383643019963e-05,
"loss": 4.6435,
"step": 3930
},
{
"epoch": 0.73,
"learning_rate": 4.194929888789114e-05,
"loss": 4.6456,
"step": 3940
},
{
"epoch": 0.74,
"learning_rate": 4.1914761345582646e-05,
"loss": 4.5384,
"step": 3950
},
{
"epoch": 0.74,
"learning_rate": 4.188022380327416e-05,
"loss": 4.6354,
"step": 3960
},
{
"epoch": 0.74,
"learning_rate": 4.1845686260965674e-05,
"loss": 4.5797,
"step": 3970
},
{
"epoch": 0.74,
"learning_rate": 4.1811148718657184e-05,
"loss": 4.6615,
"step": 3980
},
{
"epoch": 0.74,
"learning_rate": 4.1776611176348694e-05,
"loss": 4.6493,
"step": 3990
},
{
"epoch": 0.75,
"learning_rate": 4.1742073634040205e-05,
"loss": 4.5619,
"step": 4000
},
{
"epoch": 0.75,
"learning_rate": 4.1707536091731715e-05,
"loss": 4.5834,
"step": 4010
},
{
"epoch": 0.75,
"learning_rate": 4.1672998549423225e-05,
"loss": 4.6102,
"step": 4020
},
{
"epoch": 0.75,
"learning_rate": 4.1638461007114736e-05,
"loss": 4.6063,
"step": 4030
},
{
"epoch": 0.75,
"learning_rate": 4.1603923464806246e-05,
"loss": 4.5329,
"step": 4040
},
{
"epoch": 0.76,
"learning_rate": 4.1569385922497756e-05,
"loss": 4.6316,
"step": 4050
},
{
"epoch": 0.76,
"learning_rate": 4.153484838018927e-05,
"loss": 4.6018,
"step": 4060
},
{
"epoch": 0.76,
"learning_rate": 4.150031083788078e-05,
"loss": 4.5185,
"step": 4070
},
{
"epoch": 0.76,
"learning_rate": 4.1465773295572294e-05,
"loss": 4.572,
"step": 4080
},
{
"epoch": 0.76,
"learning_rate": 4.14312357532638e-05,
"loss": 4.5646,
"step": 4090
},
{
"epoch": 0.76,
"learning_rate": 4.139669821095531e-05,
"loss": 4.603,
"step": 4100
},
{
"epoch": 0.77,
"learning_rate": 4.136216066864682e-05,
"loss": 4.5372,
"step": 4110
},
{
"epoch": 0.77,
"learning_rate": 4.1327623126338335e-05,
"loss": 4.5963,
"step": 4120
},
{
"epoch": 0.77,
"learning_rate": 4.1293085584029846e-05,
"loss": 4.5808,
"step": 4130
},
{
"epoch": 0.77,
"learning_rate": 4.125854804172135e-05,
"loss": 4.497,
"step": 4140
},
{
"epoch": 0.77,
"learning_rate": 4.122401049941286e-05,
"loss": 4.5251,
"step": 4150
},
{
"epoch": 0.78,
"learning_rate": 4.118947295710438e-05,
"loss": 4.6056,
"step": 4160
},
{
"epoch": 0.78,
"learning_rate": 4.115493541479589e-05,
"loss": 4.5351,
"step": 4170
},
{
"epoch": 0.78,
"learning_rate": 4.11203978724874e-05,
"loss": 4.5328,
"step": 4180
},
{
"epoch": 0.78,
"learning_rate": 4.108586033017891e-05,
"loss": 4.5216,
"step": 4190
},
{
"epoch": 0.78,
"learning_rate": 4.105132278787042e-05,
"loss": 4.4807,
"step": 4200
},
{
"epoch": 0.79,
"learning_rate": 4.101678524556193e-05,
"loss": 4.4105,
"step": 4210
},
{
"epoch": 0.79,
"learning_rate": 4.098224770325344e-05,
"loss": 4.5167,
"step": 4220
},
{
"epoch": 0.79,
"learning_rate": 4.094771016094495e-05,
"loss": 4.5025,
"step": 4230
},
{
"epoch": 0.79,
"learning_rate": 4.091317261863646e-05,
"loss": 4.4726,
"step": 4240
},
{
"epoch": 0.79,
"learning_rate": 4.087863507632797e-05,
"loss": 4.5453,
"step": 4250
},
{
"epoch": 0.79,
"learning_rate": 4.084409753401948e-05,
"loss": 4.5499,
"step": 4260
},
{
"epoch": 0.8,
"learning_rate": 4.0809559991711e-05,
"loss": 4.477,
"step": 4270
},
{
"epoch": 0.8,
"learning_rate": 4.07750224494025e-05,
"loss": 4.4173,
"step": 4280
},
{
"epoch": 0.8,
"learning_rate": 4.074048490709401e-05,
"loss": 4.4168,
"step": 4290
},
{
"epoch": 0.8,
"learning_rate": 4.070594736478552e-05,
"loss": 4.4963,
"step": 4300
},
{
"epoch": 0.8,
"learning_rate": 4.067140982247704e-05,
"loss": 4.4329,
"step": 4310
},
{
"epoch": 0.81,
"learning_rate": 4.063687228016855e-05,
"loss": 4.4016,
"step": 4320
},
{
"epoch": 0.81,
"learning_rate": 4.060233473786005e-05,
"loss": 4.4456,
"step": 4330
},
{
"epoch": 0.81,
"learning_rate": 4.056779719555156e-05,
"loss": 4.5099,
"step": 4340
},
{
"epoch": 0.81,
"learning_rate": 4.053325965324308e-05,
"loss": 4.5314,
"step": 4350
},
{
"epoch": 0.81,
"learning_rate": 4.049872211093459e-05,
"loss": 4.3918,
"step": 4360
},
{
"epoch": 0.81,
"learning_rate": 4.04641845686261e-05,
"loss": 4.3786,
"step": 4370
},
{
"epoch": 0.82,
"learning_rate": 4.042964702631761e-05,
"loss": 4.3641,
"step": 4380
},
{
"epoch": 0.82,
"learning_rate": 4.039510948400912e-05,
"loss": 4.3287,
"step": 4390
},
{
"epoch": 0.82,
"learning_rate": 4.036057194170063e-05,
"loss": 4.3898,
"step": 4400
},
{
"epoch": 0.82,
"learning_rate": 4.032603439939214e-05,
"loss": 4.4319,
"step": 4410
},
{
"epoch": 0.82,
"learning_rate": 4.029149685708365e-05,
"loss": 4.4166,
"step": 4420
},
{
"epoch": 0.83,
"learning_rate": 4.025695931477516e-05,
"loss": 4.4221,
"step": 4430
},
{
"epoch": 0.83,
"learning_rate": 4.022242177246667e-05,
"loss": 4.4518,
"step": 4440
},
{
"epoch": 0.83,
"learning_rate": 4.018788423015818e-05,
"loss": 4.433,
"step": 4450
},
{
"epoch": 0.83,
"learning_rate": 4.01533466878497e-05,
"loss": 4.4263,
"step": 4460
},
{
"epoch": 0.83,
"learning_rate": 4.01188091455412e-05,
"loss": 4.44,
"step": 4470
},
{
"epoch": 0.84,
"learning_rate": 4.0084271603232713e-05,
"loss": 4.3684,
"step": 4480
},
{
"epoch": 0.84,
"learning_rate": 4.0049734060924224e-05,
"loss": 4.3712,
"step": 4490
},
{
"epoch": 0.84,
"learning_rate": 4.001519651861574e-05,
"loss": 4.3684,
"step": 4500
},
{
"epoch": 0.84,
"learning_rate": 3.998065897630725e-05,
"loss": 4.3534,
"step": 4510
},
{
"epoch": 0.84,
"learning_rate": 3.9946121433998755e-05,
"loss": 4.355,
"step": 4520
},
{
"epoch": 0.84,
"learning_rate": 3.9911583891690265e-05,
"loss": 4.3857,
"step": 4530
},
{
"epoch": 0.85,
"learning_rate": 3.987704634938178e-05,
"loss": 4.3869,
"step": 4540
},
{
"epoch": 0.85,
"learning_rate": 3.984250880707329e-05,
"loss": 4.3584,
"step": 4550
},
{
"epoch": 0.85,
"learning_rate": 3.98079712647648e-05,
"loss": 4.2255,
"step": 4560
},
{
"epoch": 0.85,
"learning_rate": 3.977343372245631e-05,
"loss": 4.3284,
"step": 4570
},
{
"epoch": 0.85,
"learning_rate": 3.9738896180147823e-05,
"loss": 4.3396,
"step": 4580
},
{
"epoch": 0.86,
"learning_rate": 3.9704358637839334e-05,
"loss": 4.3761,
"step": 4590
},
{
"epoch": 0.86,
"learning_rate": 3.9669821095530844e-05,
"loss": 4.3291,
"step": 4600
},
{
"epoch": 0.86,
"learning_rate": 3.9635283553222354e-05,
"loss": 4.3493,
"step": 4610
},
{
"epoch": 0.86,
"learning_rate": 3.9600746010913865e-05,
"loss": 4.3123,
"step": 4620
},
{
"epoch": 0.86,
"learning_rate": 3.9566208468605375e-05,
"loss": 4.3874,
"step": 4630
},
{
"epoch": 0.87,
"learning_rate": 3.9531670926296885e-05,
"loss": 4.3719,
"step": 4640
},
{
"epoch": 0.87,
"learning_rate": 3.94971333839884e-05,
"loss": 4.3051,
"step": 4650
},
{
"epoch": 0.87,
"learning_rate": 3.9462595841679906e-05,
"loss": 4.3216,
"step": 4660
},
{
"epoch": 0.87,
"learning_rate": 3.9428058299371416e-05,
"loss": 4.4278,
"step": 4670
},
{
"epoch": 0.87,
"learning_rate": 3.9393520757062927e-05,
"loss": 4.3334,
"step": 4680
},
{
"epoch": 0.87,
"learning_rate": 3.9358983214754444e-05,
"loss": 4.2799,
"step": 4690
},
{
"epoch": 0.88,
"learning_rate": 3.9324445672445954e-05,
"loss": 4.3025,
"step": 4700
},
{
"epoch": 0.88,
"learning_rate": 3.928990813013746e-05,
"loss": 4.2286,
"step": 4710
},
{
"epoch": 0.88,
"learning_rate": 3.9255370587828975e-05,
"loss": 4.286,
"step": 4720
},
{
"epoch": 0.88,
"learning_rate": 3.9220833045520485e-05,
"loss": 4.2102,
"step": 4730
},
{
"epoch": 0.88,
"learning_rate": 3.9186295503211995e-05,
"loss": 4.2735,
"step": 4740
},
{
"epoch": 0.89,
"learning_rate": 3.91517579609035e-05,
"loss": 4.3194,
"step": 4750
},
{
"epoch": 0.89,
"learning_rate": 3.9117220418595016e-05,
"loss": 4.2928,
"step": 4760
},
{
"epoch": 0.89,
"learning_rate": 3.9082682876286526e-05,
"loss": 4.348,
"step": 4770
},
{
"epoch": 0.89,
"learning_rate": 3.9048145333978037e-05,
"loss": 4.3026,
"step": 4780
},
{
"epoch": 0.89,
"learning_rate": 3.901360779166955e-05,
"loss": 4.2144,
"step": 4790
},
{
"epoch": 0.9,
"learning_rate": 3.897907024936106e-05,
"loss": 4.2993,
"step": 4800
},
{
"epoch": 0.9,
"learning_rate": 3.894453270705257e-05,
"loss": 4.2313,
"step": 4810
},
{
"epoch": 0.9,
"learning_rate": 3.890999516474408e-05,
"loss": 4.2205,
"step": 4820
},
{
"epoch": 0.9,
"learning_rate": 3.887545762243559e-05,
"loss": 4.2033,
"step": 4830
},
{
"epoch": 0.9,
"learning_rate": 3.8840920080127105e-05,
"loss": 4.2286,
"step": 4840
},
{
"epoch": 0.9,
"learning_rate": 3.880638253781861e-05,
"loss": 4.2882,
"step": 4850
},
{
"epoch": 0.91,
"learning_rate": 3.877184499551012e-05,
"loss": 4.2705,
"step": 4860
},
{
"epoch": 0.91,
"learning_rate": 3.873730745320163e-05,
"loss": 4.2006,
"step": 4870
},
{
"epoch": 0.91,
"learning_rate": 3.8702769910893146e-05,
"loss": 4.2344,
"step": 4880
},
{
"epoch": 0.91,
"learning_rate": 3.866823236858466e-05,
"loss": 4.2227,
"step": 4890
},
{
"epoch": 0.91,
"learning_rate": 3.863369482627616e-05,
"loss": 4.2481,
"step": 4900
},
{
"epoch": 0.92,
"learning_rate": 3.859915728396768e-05,
"loss": 4.2321,
"step": 4910
},
{
"epoch": 0.92,
"learning_rate": 3.856461974165919e-05,
"loss": 4.2588,
"step": 4920
},
{
"epoch": 0.92,
"learning_rate": 3.85300821993507e-05,
"loss": 4.1625,
"step": 4930
},
{
"epoch": 0.92,
"learning_rate": 3.84955446570422e-05,
"loss": 4.1685,
"step": 4940
},
{
"epoch": 0.92,
"learning_rate": 3.846100711473372e-05,
"loss": 4.2126,
"step": 4950
},
{
"epoch": 0.92,
"learning_rate": 3.842646957242523e-05,
"loss": 4.1852,
"step": 4960
},
{
"epoch": 0.93,
"learning_rate": 3.839193203011674e-05,
"loss": 4.0762,
"step": 4970
},
{
"epoch": 0.93,
"learning_rate": 3.835739448780825e-05,
"loss": 4.1486,
"step": 4980
},
{
"epoch": 0.93,
"learning_rate": 3.832285694549976e-05,
"loss": 4.2086,
"step": 4990
},
{
"epoch": 0.93,
"learning_rate": 3.828831940319127e-05,
"loss": 4.1741,
"step": 5000
},
{
"epoch": 0.93,
"learning_rate": 3.825378186088278e-05,
"loss": 4.1473,
"step": 5010
},
{
"epoch": 0.94,
"learning_rate": 3.821924431857429e-05,
"loss": 4.1852,
"step": 5020
},
{
"epoch": 0.94,
"learning_rate": 3.818470677626581e-05,
"loss": 4.126,
"step": 5030
},
{
"epoch": 0.94,
"learning_rate": 3.815016923395731e-05,
"loss": 4.2465,
"step": 5040
},
{
"epoch": 0.94,
"learning_rate": 3.811563169164882e-05,
"loss": 4.0662,
"step": 5050
},
{
"epoch": 0.94,
"learning_rate": 3.808109414934033e-05,
"loss": 4.1311,
"step": 5060
},
{
"epoch": 0.95,
"learning_rate": 3.804655660703185e-05,
"loss": 4.1458,
"step": 5070
},
{
"epoch": 0.95,
"learning_rate": 3.801201906472336e-05,
"loss": 4.1106,
"step": 5080
},
{
"epoch": 0.95,
"learning_rate": 3.797748152241486e-05,
"loss": 4.1293,
"step": 5090
},
{
"epoch": 0.95,
"learning_rate": 3.794294398010638e-05,
"loss": 4.0843,
"step": 5100
},
{
"epoch": 0.95,
"learning_rate": 3.790840643779789e-05,
"loss": 4.1247,
"step": 5110
},
{
"epoch": 0.95,
"learning_rate": 3.78738688954894e-05,
"loss": 4.1126,
"step": 5120
},
{
"epoch": 0.96,
"learning_rate": 3.7839331353180904e-05,
"loss": 4.0693,
"step": 5130
},
{
"epoch": 0.96,
"learning_rate": 3.780479381087242e-05,
"loss": 4.0749,
"step": 5140
},
{
"epoch": 0.96,
"learning_rate": 3.777025626856393e-05,
"loss": 4.1138,
"step": 5150
},
{
"epoch": 0.96,
"learning_rate": 3.773571872625544e-05,
"loss": 4.1342,
"step": 5160
},
{
"epoch": 0.96,
"learning_rate": 3.770118118394695e-05,
"loss": 4.2315,
"step": 5170
},
{
"epoch": 0.97,
"learning_rate": 3.766664364163846e-05,
"loss": 4.0744,
"step": 5180
},
{
"epoch": 0.97,
"learning_rate": 3.763210609932997e-05,
"loss": 4.1098,
"step": 5190
},
{
"epoch": 0.97,
"learning_rate": 3.759756855702148e-05,
"loss": 4.1641,
"step": 5200
},
{
"epoch": 0.97,
"learning_rate": 3.7563031014712994e-05,
"loss": 4.1207,
"step": 5210
},
{
"epoch": 0.97,
"learning_rate": 3.752849347240451e-05,
"loss": 4.0232,
"step": 5220
},
{
"epoch": 0.98,
"learning_rate": 3.7493955930096014e-05,
"loss": 4.1027,
"step": 5230
},
{
"epoch": 0.98,
"learning_rate": 3.7459418387787525e-05,
"loss": 4.1394,
"step": 5240
},
{
"epoch": 0.98,
"learning_rate": 3.7424880845479035e-05,
"loss": 4.0756,
"step": 5250
},
{
"epoch": 0.98,
"learning_rate": 3.739034330317055e-05,
"loss": 4.1204,
"step": 5260
},
{
"epoch": 0.98,
"learning_rate": 3.735580576086206e-05,
"loss": 4.1218,
"step": 5270
},
{
"epoch": 0.98,
"learning_rate": 3.7321268218553566e-05,
"loss": 4.0515,
"step": 5280
},
{
"epoch": 0.99,
"learning_rate": 3.728673067624508e-05,
"loss": 4.109,
"step": 5290
},
{
"epoch": 0.99,
"learning_rate": 3.725219313393659e-05,
"loss": 3.9909,
"step": 5300
},
{
"epoch": 0.99,
"learning_rate": 3.7217655591628104e-05,
"loss": 4.0671,
"step": 5310
},
{
"epoch": 0.99,
"learning_rate": 3.718311804931961e-05,
"loss": 4.1067,
"step": 5320
},
{
"epoch": 0.99,
"learning_rate": 3.7148580507011124e-05,
"loss": 4.0092,
"step": 5330
},
{
"epoch": 1.0,
"learning_rate": 3.7114042964702634e-05,
"loss": 4.0346,
"step": 5340
},
{
"epoch": 1.0,
"learning_rate": 3.7079505422394145e-05,
"loss": 4.0381,
"step": 5350
},
{
"epoch": 1.0,
"learning_rate": 3.7044967880085655e-05,
"loss": 4.0308,
"step": 5360
},
{
"epoch": 1.0,
"eval_accuracy": 0.24909529553679133,
"eval_loss": 3.6947672367095947,
"eval_runtime": 8621.8692,
"eval_samples_per_second": 8.846,
"eval_steps_per_second": 0.277,
"step": 5362
},
{
"epoch": 1.0,
"learning_rate": 3.7010430337777165e-05,
"loss": 4.0648,
"step": 5370
},
{
"epoch": 1.0,
"learning_rate": 3.6975892795468676e-05,
"loss": 3.9987,
"step": 5380
},
{
"epoch": 1.01,
"learning_rate": 3.6941355253160186e-05,
"loss": 3.9417,
"step": 5390
},
{
"epoch": 1.01,
"learning_rate": 3.6906817710851696e-05,
"loss": 3.9734,
"step": 5400
},
{
"epoch": 1.01,
"learning_rate": 3.6872280168543213e-05,
"loss": 3.9119,
"step": 5410
},
{
"epoch": 1.01,
"learning_rate": 3.683774262623472e-05,
"loss": 3.9818,
"step": 5420
},
{
"epoch": 1.01,
"learning_rate": 3.680320508392623e-05,
"loss": 3.9722,
"step": 5430
},
{
"epoch": 1.01,
"learning_rate": 3.676866754161774e-05,
"loss": 3.9544,
"step": 5440
},
{
"epoch": 1.02,
"learning_rate": 3.6734129999309255e-05,
"loss": 3.9963,
"step": 5450
},
{
"epoch": 1.02,
"learning_rate": 3.6699592457000765e-05,
"loss": 3.9356,
"step": 5460
},
{
"epoch": 1.02,
"learning_rate": 3.666505491469227e-05,
"loss": 3.9639,
"step": 5470
},
{
"epoch": 1.02,
"learning_rate": 3.6630517372383786e-05,
"loss": 3.9783,
"step": 5480
},
{
"epoch": 1.02,
"learning_rate": 3.6595979830075296e-05,
"loss": 3.9439,
"step": 5490
},
{
"epoch": 1.03,
"learning_rate": 3.6561442287766806e-05,
"loss": 3.9195,
"step": 5500
},
{
"epoch": 1.03,
"learning_rate": 3.652690474545831e-05,
"loss": 3.8927,
"step": 5510
},
{
"epoch": 1.03,
"learning_rate": 3.649236720314983e-05,
"loss": 3.9244,
"step": 5520
},
{
"epoch": 1.03,
"learning_rate": 3.645782966084134e-05,
"loss": 3.9266,
"step": 5530
},
{
"epoch": 1.03,
"learning_rate": 3.642329211853285e-05,
"loss": 3.9866,
"step": 5540
},
{
"epoch": 1.03,
"learning_rate": 3.638875457622436e-05,
"loss": 3.8888,
"step": 5550
},
{
"epoch": 1.04,
"learning_rate": 3.635421703391587e-05,
"loss": 3.8811,
"step": 5560
},
{
"epoch": 1.04,
"learning_rate": 3.631967949160738e-05,
"loss": 3.959,
"step": 5570
},
{
"epoch": 1.04,
"learning_rate": 3.628514194929889e-05,
"loss": 4.0576,
"step": 5580
},
{
"epoch": 1.04,
"learning_rate": 3.62506044069904e-05,
"loss": 3.9046,
"step": 5590
},
{
"epoch": 1.04,
"learning_rate": 3.6216066864681916e-05,
"loss": 3.8684,
"step": 5600
},
{
"epoch": 1.05,
"learning_rate": 3.618152932237342e-05,
"loss": 3.9167,
"step": 5610
},
{
"epoch": 1.05,
"learning_rate": 3.614699178006493e-05,
"loss": 3.8899,
"step": 5620
},
{
"epoch": 1.05,
"learning_rate": 3.611245423775644e-05,
"loss": 3.9039,
"step": 5630
},
{
"epoch": 1.05,
"learning_rate": 3.607791669544796e-05,
"loss": 3.8306,
"step": 5640
},
{
"epoch": 1.05,
"learning_rate": 3.604337915313946e-05,
"loss": 3.8872,
"step": 5650
},
{
"epoch": 1.06,
"learning_rate": 3.600884161083097e-05,
"loss": 3.8607,
"step": 5660
},
{
"epoch": 1.06,
"learning_rate": 3.597430406852249e-05,
"loss": 3.9271,
"step": 5670
},
{
"epoch": 1.06,
"learning_rate": 3.5939766526214e-05,
"loss": 4.006,
"step": 5680
},
{
"epoch": 1.06,
"learning_rate": 3.590522898390551e-05,
"loss": 3.901,
"step": 5690
},
{
"epoch": 1.06,
"learning_rate": 3.587069144159701e-05,
"loss": 3.8321,
"step": 5700
},
{
"epoch": 1.06,
"learning_rate": 3.583615389928853e-05,
"loss": 3.8744,
"step": 5710
},
{
"epoch": 1.07,
"learning_rate": 3.580161635698004e-05,
"loss": 3.9017,
"step": 5720
},
{
"epoch": 1.07,
"learning_rate": 3.576707881467155e-05,
"loss": 3.8878,
"step": 5730
},
{
"epoch": 1.07,
"learning_rate": 3.573254127236306e-05,
"loss": 3.832,
"step": 5740
},
{
"epoch": 1.07,
"learning_rate": 3.569800373005457e-05,
"loss": 3.831,
"step": 5750
},
{
"epoch": 1.07,
"learning_rate": 3.566346618774608e-05,
"loss": 3.9113,
"step": 5760
},
{
"epoch": 1.08,
"learning_rate": 3.562892864543759e-05,
"loss": 3.796,
"step": 5770
},
{
"epoch": 1.08,
"learning_rate": 3.55943911031291e-05,
"loss": 3.8043,
"step": 5780
},
{
"epoch": 1.08,
"learning_rate": 3.555985356082062e-05,
"loss": 3.9136,
"step": 5790
},
{
"epoch": 1.08,
"learning_rate": 3.552531601851212e-05,
"loss": 3.7756,
"step": 5800
},
{
"epoch": 1.08,
"learning_rate": 3.549077847620363e-05,
"loss": 3.7959,
"step": 5810
},
{
"epoch": 1.09,
"learning_rate": 3.545624093389514e-05,
"loss": 3.7942,
"step": 5820
},
{
"epoch": 1.09,
"learning_rate": 3.542170339158666e-05,
"loss": 3.8605,
"step": 5830
},
{
"epoch": 1.09,
"learning_rate": 3.5387165849278164e-05,
"loss": 3.7954,
"step": 5840
},
{
"epoch": 1.09,
"learning_rate": 3.5352628306969674e-05,
"loss": 3.8062,
"step": 5850
},
{
"epoch": 1.09,
"learning_rate": 3.531809076466119e-05,
"loss": 3.8087,
"step": 5860
},
{
"epoch": 1.09,
"learning_rate": 3.52835532223527e-05,
"loss": 3.801,
"step": 5870
},
{
"epoch": 1.1,
"learning_rate": 3.524901568004421e-05,
"loss": 3.7685,
"step": 5880
},
{
"epoch": 1.1,
"learning_rate": 3.5214478137735715e-05,
"loss": 3.8729,
"step": 5890
},
{
"epoch": 1.1,
"learning_rate": 3.517994059542723e-05,
"loss": 3.8502,
"step": 5900
},
{
"epoch": 1.1,
"learning_rate": 3.514540305311874e-05,
"loss": 3.8123,
"step": 5910
},
{
"epoch": 1.1,
"learning_rate": 3.511086551081025e-05,
"loss": 3.7957,
"step": 5920
},
{
"epoch": 1.11,
"learning_rate": 3.5076327968501763e-05,
"loss": 3.7741,
"step": 5930
},
{
"epoch": 1.11,
"learning_rate": 3.5041790426193274e-05,
"loss": 3.7132,
"step": 5940
},
{
"epoch": 1.11,
"learning_rate": 3.5007252883884784e-05,
"loss": 3.7215,
"step": 5950
},
{
"epoch": 1.11,
"learning_rate": 3.4972715341576294e-05,
"loss": 3.773,
"step": 5960
},
{
"epoch": 1.11,
"learning_rate": 3.4938177799267805e-05,
"loss": 3.7669,
"step": 5970
},
{
"epoch": 1.12,
"learning_rate": 3.490364025695932e-05,
"loss": 3.7389,
"step": 5980
},
{
"epoch": 1.12,
"learning_rate": 3.4869102714650825e-05,
"loss": 3.748,
"step": 5990
},
{
"epoch": 1.12,
"learning_rate": 3.4834565172342336e-05,
"loss": 3.7029,
"step": 6000
},
{
"epoch": 1.12,
"learning_rate": 3.4800027630033846e-05,
"loss": 3.8851,
"step": 6010
},
{
"epoch": 1.12,
"learning_rate": 3.476549008772536e-05,
"loss": 3.8138,
"step": 6020
},
{
"epoch": 1.12,
"learning_rate": 3.4730952545416867e-05,
"loss": 3.7595,
"step": 6030
},
{
"epoch": 1.13,
"learning_rate": 3.469641500310838e-05,
"loss": 3.7483,
"step": 6040
},
{
"epoch": 1.13,
"learning_rate": 3.4661877460799894e-05,
"loss": 3.7428,
"step": 6050
},
{
"epoch": 1.13,
"learning_rate": 3.4627339918491404e-05,
"loss": 3.7077,
"step": 6060
},
{
"epoch": 1.13,
"learning_rate": 3.4592802376182915e-05,
"loss": 3.7716,
"step": 6070
},
{
"epoch": 1.13,
"learning_rate": 3.455826483387442e-05,
"loss": 3.7343,
"step": 6080
},
{
"epoch": 1.14,
"learning_rate": 3.4523727291565935e-05,
"loss": 3.7992,
"step": 6090
},
{
"epoch": 1.14,
"learning_rate": 3.4489189749257446e-05,
"loss": 3.6995,
"step": 6100
},
{
"epoch": 1.14,
"learning_rate": 3.4454652206948956e-05,
"loss": 3.7807,
"step": 6110
},
{
"epoch": 1.14,
"learning_rate": 3.4420114664640466e-05,
"loss": 3.7395,
"step": 6120
},
{
"epoch": 1.14,
"learning_rate": 3.4385577122331976e-05,
"loss": 3.8304,
"step": 6130
},
{
"epoch": 1.14,
"learning_rate": 3.435103958002349e-05,
"loss": 3.6993,
"step": 6140
},
{
"epoch": 1.15,
"learning_rate": 3.4316502037715e-05,
"loss": 3.6915,
"step": 6150
},
{
"epoch": 1.15,
"learning_rate": 3.428196449540651e-05,
"loss": 3.6993,
"step": 6160
},
{
"epoch": 1.15,
"learning_rate": 3.4247426953098025e-05,
"loss": 3.7521,
"step": 6170
},
{
"epoch": 1.15,
"learning_rate": 3.421288941078953e-05,
"loss": 3.6159,
"step": 6180
},
{
"epoch": 1.15,
"learning_rate": 3.417835186848104e-05,
"loss": 3.8004,
"step": 6190
},
{
"epoch": 1.16,
"learning_rate": 3.414381432617255e-05,
"loss": 3.7197,
"step": 6200
},
{
"epoch": 1.16,
"learning_rate": 3.4109276783864066e-05,
"loss": 3.6887,
"step": 6210
},
{
"epoch": 1.16,
"learning_rate": 3.407473924155557e-05,
"loss": 3.6608,
"step": 6220
},
{
"epoch": 1.16,
"learning_rate": 3.404020169924708e-05,
"loss": 3.6339,
"step": 6230
},
{
"epoch": 1.16,
"learning_rate": 3.40056641569386e-05,
"loss": 3.7329,
"step": 6240
},
{
"epoch": 1.17,
"learning_rate": 3.397112661463011e-05,
"loss": 3.7651,
"step": 6250
},
{
"epoch": 1.17,
"learning_rate": 3.393658907232162e-05,
"loss": 3.731,
"step": 6260
},
{
"epoch": 1.17,
"learning_rate": 3.390205153001312e-05,
"loss": 3.6192,
"step": 6270
},
{
"epoch": 1.17,
"learning_rate": 3.386751398770464e-05,
"loss": 3.6153,
"step": 6280
},
{
"epoch": 1.17,
"learning_rate": 3.383297644539615e-05,
"loss": 3.6365,
"step": 6290
},
{
"epoch": 1.17,
"learning_rate": 3.379843890308766e-05,
"loss": 3.6716,
"step": 6300
},
{
"epoch": 1.18,
"learning_rate": 3.376390136077917e-05,
"loss": 3.6605,
"step": 6310
},
{
"epoch": 1.18,
"learning_rate": 3.372936381847068e-05,
"loss": 3.7046,
"step": 6320
},
{
"epoch": 1.18,
"learning_rate": 3.369482627616219e-05,
"loss": 3.6256,
"step": 6330
},
{
"epoch": 1.18,
"learning_rate": 3.36602887338537e-05,
"loss": 3.6081,
"step": 6340
},
{
"epoch": 1.18,
"learning_rate": 3.362575119154521e-05,
"loss": 3.6484,
"step": 6350
},
{
"epoch": 1.19,
"learning_rate": 3.359121364923673e-05,
"loss": 3.6968,
"step": 6360
},
{
"epoch": 1.19,
"learning_rate": 3.355667610692823e-05,
"loss": 3.5797,
"step": 6370
},
{
"epoch": 1.19,
"learning_rate": 3.352213856461974e-05,
"loss": 3.6661,
"step": 6380
},
{
"epoch": 1.19,
"learning_rate": 3.348760102231125e-05,
"loss": 3.6663,
"step": 6390
},
{
"epoch": 1.19,
"learning_rate": 3.345306348000277e-05,
"loss": 3.5707,
"step": 6400
},
{
"epoch": 1.2,
"learning_rate": 3.341852593769427e-05,
"loss": 3.5989,
"step": 6410
},
{
"epoch": 1.2,
"learning_rate": 3.338398839538578e-05,
"loss": 3.6403,
"step": 6420
},
{
"epoch": 1.2,
"learning_rate": 3.33494508530773e-05,
"loss": 3.6176,
"step": 6430
},
{
"epoch": 1.2,
"learning_rate": 3.331491331076881e-05,
"loss": 3.5832,
"step": 6440
},
{
"epoch": 1.2,
"learning_rate": 3.328037576846032e-05,
"loss": 3.6051,
"step": 6450
},
{
"epoch": 1.2,
"learning_rate": 3.3245838226151824e-05,
"loss": 3.6246,
"step": 6460
},
{
"epoch": 1.21,
"learning_rate": 3.321130068384334e-05,
"loss": 3.6141,
"step": 6470
},
{
"epoch": 1.21,
"learning_rate": 3.317676314153485e-05,
"loss": 3.5999,
"step": 6480
},
{
"epoch": 1.21,
"learning_rate": 3.314222559922636e-05,
"loss": 3.6628,
"step": 6490
},
{
"epoch": 1.21,
"learning_rate": 3.310768805691787e-05,
"loss": 3.651,
"step": 6500
},
{
"epoch": 1.21,
"learning_rate": 3.307315051460938e-05,
"loss": 3.5738,
"step": 6510
},
{
"epoch": 1.22,
"learning_rate": 3.303861297230089e-05,
"loss": 3.5479,
"step": 6520
},
{
"epoch": 1.22,
"learning_rate": 3.30040754299924e-05,
"loss": 3.5372,
"step": 6530
},
{
"epoch": 1.22,
"learning_rate": 3.296953788768391e-05,
"loss": 3.6088,
"step": 6540
},
{
"epoch": 1.22,
"learning_rate": 3.293500034537542e-05,
"loss": 3.5082,
"step": 6550
},
{
"epoch": 1.22,
"learning_rate": 3.2900462803066934e-05,
"loss": 3.5443,
"step": 6560
},
{
"epoch": 1.23,
"learning_rate": 3.2865925260758444e-05,
"loss": 3.4684,
"step": 6570
},
{
"epoch": 1.23,
"learning_rate": 3.2831387718449954e-05,
"loss": 3.5836,
"step": 6580
},
{
"epoch": 1.23,
"learning_rate": 3.279685017614147e-05,
"loss": 3.5457,
"step": 6590
},
{
"epoch": 1.23,
"learning_rate": 3.2762312633832975e-05,
"loss": 3.5694,
"step": 6600
},
{
"epoch": 1.23,
"learning_rate": 3.2727775091524485e-05,
"loss": 3.4931,
"step": 6610
},
{
"epoch": 1.23,
"learning_rate": 3.2693237549216e-05,
"loss": 3.6173,
"step": 6620
},
{
"epoch": 1.24,
"learning_rate": 3.265870000690751e-05,
"loss": 3.5387,
"step": 6630
},
{
"epoch": 1.24,
"learning_rate": 3.262416246459902e-05,
"loss": 3.552,
"step": 6640
},
{
"epoch": 1.24,
"learning_rate": 3.2589624922290526e-05,
"loss": 3.5856,
"step": 6650
},
{
"epoch": 1.24,
"learning_rate": 3.2555087379982044e-05,
"loss": 3.5191,
"step": 6660
},
{
"epoch": 1.24,
"learning_rate": 3.2520549837673554e-05,
"loss": 3.4632,
"step": 6670
},
{
"epoch": 1.25,
"learning_rate": 3.2486012295365064e-05,
"loss": 3.557,
"step": 6680
},
{
"epoch": 1.25,
"learning_rate": 3.2451474753056574e-05,
"loss": 3.4683,
"step": 6690
},
{
"epoch": 1.25,
"learning_rate": 3.2416937210748085e-05,
"loss": 3.6178,
"step": 6700
},
{
"epoch": 1.25,
"learning_rate": 3.2382399668439595e-05,
"loss": 3.509,
"step": 6710
},
{
"epoch": 1.25,
"learning_rate": 3.2347862126131105e-05,
"loss": 3.5511,
"step": 6720
},
{
"epoch": 1.25,
"learning_rate": 3.2313324583822616e-05,
"loss": 3.5375,
"step": 6730
},
{
"epoch": 1.26,
"learning_rate": 3.2278787041514126e-05,
"loss": 3.5648,
"step": 6740
},
{
"epoch": 1.26,
"learning_rate": 3.2244249499205636e-05,
"loss": 3.4806,
"step": 6750
},
{
"epoch": 1.26,
"learning_rate": 3.220971195689715e-05,
"loss": 3.5598,
"step": 6760
},
{
"epoch": 1.26,
"learning_rate": 3.217517441458866e-05,
"loss": 3.5497,
"step": 6770
},
{
"epoch": 1.26,
"learning_rate": 3.2140636872280174e-05,
"loss": 3.5869,
"step": 6780
},
{
"epoch": 1.27,
"learning_rate": 3.210609932997168e-05,
"loss": 3.46,
"step": 6790
},
{
"epoch": 1.27,
"learning_rate": 3.207156178766319e-05,
"loss": 3.4238,
"step": 6800
},
{
"epoch": 1.27,
"learning_rate": 3.2037024245354705e-05,
"loss": 3.5371,
"step": 6810
},
{
"epoch": 1.27,
"learning_rate": 3.2002486703046215e-05,
"loss": 3.5355,
"step": 6820
},
{
"epoch": 1.27,
"learning_rate": 3.1967949160737726e-05,
"loss": 3.481,
"step": 6830
},
{
"epoch": 1.28,
"learning_rate": 3.193341161842923e-05,
"loss": 3.3692,
"step": 6840
},
{
"epoch": 1.28,
"learning_rate": 3.1898874076120746e-05,
"loss": 3.4681,
"step": 6850
},
{
"epoch": 1.28,
"learning_rate": 3.1864336533812257e-05,
"loss": 3.4593,
"step": 6860
},
{
"epoch": 1.28,
"learning_rate": 3.182979899150377e-05,
"loss": 3.4291,
"step": 6870
},
{
"epoch": 1.28,
"learning_rate": 3.179526144919528e-05,
"loss": 3.452,
"step": 6880
},
{
"epoch": 1.28,
"learning_rate": 3.176072390688679e-05,
"loss": 3.4585,
"step": 6890
},
{
"epoch": 1.29,
"learning_rate": 3.17261863645783e-05,
"loss": 3.4393,
"step": 6900
},
{
"epoch": 1.29,
"learning_rate": 3.169164882226981e-05,
"loss": 3.472,
"step": 6910
},
{
"epoch": 1.29,
"learning_rate": 3.165711127996132e-05,
"loss": 3.4524,
"step": 6920
},
{
"epoch": 1.29,
"learning_rate": 3.162257373765283e-05,
"loss": 3.4865,
"step": 6930
},
{
"epoch": 1.29,
"learning_rate": 3.158803619534434e-05,
"loss": 3.442,
"step": 6940
},
{
"epoch": 1.3,
"learning_rate": 3.155349865303585e-05,
"loss": 3.4376,
"step": 6950
},
{
"epoch": 1.3,
"learning_rate": 3.151896111072736e-05,
"loss": 3.4278,
"step": 6960
},
{
"epoch": 1.3,
"learning_rate": 3.148442356841888e-05,
"loss": 3.342,
"step": 6970
},
{
"epoch": 1.3,
"learning_rate": 3.144988602611038e-05,
"loss": 3.4077,
"step": 6980
},
{
"epoch": 1.3,
"learning_rate": 3.141534848380189e-05,
"loss": 3.3748,
"step": 6990
},
{
"epoch": 1.31,
"learning_rate": 3.138081094149341e-05,
"loss": 3.3983,
"step": 7000
},
{
"epoch": 1.31,
"learning_rate": 3.134627339918492e-05,
"loss": 3.4114,
"step": 7010
},
{
"epoch": 1.31,
"learning_rate": 3.131173585687643e-05,
"loss": 3.5379,
"step": 7020
},
{
"epoch": 1.31,
"learning_rate": 3.127719831456793e-05,
"loss": 3.446,
"step": 7030
},
{
"epoch": 1.31,
"learning_rate": 3.124266077225945e-05,
"loss": 3.3867,
"step": 7040
},
{
"epoch": 1.31,
"learning_rate": 3.120812322995096e-05,
"loss": 3.434,
"step": 7050
},
{
"epoch": 1.32,
"learning_rate": 3.117358568764247e-05,
"loss": 3.425,
"step": 7060
},
{
"epoch": 1.32,
"learning_rate": 3.113904814533398e-05,
"loss": 3.4585,
"step": 7070
},
{
"epoch": 1.32,
"learning_rate": 3.110451060302549e-05,
"loss": 3.4087,
"step": 7080
},
{
"epoch": 1.32,
"learning_rate": 3.1069973060717e-05,
"loss": 3.4151,
"step": 7090
},
{
"epoch": 1.32,
"learning_rate": 3.103543551840851e-05,
"loss": 3.3507,
"step": 7100
},
{
"epoch": 1.33,
"learning_rate": 3.100089797610002e-05,
"loss": 3.5211,
"step": 7110
},
{
"epoch": 1.33,
"learning_rate": 3.096636043379153e-05,
"loss": 3.3704,
"step": 7120
},
{
"epoch": 1.33,
"learning_rate": 3.093182289148304e-05,
"loss": 3.4302,
"step": 7130
},
{
"epoch": 1.33,
"learning_rate": 3.089728534917455e-05,
"loss": 3.4675,
"step": 7140
},
{
"epoch": 1.33,
"learning_rate": 3.086274780686606e-05,
"loss": 3.4799,
"step": 7150
},
{
"epoch": 1.34,
"learning_rate": 3.082821026455758e-05,
"loss": 3.4777,
"step": 7160
},
{
"epoch": 1.34,
"learning_rate": 3.079367272224908e-05,
"loss": 3.4224,
"step": 7170
},
{
"epoch": 1.34,
"learning_rate": 3.0759135179940593e-05,
"loss": 3.3939,
"step": 7180
},
{
"epoch": 1.34,
"learning_rate": 3.072459763763211e-05,
"loss": 3.4045,
"step": 7190
},
{
"epoch": 1.34,
"learning_rate": 3.069006009532362e-05,
"loss": 3.3775,
"step": 7200
},
{
"epoch": 1.34,
"learning_rate": 3.065552255301513e-05,
"loss": 3.3832,
"step": 7210
},
{
"epoch": 1.35,
"learning_rate": 3.0620985010706635e-05,
"loss": 3.3987,
"step": 7220
},
{
"epoch": 1.35,
"learning_rate": 3.058644746839815e-05,
"loss": 3.3736,
"step": 7230
},
{
"epoch": 1.35,
"learning_rate": 3.055190992608966e-05,
"loss": 3.4009,
"step": 7240
},
{
"epoch": 1.35,
"learning_rate": 3.0517372383781172e-05,
"loss": 3.4189,
"step": 7250
},
{
"epoch": 1.35,
"learning_rate": 3.0482834841472686e-05,
"loss": 3.3918,
"step": 7260
},
{
"epoch": 1.36,
"learning_rate": 3.044829729916419e-05,
"loss": 3.38,
"step": 7270
},
{
"epoch": 1.36,
"learning_rate": 3.0413759756855703e-05,
"loss": 3.3672,
"step": 7280
},
{
"epoch": 1.36,
"learning_rate": 3.0379222214547214e-05,
"loss": 3.3829,
"step": 7290
},
{
"epoch": 1.36,
"learning_rate": 3.0344684672238727e-05,
"loss": 3.3583,
"step": 7300
},
{
"epoch": 1.36,
"learning_rate": 3.0310147129930234e-05,
"loss": 3.2762,
"step": 7310
},
{
"epoch": 1.36,
"learning_rate": 3.0275609587621745e-05,
"loss": 3.2542,
"step": 7320
},
{
"epoch": 1.37,
"learning_rate": 3.024107204531326e-05,
"loss": 3.2616,
"step": 7330
},
{
"epoch": 1.37,
"learning_rate": 3.020653450300477e-05,
"loss": 3.4247,
"step": 7340
},
{
"epoch": 1.37,
"learning_rate": 3.017199696069628e-05,
"loss": 3.2569,
"step": 7350
},
{
"epoch": 1.37,
"learning_rate": 3.0137459418387786e-05,
"loss": 3.2765,
"step": 7360
},
{
"epoch": 1.37,
"learning_rate": 3.01029218760793e-05,
"loss": 3.4377,
"step": 7370
},
{
"epoch": 1.38,
"learning_rate": 3.006838433377081e-05,
"loss": 3.289,
"step": 7380
},
{
"epoch": 1.38,
"learning_rate": 3.0033846791462324e-05,
"loss": 3.4032,
"step": 7390
},
{
"epoch": 1.38,
"learning_rate": 2.9999309249153834e-05,
"loss": 3.3145,
"step": 7400
},
{
"epoch": 1.38,
"learning_rate": 2.996477170684534e-05,
"loss": 3.3723,
"step": 7410
},
{
"epoch": 1.38,
"learning_rate": 2.993023416453685e-05,
"loss": 3.3719,
"step": 7420
},
{
"epoch": 1.39,
"learning_rate": 2.9895696622228365e-05,
"loss": 3.3268,
"step": 7430
},
{
"epoch": 1.39,
"learning_rate": 2.9861159079919875e-05,
"loss": 3.4391,
"step": 7440
},
{
"epoch": 1.39,
"learning_rate": 2.982662153761139e-05,
"loss": 3.2366,
"step": 7450
},
{
"epoch": 1.39,
"learning_rate": 2.9792083995302892e-05,
"loss": 3.3724,
"step": 7460
},
{
"epoch": 1.39,
"learning_rate": 2.9757546452994406e-05,
"loss": 3.3473,
"step": 7470
},
{
"epoch": 1.39,
"learning_rate": 2.9723008910685916e-05,
"loss": 3.2653,
"step": 7480
},
{
"epoch": 1.4,
"learning_rate": 2.968847136837743e-05,
"loss": 3.2872,
"step": 7490
},
{
"epoch": 1.4,
"learning_rate": 2.9653933826068937e-05,
"loss": 3.2932,
"step": 7500
},
{
"epoch": 1.4,
"learning_rate": 2.9619396283760447e-05,
"loss": 3.2541,
"step": 7510
},
{
"epoch": 1.4,
"learning_rate": 2.958485874145196e-05,
"loss": 3.3233,
"step": 7520
},
{
"epoch": 1.4,
"learning_rate": 2.955032119914347e-05,
"loss": 3.3025,
"step": 7530
},
{
"epoch": 1.41,
"learning_rate": 2.9515783656834982e-05,
"loss": 3.2965,
"step": 7540
},
{
"epoch": 1.41,
"learning_rate": 2.948124611452649e-05,
"loss": 3.2564,
"step": 7550
},
{
"epoch": 1.41,
"learning_rate": 2.9446708572218002e-05,
"loss": 3.3116,
"step": 7560
},
{
"epoch": 1.41,
"learning_rate": 2.9412171029909513e-05,
"loss": 3.2414,
"step": 7570
},
{
"epoch": 1.41,
"learning_rate": 2.9377633487601026e-05,
"loss": 3.3608,
"step": 7580
},
{
"epoch": 1.42,
"learning_rate": 2.9343095945292537e-05,
"loss": 3.3019,
"step": 7590
},
{
"epoch": 1.42,
"learning_rate": 2.9308558402984044e-05,
"loss": 3.3488,
"step": 7600
},
{
"epoch": 1.42,
"learning_rate": 2.9274020860675554e-05,
"loss": 3.3367,
"step": 7610
},
{
"epoch": 1.42,
"learning_rate": 2.9239483318367068e-05,
"loss": 3.2071,
"step": 7620
},
{
"epoch": 1.42,
"learning_rate": 2.9204945776058578e-05,
"loss": 3.2963,
"step": 7630
},
{
"epoch": 1.42,
"learning_rate": 2.9170408233750085e-05,
"loss": 3.2445,
"step": 7640
},
{
"epoch": 1.43,
"learning_rate": 2.9135870691441595e-05,
"loss": 3.3087,
"step": 7650
},
{
"epoch": 1.43,
"learning_rate": 2.910133314913311e-05,
"loss": 3.3239,
"step": 7660
},
{
"epoch": 1.43,
"learning_rate": 2.906679560682462e-05,
"loss": 3.2231,
"step": 7670
},
{
"epoch": 1.43,
"learning_rate": 2.9032258064516133e-05,
"loss": 3.3682,
"step": 7680
},
{
"epoch": 1.43,
"learning_rate": 2.899772052220764e-05,
"loss": 3.1886,
"step": 7690
},
{
"epoch": 1.44,
"learning_rate": 2.896318297989915e-05,
"loss": 3.285,
"step": 7700
},
{
"epoch": 1.44,
"learning_rate": 2.8928645437590664e-05,
"loss": 3.1984,
"step": 7710
},
{
"epoch": 1.44,
"learning_rate": 2.8894107895282174e-05,
"loss": 3.2885,
"step": 7720
},
{
"epoch": 1.44,
"learning_rate": 2.8859570352973685e-05,
"loss": 3.2339,
"step": 7730
},
{
"epoch": 1.44,
"learning_rate": 2.882503281066519e-05,
"loss": 3.3014,
"step": 7740
},
{
"epoch": 1.45,
"learning_rate": 2.8790495268356705e-05,
"loss": 3.3142,
"step": 7750
},
{
"epoch": 1.45,
"learning_rate": 2.8755957726048215e-05,
"loss": 3.2698,
"step": 7760
},
{
"epoch": 1.45,
"learning_rate": 2.872142018373973e-05,
"loss": 3.2825,
"step": 7770
},
{
"epoch": 1.45,
"learning_rate": 2.868688264143124e-05,
"loss": 3.2214,
"step": 7780
},
{
"epoch": 1.45,
"learning_rate": 2.8652345099122746e-05,
"loss": 3.1402,
"step": 7790
},
{
"epoch": 1.45,
"learning_rate": 2.8617807556814257e-05,
"loss": 3.2267,
"step": 7800
},
{
"epoch": 1.46,
"learning_rate": 2.858327001450577e-05,
"loss": 3.185,
"step": 7810
},
{
"epoch": 1.46,
"learning_rate": 2.854873247219728e-05,
"loss": 3.1691,
"step": 7820
},
{
"epoch": 1.46,
"learning_rate": 2.8514194929888788e-05,
"loss": 3.1357,
"step": 7830
},
{
"epoch": 1.46,
"learning_rate": 2.8479657387580298e-05,
"loss": 3.2274,
"step": 7840
},
{
"epoch": 1.46,
"learning_rate": 2.844511984527181e-05,
"loss": 3.2324,
"step": 7850
},
{
"epoch": 1.47,
"learning_rate": 2.8410582302963322e-05,
"loss": 3.2761,
"step": 7860
},
{
"epoch": 1.47,
"learning_rate": 2.8376044760654836e-05,
"loss": 3.2883,
"step": 7870
},
{
"epoch": 1.47,
"learning_rate": 2.8341507218346343e-05,
"loss": 3.2104,
"step": 7880
},
{
"epoch": 1.47,
"learning_rate": 2.8306969676037853e-05,
"loss": 3.2581,
"step": 7890
},
{
"epoch": 1.47,
"learning_rate": 2.8272432133729367e-05,
"loss": 3.265,
"step": 7900
},
{
"epoch": 1.47,
"learning_rate": 2.8237894591420877e-05,
"loss": 3.2577,
"step": 7910
},
{
"epoch": 1.48,
"learning_rate": 2.8203357049112387e-05,
"loss": 3.2683,
"step": 7920
},
{
"epoch": 1.48,
"learning_rate": 2.8168819506803894e-05,
"loss": 3.1734,
"step": 7930
},
{
"epoch": 1.48,
"learning_rate": 2.8134281964495408e-05,
"loss": 3.2041,
"step": 7940
},
{
"epoch": 1.48,
"learning_rate": 2.8099744422186918e-05,
"loss": 3.1578,
"step": 7950
},
{
"epoch": 1.48,
"learning_rate": 2.8065206879878432e-05,
"loss": 3.2432,
"step": 7960
},
{
"epoch": 1.49,
"learning_rate": 2.8030669337569942e-05,
"loss": 3.2978,
"step": 7970
},
{
"epoch": 1.49,
"learning_rate": 2.799613179526145e-05,
"loss": 3.201,
"step": 7980
},
{
"epoch": 1.49,
"learning_rate": 2.796159425295296e-05,
"loss": 3.2955,
"step": 7990
},
{
"epoch": 1.49,
"learning_rate": 2.7927056710644473e-05,
"loss": 3.206,
"step": 8000
},
{
"epoch": 1.49,
"learning_rate": 2.7892519168335984e-05,
"loss": 3.1621,
"step": 8010
},
{
"epoch": 1.5,
"learning_rate": 2.785798162602749e-05,
"loss": 3.2604,
"step": 8020
},
{
"epoch": 1.5,
"learning_rate": 2.7823444083719004e-05,
"loss": 3.2261,
"step": 8030
},
{
"epoch": 1.5,
"learning_rate": 2.7788906541410514e-05,
"loss": 3.1247,
"step": 8040
},
{
"epoch": 1.5,
"learning_rate": 2.7754368999102025e-05,
"loss": 3.1877,
"step": 8050
},
{
"epoch": 1.5,
"learning_rate": 2.771983145679354e-05,
"loss": 3.1831,
"step": 8060
},
{
"epoch": 1.5,
"learning_rate": 2.7685293914485045e-05,
"loss": 3.177,
"step": 8070
},
{
"epoch": 1.51,
"learning_rate": 2.7650756372176556e-05,
"loss": 3.1021,
"step": 8080
},
{
"epoch": 1.51,
"learning_rate": 2.761621882986807e-05,
"loss": 3.1824,
"step": 8090
},
{
"epoch": 1.51,
"learning_rate": 2.758168128755958e-05,
"loss": 3.1609,
"step": 8100
},
{
"epoch": 1.51,
"learning_rate": 2.754714374525109e-05,
"loss": 3.2185,
"step": 8110
},
{
"epoch": 1.51,
"learning_rate": 2.7512606202942597e-05,
"loss": 3.1663,
"step": 8120
},
{
"epoch": 1.52,
"learning_rate": 2.747806866063411e-05,
"loss": 3.1356,
"step": 8130
},
{
"epoch": 1.52,
"learning_rate": 2.744353111832562e-05,
"loss": 3.1245,
"step": 8140
},
{
"epoch": 1.52,
"learning_rate": 2.7408993576017135e-05,
"loss": 3.1758,
"step": 8150
},
{
"epoch": 1.52,
"learning_rate": 2.7374456033708645e-05,
"loss": 3.0987,
"step": 8160
},
{
"epoch": 1.52,
"learning_rate": 2.7339918491400152e-05,
"loss": 3.0779,
"step": 8170
},
{
"epoch": 1.53,
"learning_rate": 2.7305380949091662e-05,
"loss": 3.1655,
"step": 8180
},
{
"epoch": 1.53,
"learning_rate": 2.7270843406783176e-05,
"loss": 3.1217,
"step": 8190
},
{
"epoch": 1.53,
"learning_rate": 2.7236305864474686e-05,
"loss": 3.1073,
"step": 8200
},
{
"epoch": 1.53,
"learning_rate": 2.7201768322166193e-05,
"loss": 3.1061,
"step": 8210
},
{
"epoch": 1.53,
"learning_rate": 2.7167230779857707e-05,
"loss": 3.1527,
"step": 8220
},
{
"epoch": 1.53,
"learning_rate": 2.7132693237549217e-05,
"loss": 3.1558,
"step": 8230
},
{
"epoch": 1.54,
"learning_rate": 2.7098155695240728e-05,
"loss": 3.1518,
"step": 8240
},
{
"epoch": 1.54,
"learning_rate": 2.706361815293224e-05,
"loss": 3.1196,
"step": 8250
},
{
"epoch": 1.54,
"learning_rate": 2.7029080610623748e-05,
"loss": 3.1993,
"step": 8260
},
{
"epoch": 1.54,
"learning_rate": 2.699454306831526e-05,
"loss": 3.1277,
"step": 8270
},
{
"epoch": 1.54,
"learning_rate": 2.6960005526006772e-05,
"loss": 3.1874,
"step": 8280
},
{
"epoch": 1.55,
"learning_rate": 2.6925467983698282e-05,
"loss": 3.1914,
"step": 8290
},
{
"epoch": 1.55,
"learning_rate": 2.6890930441389793e-05,
"loss": 3.0947,
"step": 8300
},
{
"epoch": 1.55,
"learning_rate": 2.68563928990813e-05,
"loss": 3.0878,
"step": 8310
},
{
"epoch": 1.55,
"learning_rate": 2.6821855356772813e-05,
"loss": 3.1066,
"step": 8320
},
{
"epoch": 1.55,
"learning_rate": 2.6787317814464324e-05,
"loss": 3.1181,
"step": 8330
},
{
"epoch": 1.56,
"learning_rate": 2.6752780272155837e-05,
"loss": 3.1704,
"step": 8340
},
{
"epoch": 1.56,
"learning_rate": 2.6718242729847348e-05,
"loss": 3.1091,
"step": 8350
},
{
"epoch": 1.56,
"learning_rate": 2.6683705187538855e-05,
"loss": 3.0995,
"step": 8360
},
{
"epoch": 1.56,
"learning_rate": 2.6649167645230365e-05,
"loss": 3.1144,
"step": 8370
},
{
"epoch": 1.56,
"learning_rate": 2.661463010292188e-05,
"loss": 3.0199,
"step": 8380
},
{
"epoch": 1.56,
"learning_rate": 2.658009256061339e-05,
"loss": 3.118,
"step": 8390
},
{
"epoch": 1.57,
"learning_rate": 2.6545555018304896e-05,
"loss": 3.1443,
"step": 8400
},
{
"epoch": 1.57,
"learning_rate": 2.651101747599641e-05,
"loss": 3.1003,
"step": 8410
},
{
"epoch": 1.57,
"learning_rate": 2.647647993368792e-05,
"loss": 3.1032,
"step": 8420
},
{
"epoch": 1.57,
"learning_rate": 2.644194239137943e-05,
"loss": 3.0726,
"step": 8430
},
{
"epoch": 1.57,
"learning_rate": 2.6407404849070944e-05,
"loss": 3.1226,
"step": 8440
},
{
"epoch": 1.58,
"learning_rate": 2.637286730676245e-05,
"loss": 3.1293,
"step": 8450
},
{
"epoch": 1.58,
"learning_rate": 2.633832976445396e-05,
"loss": 2.9997,
"step": 8460
},
{
"epoch": 1.58,
"learning_rate": 2.6303792222145475e-05,
"loss": 3.0414,
"step": 8470
},
{
"epoch": 1.58,
"learning_rate": 2.6269254679836985e-05,
"loss": 3.11,
"step": 8480
},
{
"epoch": 1.58,
"learning_rate": 2.6234717137528496e-05,
"loss": 3.1564,
"step": 8490
},
{
"epoch": 1.59,
"learning_rate": 2.6200179595220002e-05,
"loss": 3.0275,
"step": 8500
},
{
"epoch": 1.59,
"learning_rate": 2.6165642052911516e-05,
"loss": 3.1224,
"step": 8510
},
{
"epoch": 1.59,
"learning_rate": 2.6131104510603027e-05,
"loss": 3.089,
"step": 8520
},
{
"epoch": 1.59,
"learning_rate": 2.609656696829454e-05,
"loss": 2.9951,
"step": 8530
},
{
"epoch": 1.59,
"learning_rate": 2.6062029425986044e-05,
"loss": 3.0263,
"step": 8540
},
{
"epoch": 1.59,
"learning_rate": 2.6027491883677557e-05,
"loss": 3.0849,
"step": 8550
},
{
"epoch": 1.6,
"learning_rate": 2.5992954341369068e-05,
"loss": 3.0331,
"step": 8560
},
{
"epoch": 1.6,
"learning_rate": 2.595841679906058e-05,
"loss": 3.0678,
"step": 8570
},
{
"epoch": 1.6,
"learning_rate": 2.5923879256752092e-05,
"loss": 3.0979,
"step": 8580
},
{
"epoch": 1.6,
"learning_rate": 2.58893417144436e-05,
"loss": 3.1191,
"step": 8590
},
{
"epoch": 1.6,
"learning_rate": 2.5854804172135112e-05,
"loss": 3.0135,
"step": 8600
},
{
"epoch": 1.61,
"learning_rate": 2.5820266629826623e-05,
"loss": 3.0578,
"step": 8610
},
{
"epoch": 1.61,
"learning_rate": 2.5785729087518133e-05,
"loss": 3.1627,
"step": 8620
},
{
"epoch": 1.61,
"learning_rate": 2.5751191545209647e-05,
"loss": 3.0275,
"step": 8630
},
{
"epoch": 1.61,
"learning_rate": 2.5716654002901154e-05,
"loss": 2.9919,
"step": 8640
},
{
"epoch": 1.61,
"learning_rate": 2.5682116460592664e-05,
"loss": 3.0629,
"step": 8650
},
{
"epoch": 1.61,
"learning_rate": 2.5647578918284178e-05,
"loss": 3.0972,
"step": 8660
},
{
"epoch": 1.62,
"learning_rate": 2.5613041375975688e-05,
"loss": 3.1405,
"step": 8670
},
{
"epoch": 1.62,
"learning_rate": 2.5578503833667202e-05,
"loss": 3.0783,
"step": 8680
},
{
"epoch": 1.62,
"learning_rate": 2.5543966291358705e-05,
"loss": 3.0817,
"step": 8690
},
{
"epoch": 1.62,
"learning_rate": 2.550942874905022e-05,
"loss": 3.0429,
"step": 8700
},
{
"epoch": 1.62,
"learning_rate": 2.547489120674173e-05,
"loss": 3.0268,
"step": 8710
},
{
"epoch": 1.63,
"learning_rate": 2.5440353664433243e-05,
"loss": 3.0695,
"step": 8720
},
{
"epoch": 1.63,
"learning_rate": 2.5405816122124747e-05,
"loss": 3.0673,
"step": 8730
},
{
"epoch": 1.63,
"learning_rate": 2.537127857981626e-05,
"loss": 2.9537,
"step": 8740
},
{
"epoch": 1.63,
"learning_rate": 2.533674103750777e-05,
"loss": 2.982,
"step": 8750
},
{
"epoch": 1.63,
"learning_rate": 2.5302203495199284e-05,
"loss": 3.0885,
"step": 8760
},
{
"epoch": 1.64,
"learning_rate": 2.5267665952890795e-05,
"loss": 3.0187,
"step": 8770
},
{
"epoch": 1.64,
"learning_rate": 2.52331284105823e-05,
"loss": 2.9979,
"step": 8780
},
{
"epoch": 1.64,
"learning_rate": 2.5198590868273815e-05,
"loss": 2.998,
"step": 8790
},
{
"epoch": 1.64,
"learning_rate": 2.5164053325965326e-05,
"loss": 2.9708,
"step": 8800
},
{
"epoch": 1.64,
"learning_rate": 2.5129515783656836e-05,
"loss": 2.9856,
"step": 8810
},
{
"epoch": 1.64,
"learning_rate": 2.509497824134835e-05,
"loss": 3.0117,
"step": 8820
},
{
"epoch": 1.65,
"learning_rate": 2.5060440699039856e-05,
"loss": 3.0323,
"step": 8830
},
{
"epoch": 1.65,
"learning_rate": 2.5025903156731367e-05,
"loss": 3.0227,
"step": 8840
},
{
"epoch": 1.65,
"learning_rate": 2.499136561442288e-05,
"loss": 3.0074,
"step": 8850
},
{
"epoch": 1.65,
"learning_rate": 2.4956828072114387e-05,
"loss": 3.0091,
"step": 8860
},
{
"epoch": 1.65,
"learning_rate": 2.49222905298059e-05,
"loss": 3.1571,
"step": 8870
},
{
"epoch": 1.66,
"learning_rate": 2.488775298749741e-05,
"loss": 3.0275,
"step": 8880
},
{
"epoch": 1.66,
"learning_rate": 2.4853215445188922e-05,
"loss": 3.0307,
"step": 8890
},
{
"epoch": 1.66,
"learning_rate": 2.4818677902880432e-05,
"loss": 2.9366,
"step": 8900
},
{
"epoch": 1.66,
"learning_rate": 2.4784140360571942e-05,
"loss": 2.9924,
"step": 8910
},
{
"epoch": 1.66,
"learning_rate": 2.4749602818263453e-05,
"loss": 3.0313,
"step": 8920
},
{
"epoch": 1.67,
"learning_rate": 2.4715065275954966e-05,
"loss": 2.9134,
"step": 8930
},
{
"epoch": 1.67,
"learning_rate": 2.4680527733646473e-05,
"loss": 2.9929,
"step": 8940
},
{
"epoch": 1.67,
"learning_rate": 2.4645990191337987e-05,
"loss": 2.9983,
"step": 8950
},
{
"epoch": 1.67,
"learning_rate": 2.4611452649029494e-05,
"loss": 2.9756,
"step": 8960
},
{
"epoch": 1.67,
"learning_rate": 2.4576915106721008e-05,
"loss": 2.9688,
"step": 8970
},
{
"epoch": 1.67,
"learning_rate": 2.4542377564412518e-05,
"loss": 2.9359,
"step": 8980
},
{
"epoch": 1.68,
"learning_rate": 2.4507840022104028e-05,
"loss": 3.0948,
"step": 8990
},
{
"epoch": 1.68,
"learning_rate": 2.447330247979554e-05,
"loss": 2.9085,
"step": 9000
},
{
"epoch": 1.68,
"learning_rate": 2.443876493748705e-05,
"loss": 3.053,
"step": 9010
},
{
"epoch": 1.68,
"learning_rate": 2.4404227395178563e-05,
"loss": 3.0007,
"step": 9020
},
{
"epoch": 1.68,
"learning_rate": 2.436968985287007e-05,
"loss": 2.9837,
"step": 9030
},
{
"epoch": 1.69,
"learning_rate": 2.4335152310561583e-05,
"loss": 2.9764,
"step": 9040
},
{
"epoch": 1.69,
"learning_rate": 2.430061476825309e-05,
"loss": 2.955,
"step": 9050
},
{
"epoch": 1.69,
"learning_rate": 2.4266077225944604e-05,
"loss": 2.9645,
"step": 9060
},
{
"epoch": 1.69,
"learning_rate": 2.4231539683636114e-05,
"loss": 3.0081,
"step": 9070
},
{
"epoch": 1.69,
"learning_rate": 2.4197002141327624e-05,
"loss": 3.0566,
"step": 9080
},
{
"epoch": 1.7,
"learning_rate": 2.4162464599019135e-05,
"loss": 2.8261,
"step": 9090
},
{
"epoch": 1.7,
"learning_rate": 2.4127927056710645e-05,
"loss": 2.9972,
"step": 9100
},
{
"epoch": 1.7,
"learning_rate": 2.4093389514402155e-05,
"loss": 2.9635,
"step": 9110
},
{
"epoch": 1.7,
"learning_rate": 2.4058851972093666e-05,
"loss": 2.9686,
"step": 9120
},
{
"epoch": 1.7,
"learning_rate": 2.4024314429785176e-05,
"loss": 2.952,
"step": 9130
},
{
"epoch": 1.7,
"learning_rate": 2.398977688747669e-05,
"loss": 2.9526,
"step": 9140
},
{
"epoch": 1.71,
"learning_rate": 2.3955239345168197e-05,
"loss": 3.0147,
"step": 9150
},
{
"epoch": 1.71,
"learning_rate": 2.392070180285971e-05,
"loss": 2.8927,
"step": 9160
},
{
"epoch": 1.71,
"learning_rate": 2.388616426055122e-05,
"loss": 2.8888,
"step": 9170
},
{
"epoch": 1.71,
"learning_rate": 2.385162671824273e-05,
"loss": 2.9943,
"step": 9180
},
{
"epoch": 1.71,
"learning_rate": 2.381708917593424e-05,
"loss": 2.8836,
"step": 9190
},
{
"epoch": 1.72,
"learning_rate": 2.378255163362575e-05,
"loss": 3.0489,
"step": 9200
},
{
"epoch": 1.72,
"learning_rate": 2.3748014091317265e-05,
"loss": 3.009,
"step": 9210
},
{
"epoch": 1.72,
"learning_rate": 2.3713476549008772e-05,
"loss": 2.8603,
"step": 9220
},
{
"epoch": 1.72,
"learning_rate": 2.3678939006700286e-05,
"loss": 2.9036,
"step": 9230
},
{
"epoch": 1.72,
"learning_rate": 2.3644401464391793e-05,
"loss": 2.9626,
"step": 9240
},
{
"epoch": 1.72,
"learning_rate": 2.3609863922083307e-05,
"loss": 2.9827,
"step": 9250
},
{
"epoch": 1.73,
"learning_rate": 2.3575326379774817e-05,
"loss": 3.0024,
"step": 9260
},
{
"epoch": 1.73,
"learning_rate": 2.3540788837466327e-05,
"loss": 2.9592,
"step": 9270
},
{
"epoch": 1.73,
"learning_rate": 2.3506251295157838e-05,
"loss": 2.9028,
"step": 9280
},
{
"epoch": 1.73,
"learning_rate": 2.3471713752849348e-05,
"loss": 2.8719,
"step": 9290
},
{
"epoch": 1.73,
"learning_rate": 2.3437176210540858e-05,
"loss": 2.9314,
"step": 9300
},
{
"epoch": 1.74,
"learning_rate": 2.340263866823237e-05,
"loss": 3.0222,
"step": 9310
},
{
"epoch": 1.74,
"learning_rate": 2.336810112592388e-05,
"loss": 2.9664,
"step": 9320
},
{
"epoch": 1.74,
"learning_rate": 2.3333563583615393e-05,
"loss": 2.8377,
"step": 9330
},
{
"epoch": 1.74,
"learning_rate": 2.32990260413069e-05,
"loss": 2.9357,
"step": 9340
},
{
"epoch": 1.74,
"learning_rate": 2.3264488498998413e-05,
"loss": 2.8697,
"step": 9350
},
{
"epoch": 1.75,
"learning_rate": 2.3229950956689923e-05,
"loss": 2.909,
"step": 9360
},
{
"epoch": 1.75,
"learning_rate": 2.3195413414381434e-05,
"loss": 2.9791,
"step": 9370
},
{
"epoch": 1.75,
"learning_rate": 2.3160875872072944e-05,
"loss": 2.9093,
"step": 9380
},
{
"epoch": 1.75,
"learning_rate": 2.3126338329764454e-05,
"loss": 2.843,
"step": 9390
},
{
"epoch": 1.75,
"learning_rate": 2.3091800787455968e-05,
"loss": 2.889,
"step": 9400
},
{
"epoch": 1.75,
"learning_rate": 2.3057263245147475e-05,
"loss": 2.8633,
"step": 9410
},
{
"epoch": 1.76,
"learning_rate": 2.302272570283899e-05,
"loss": 2.9043,
"step": 9420
},
{
"epoch": 1.76,
"learning_rate": 2.2988188160530496e-05,
"loss": 2.8618,
"step": 9430
},
{
"epoch": 1.76,
"learning_rate": 2.295365061822201e-05,
"loss": 2.8755,
"step": 9440
},
{
"epoch": 1.76,
"learning_rate": 2.291911307591352e-05,
"loss": 2.8721,
"step": 9450
},
{
"epoch": 1.76,
"learning_rate": 2.288457553360503e-05,
"loss": 2.926,
"step": 9460
},
{
"epoch": 1.77,
"learning_rate": 2.285003799129654e-05,
"loss": 2.8687,
"step": 9470
},
{
"epoch": 1.77,
"learning_rate": 2.281550044898805e-05,
"loss": 2.9855,
"step": 9480
},
{
"epoch": 1.77,
"learning_rate": 2.278096290667956e-05,
"loss": 2.7932,
"step": 9490
},
{
"epoch": 1.77,
"learning_rate": 2.274642536437107e-05,
"loss": 2.8963,
"step": 9500
},
{
"epoch": 1.77,
"learning_rate": 2.271188782206258e-05,
"loss": 2.886,
"step": 9510
},
{
"epoch": 1.78,
"learning_rate": 2.2677350279754095e-05,
"loss": 2.8421,
"step": 9520
},
{
"epoch": 1.78,
"learning_rate": 2.2642812737445606e-05,
"loss": 2.9048,
"step": 9530
},
{
"epoch": 1.78,
"learning_rate": 2.2608275195137116e-05,
"loss": 2.8581,
"step": 9540
},
{
"epoch": 1.78,
"learning_rate": 2.2573737652828626e-05,
"loss": 2.8165,
"step": 9550
},
{
"epoch": 1.78,
"learning_rate": 2.2539200110520137e-05,
"loss": 2.8677,
"step": 9560
},
{
"epoch": 1.78,
"learning_rate": 2.2504662568211647e-05,
"loss": 2.7541,
"step": 9570
},
{
"epoch": 1.79,
"learning_rate": 2.2470125025903157e-05,
"loss": 2.907,
"step": 9580
},
{
"epoch": 1.79,
"learning_rate": 2.243558748359467e-05,
"loss": 2.9712,
"step": 9590
},
{
"epoch": 1.79,
"learning_rate": 2.2401049941286178e-05,
"loss": 2.945,
"step": 9600
},
{
"epoch": 1.79,
"learning_rate": 2.236651239897769e-05,
"loss": 2.9733,
"step": 9610
},
{
"epoch": 1.79,
"learning_rate": 2.23319748566692e-05,
"loss": 2.7933,
"step": 9620
},
{
"epoch": 1.8,
"learning_rate": 2.2297437314360712e-05,
"loss": 2.9037,
"step": 9630
},
{
"epoch": 1.8,
"learning_rate": 2.2262899772052222e-05,
"loss": 2.8159,
"step": 9640
},
{
"epoch": 1.8,
"learning_rate": 2.2228362229743733e-05,
"loss": 2.9867,
"step": 9650
},
{
"epoch": 1.8,
"learning_rate": 2.2193824687435243e-05,
"loss": 2.7537,
"step": 9660
},
{
"epoch": 1.8,
"learning_rate": 2.2159287145126753e-05,
"loss": 2.859,
"step": 9670
},
{
"epoch": 1.81,
"learning_rate": 2.2124749602818264e-05,
"loss": 2.8736,
"step": 9680
},
{
"epoch": 1.81,
"learning_rate": 2.2090212060509774e-05,
"loss": 2.8086,
"step": 9690
},
{
"epoch": 1.81,
"learning_rate": 2.2055674518201284e-05,
"loss": 2.913,
"step": 9700
},
{
"epoch": 1.81,
"learning_rate": 2.2021136975892798e-05,
"loss": 2.7818,
"step": 9710
},
{
"epoch": 1.81,
"learning_rate": 2.198659943358431e-05,
"loss": 2.8732,
"step": 9720
},
{
"epoch": 1.81,
"learning_rate": 2.195206189127582e-05,
"loss": 2.8173,
"step": 9730
},
{
"epoch": 1.82,
"learning_rate": 2.191752434896733e-05,
"loss": 2.8845,
"step": 9740
},
{
"epoch": 1.82,
"learning_rate": 2.188298680665884e-05,
"loss": 2.8686,
"step": 9750
},
{
"epoch": 1.82,
"learning_rate": 2.184844926435035e-05,
"loss": 2.9331,
"step": 9760
},
{
"epoch": 1.82,
"learning_rate": 2.181391172204186e-05,
"loss": 2.8525,
"step": 9770
},
{
"epoch": 1.82,
"learning_rate": 2.1779374179733374e-05,
"loss": 2.8304,
"step": 9780
},
{
"epoch": 1.83,
"learning_rate": 2.174483663742488e-05,
"loss": 2.7739,
"step": 9790
},
{
"epoch": 1.83,
"learning_rate": 2.1710299095116394e-05,
"loss": 2.9298,
"step": 9800
},
{
"epoch": 1.83,
"learning_rate": 2.16757615528079e-05,
"loss": 2.824,
"step": 9810
},
{
"epoch": 1.83,
"learning_rate": 2.1641224010499415e-05,
"loss": 2.7872,
"step": 9820
},
{
"epoch": 1.83,
"learning_rate": 2.1606686468190925e-05,
"loss": 2.7732,
"step": 9830
},
{
"epoch": 1.83,
"learning_rate": 2.1572148925882436e-05,
"loss": 2.7426,
"step": 9840
},
{
"epoch": 1.84,
"learning_rate": 2.1537611383573946e-05,
"loss": 2.82,
"step": 9850
},
{
"epoch": 1.84,
"learning_rate": 2.1503073841265456e-05,
"loss": 2.7538,
"step": 9860
},
{
"epoch": 1.84,
"learning_rate": 2.1468536298956967e-05,
"loss": 2.7856,
"step": 9870
},
{
"epoch": 1.84,
"learning_rate": 2.1433998756648477e-05,
"loss": 2.7411,
"step": 9880
},
{
"epoch": 1.84,
"learning_rate": 2.1399461214339987e-05,
"loss": 2.7934,
"step": 9890
},
{
"epoch": 1.85,
"learning_rate": 2.13649236720315e-05,
"loss": 2.8427,
"step": 9900
},
{
"epoch": 1.85,
"learning_rate": 2.133038612972301e-05,
"loss": 2.7685,
"step": 9910
},
{
"epoch": 1.85,
"learning_rate": 2.129584858741452e-05,
"loss": 2.7284,
"step": 9920
},
{
"epoch": 1.85,
"learning_rate": 2.1261311045106032e-05,
"loss": 2.8034,
"step": 9930
},
{
"epoch": 1.85,
"learning_rate": 2.1226773502797542e-05,
"loss": 2.8175,
"step": 9940
},
{
"epoch": 1.86,
"learning_rate": 2.1192235960489052e-05,
"loss": 2.7895,
"step": 9950
},
{
"epoch": 1.86,
"learning_rate": 2.1157698418180563e-05,
"loss": 2.8621,
"step": 9960
},
{
"epoch": 1.86,
"learning_rate": 2.1123160875872076e-05,
"loss": 2.7962,
"step": 9970
},
{
"epoch": 1.86,
"learning_rate": 2.1088623333563583e-05,
"loss": 2.7863,
"step": 9980
},
{
"epoch": 1.86,
"learning_rate": 2.1054085791255097e-05,
"loss": 2.7884,
"step": 9990
},
{
"epoch": 1.86,
"learning_rate": 2.1019548248946604e-05,
"loss": 2.7566,
"step": 10000
},
{
"epoch": 1.87,
"learning_rate": 2.0985010706638118e-05,
"loss": 2.8012,
"step": 10010
},
{
"epoch": 1.87,
"learning_rate": 2.0950473164329625e-05,
"loss": 2.8358,
"step": 10020
},
{
"epoch": 1.87,
"learning_rate": 2.091593562202114e-05,
"loss": 2.8367,
"step": 10030
},
{
"epoch": 1.87,
"learning_rate": 2.088139807971265e-05,
"loss": 2.7646,
"step": 10040
},
{
"epoch": 1.87,
"learning_rate": 2.084686053740416e-05,
"loss": 2.8934,
"step": 10050
},
{
"epoch": 1.88,
"learning_rate": 2.081232299509567e-05,
"loss": 2.8152,
"step": 10060
},
{
"epoch": 1.88,
"learning_rate": 2.077778545278718e-05,
"loss": 2.7449,
"step": 10070
},
{
"epoch": 1.88,
"learning_rate": 2.074324791047869e-05,
"loss": 2.7978,
"step": 10080
},
{
"epoch": 1.88,
"learning_rate": 2.0708710368170204e-05,
"loss": 2.7717,
"step": 10090
},
{
"epoch": 1.88,
"learning_rate": 2.0674172825861714e-05,
"loss": 2.8873,
"step": 10100
},
{
"epoch": 1.89,
"learning_rate": 2.0639635283553224e-05,
"loss": 2.8055,
"step": 10110
},
{
"epoch": 1.89,
"learning_rate": 2.0605097741244735e-05,
"loss": 2.6969,
"step": 10120
},
{
"epoch": 1.89,
"learning_rate": 2.0570560198936245e-05,
"loss": 2.7373,
"step": 10130
},
{
"epoch": 1.89,
"learning_rate": 2.0536022656627755e-05,
"loss": 2.7297,
"step": 10140
},
{
"epoch": 1.89,
"learning_rate": 2.0501485114319265e-05,
"loss": 2.824,
"step": 10150
},
{
"epoch": 1.89,
"learning_rate": 2.046694757201078e-05,
"loss": 2.6395,
"step": 10160
},
{
"epoch": 1.9,
"learning_rate": 2.0432410029702286e-05,
"loss": 2.7764,
"step": 10170
},
{
"epoch": 1.9,
"learning_rate": 2.03978724873938e-05,
"loss": 2.7993,
"step": 10180
},
{
"epoch": 1.9,
"learning_rate": 2.0363334945085307e-05,
"loss": 2.7092,
"step": 10190
},
{
"epoch": 1.9,
"learning_rate": 2.032879740277682e-05,
"loss": 2.8668,
"step": 10200
},
{
"epoch": 1.9,
"learning_rate": 2.0294259860468327e-05,
"loss": 2.7189,
"step": 10210
},
{
"epoch": 1.91,
"learning_rate": 2.025972231815984e-05,
"loss": 2.6979,
"step": 10220
},
{
"epoch": 1.91,
"learning_rate": 2.022518477585135e-05,
"loss": 2.7583,
"step": 10230
},
{
"epoch": 1.91,
"learning_rate": 2.0190647233542862e-05,
"loss": 2.8106,
"step": 10240
},
{
"epoch": 1.91,
"learning_rate": 2.0156109691234372e-05,
"loss": 2.7257,
"step": 10250
},
{
"epoch": 1.91,
"learning_rate": 2.0121572148925882e-05,
"loss": 2.6996,
"step": 10260
},
{
"epoch": 1.92,
"learning_rate": 2.0087034606617393e-05,
"loss": 2.6771,
"step": 10270
},
{
"epoch": 1.92,
"learning_rate": 2.0052497064308906e-05,
"loss": 2.8394,
"step": 10280
},
{
"epoch": 1.92,
"learning_rate": 2.0017959522000417e-05,
"loss": 2.7741,
"step": 10290
},
{
"epoch": 1.92,
"learning_rate": 1.9983421979691927e-05,
"loss": 2.7705,
"step": 10300
},
{
"epoch": 1.92,
"learning_rate": 1.9948884437383437e-05,
"loss": 2.7212,
"step": 10310
},
{
"epoch": 1.92,
"learning_rate": 1.9914346895074948e-05,
"loss": 2.773,
"step": 10320
},
{
"epoch": 1.93,
"learning_rate": 1.9879809352766458e-05,
"loss": 2.7534,
"step": 10330
},
{
"epoch": 1.93,
"learning_rate": 1.9845271810457968e-05,
"loss": 2.7153,
"step": 10340
},
{
"epoch": 1.93,
"learning_rate": 1.9810734268149482e-05,
"loss": 2.7342,
"step": 10350
},
{
"epoch": 1.93,
"learning_rate": 1.977619672584099e-05,
"loss": 2.7553,
"step": 10360
},
{
"epoch": 1.93,
"learning_rate": 1.9741659183532503e-05,
"loss": 2.7939,
"step": 10370
},
{
"epoch": 1.94,
"learning_rate": 1.970712164122401e-05,
"loss": 2.7801,
"step": 10380
},
{
"epoch": 1.94,
"learning_rate": 1.9672584098915523e-05,
"loss": 2.786,
"step": 10390
},
{
"epoch": 1.94,
"learning_rate": 1.963804655660703e-05,
"loss": 2.7158,
"step": 10400
},
{
"epoch": 1.94,
"learning_rate": 1.9603509014298544e-05,
"loss": 2.7806,
"step": 10410
},
{
"epoch": 1.94,
"learning_rate": 1.9568971471990054e-05,
"loss": 2.7648,
"step": 10420
},
{
"epoch": 1.94,
"learning_rate": 1.9534433929681564e-05,
"loss": 2.8309,
"step": 10430
},
{
"epoch": 1.95,
"learning_rate": 1.9499896387373075e-05,
"loss": 2.6813,
"step": 10440
},
{
"epoch": 1.95,
"learning_rate": 1.9465358845064585e-05,
"loss": 2.6845,
"step": 10450
},
{
"epoch": 1.95,
"learning_rate": 1.9430821302756095e-05,
"loss": 2.7858,
"step": 10460
},
{
"epoch": 1.95,
"learning_rate": 1.939628376044761e-05,
"loss": 2.8503,
"step": 10470
},
{
"epoch": 1.95,
"learning_rate": 1.936174621813912e-05,
"loss": 2.7545,
"step": 10480
},
{
"epoch": 1.96,
"learning_rate": 1.932720867583063e-05,
"loss": 2.6919,
"step": 10490
},
{
"epoch": 1.96,
"learning_rate": 1.929267113352214e-05,
"loss": 2.8175,
"step": 10500
},
{
"epoch": 1.96,
"learning_rate": 1.925813359121365e-05,
"loss": 2.8181,
"step": 10510
},
{
"epoch": 1.96,
"learning_rate": 1.922359604890516e-05,
"loss": 2.7865,
"step": 10520
},
{
"epoch": 1.96,
"learning_rate": 1.918905850659667e-05,
"loss": 2.8738,
"step": 10530
},
{
"epoch": 1.97,
"learning_rate": 1.9154520964288185e-05,
"loss": 2.7771,
"step": 10540
},
{
"epoch": 1.97,
"learning_rate": 1.911998342197969e-05,
"loss": 2.6842,
"step": 10550
},
{
"epoch": 1.97,
"learning_rate": 1.9085445879671205e-05,
"loss": 2.7889,
"step": 10560
},
{
"epoch": 1.97,
"learning_rate": 1.9050908337362712e-05,
"loss": 2.7784,
"step": 10570
},
{
"epoch": 1.97,
"learning_rate": 1.9016370795054226e-05,
"loss": 2.746,
"step": 10580
},
{
"epoch": 1.97,
"learning_rate": 1.8981833252745733e-05,
"loss": 2.733,
"step": 10590
},
{
"epoch": 1.98,
"learning_rate": 1.8947295710437247e-05,
"loss": 2.8084,
"step": 10600
},
{
"epoch": 1.98,
"learning_rate": 1.8912758168128757e-05,
"loss": 2.6748,
"step": 10610
},
{
"epoch": 1.98,
"learning_rate": 1.8878220625820267e-05,
"loss": 2.6682,
"step": 10620
},
{
"epoch": 1.98,
"learning_rate": 1.8843683083511778e-05,
"loss": 2.7535,
"step": 10630
},
{
"epoch": 1.98,
"learning_rate": 1.8809145541203288e-05,
"loss": 2.8174,
"step": 10640
},
{
"epoch": 1.99,
"learning_rate": 1.8774607998894798e-05,
"loss": 2.7326,
"step": 10650
},
{
"epoch": 1.99,
"learning_rate": 1.874007045658631e-05,
"loss": 2.6306,
"step": 10660
},
{
"epoch": 1.99,
"learning_rate": 1.8705532914277822e-05,
"loss": 2.6619,
"step": 10670
},
{
"epoch": 1.99,
"learning_rate": 1.8670995371969333e-05,
"loss": 2.6543,
"step": 10680
},
{
"epoch": 1.99,
"learning_rate": 1.8636457829660843e-05,
"loss": 2.7638,
"step": 10690
},
{
"epoch": 2.0,
"learning_rate": 1.8601920287352353e-05,
"loss": 2.7623,
"step": 10700
},
{
"epoch": 2.0,
"learning_rate": 1.8567382745043863e-05,
"loss": 2.7837,
"step": 10710
},
{
"epoch": 2.0,
"learning_rate": 1.8532845202735374e-05,
"loss": 2.694,
"step": 10720
},
{
"epoch": 2.0,
"eval_accuracy": 0.5199166098494782,
"eval_loss": 2.258553981781006,
"eval_runtime": 8350.0446,
"eval_samples_per_second": 9.134,
"eval_steps_per_second": 0.286,
"step": 10725
},
{
"epoch": 2.0,
"learning_rate": 1.8498307660426887e-05,
"loss": 2.7086,
"step": 10730
},
{
"epoch": 2.0,
"learning_rate": 1.8463770118118394e-05,
"loss": 2.5868,
"step": 10740
},
{
"epoch": 2.0,
"learning_rate": 1.8429232575809908e-05,
"loss": 2.6158,
"step": 10750
},
{
"epoch": 2.01,
"learning_rate": 1.8394695033501415e-05,
"loss": 2.5816,
"step": 10760
},
{
"epoch": 2.01,
"learning_rate": 1.836015749119293e-05,
"loss": 2.6675,
"step": 10770
},
{
"epoch": 2.01,
"learning_rate": 1.8325619948884436e-05,
"loss": 2.6007,
"step": 10780
},
{
"epoch": 2.01,
"learning_rate": 1.829108240657595e-05,
"loss": 2.7189,
"step": 10790
},
{
"epoch": 2.01,
"learning_rate": 1.825654486426746e-05,
"loss": 2.6269,
"step": 10800
},
{
"epoch": 2.02,
"learning_rate": 1.822200732195897e-05,
"loss": 2.6914,
"step": 10810
},
{
"epoch": 2.02,
"learning_rate": 1.818746977965048e-05,
"loss": 2.6807,
"step": 10820
},
{
"epoch": 2.02,
"learning_rate": 1.815293223734199e-05,
"loss": 2.6789,
"step": 10830
},
{
"epoch": 2.02,
"learning_rate": 1.81183946950335e-05,
"loss": 2.6979,
"step": 10840
},
{
"epoch": 2.02,
"learning_rate": 1.808385715272501e-05,
"loss": 2.7043,
"step": 10850
},
{
"epoch": 2.03,
"learning_rate": 1.8049319610416525e-05,
"loss": 2.5784,
"step": 10860
},
{
"epoch": 2.03,
"learning_rate": 1.8014782068108035e-05,
"loss": 2.765,
"step": 10870
},
{
"epoch": 2.03,
"learning_rate": 1.7980244525799546e-05,
"loss": 2.7079,
"step": 10880
},
{
"epoch": 2.03,
"learning_rate": 1.7945706983491056e-05,
"loss": 2.5952,
"step": 10890
},
{
"epoch": 2.03,
"learning_rate": 1.7911169441182566e-05,
"loss": 2.5811,
"step": 10900
},
{
"epoch": 2.03,
"learning_rate": 1.7876631898874077e-05,
"loss": 2.5979,
"step": 10910
},
{
"epoch": 2.04,
"learning_rate": 1.784209435656559e-05,
"loss": 2.5244,
"step": 10920
},
{
"epoch": 2.04,
"learning_rate": 1.7807556814257097e-05,
"loss": 2.5945,
"step": 10930
},
{
"epoch": 2.04,
"learning_rate": 1.777301927194861e-05,
"loss": 2.6323,
"step": 10940
},
{
"epoch": 2.04,
"learning_rate": 1.7738481729640118e-05,
"loss": 2.7229,
"step": 10950
},
{
"epoch": 2.04,
"learning_rate": 1.770394418733163e-05,
"loss": 2.6336,
"step": 10960
},
{
"epoch": 2.05,
"learning_rate": 1.766940664502314e-05,
"loss": 2.7559,
"step": 10970
},
{
"epoch": 2.05,
"learning_rate": 1.7634869102714652e-05,
"loss": 2.5932,
"step": 10980
},
{
"epoch": 2.05,
"learning_rate": 1.7600331560406162e-05,
"loss": 2.6705,
"step": 10990
},
{
"epoch": 2.05,
"learning_rate": 1.7565794018097673e-05,
"loss": 2.6559,
"step": 11000
},
{
"epoch": 2.05,
"learning_rate": 1.7531256475789183e-05,
"loss": 2.6464,
"step": 11010
},
{
"epoch": 2.05,
"learning_rate": 1.7496718933480693e-05,
"loss": 2.612,
"step": 11020
},
{
"epoch": 2.06,
"learning_rate": 1.7462181391172207e-05,
"loss": 2.6383,
"step": 11030
},
{
"epoch": 2.06,
"learning_rate": 1.7427643848863714e-05,
"loss": 2.6744,
"step": 11040
},
{
"epoch": 2.06,
"learning_rate": 1.7393106306555228e-05,
"loss": 2.6917,
"step": 11050
},
{
"epoch": 2.06,
"learning_rate": 1.7358568764246738e-05,
"loss": 2.6124,
"step": 11060
},
{
"epoch": 2.06,
"learning_rate": 1.732403122193825e-05,
"loss": 2.7056,
"step": 11070
},
{
"epoch": 2.07,
"learning_rate": 1.728949367962976e-05,
"loss": 2.666,
"step": 11080
},
{
"epoch": 2.07,
"learning_rate": 1.725495613732127e-05,
"loss": 2.5992,
"step": 11090
},
{
"epoch": 2.07,
"learning_rate": 1.722041859501278e-05,
"loss": 2.6046,
"step": 11100
},
{
"epoch": 2.07,
"learning_rate": 1.718588105270429e-05,
"loss": 2.5876,
"step": 11110
},
{
"epoch": 2.07,
"learning_rate": 1.71513435103958e-05,
"loss": 2.5939,
"step": 11120
},
{
"epoch": 2.08,
"learning_rate": 1.7116805968087314e-05,
"loss": 2.6956,
"step": 11130
},
{
"epoch": 2.08,
"learning_rate": 1.708226842577882e-05,
"loss": 2.6503,
"step": 11140
},
{
"epoch": 2.08,
"learning_rate": 1.7047730883470334e-05,
"loss": 2.606,
"step": 11150
},
{
"epoch": 2.08,
"learning_rate": 1.701319334116184e-05,
"loss": 2.6121,
"step": 11160
},
{
"epoch": 2.08,
"learning_rate": 1.6978655798853355e-05,
"loss": 2.5824,
"step": 11170
},
{
"epoch": 2.08,
"learning_rate": 1.6944118256544865e-05,
"loss": 2.5693,
"step": 11180
},
{
"epoch": 2.09,
"learning_rate": 1.6909580714236376e-05,
"loss": 2.6992,
"step": 11190
},
{
"epoch": 2.09,
"learning_rate": 1.6875043171927886e-05,
"loss": 2.6964,
"step": 11200
},
{
"epoch": 2.09,
"learning_rate": 1.6840505629619396e-05,
"loss": 2.5641,
"step": 11210
},
{
"epoch": 2.09,
"learning_rate": 1.680596808731091e-05,
"loss": 2.687,
"step": 11220
},
{
"epoch": 2.09,
"learning_rate": 1.6771430545002417e-05,
"loss": 2.7424,
"step": 11230
},
{
"epoch": 2.1,
"learning_rate": 1.673689300269393e-05,
"loss": 2.5703,
"step": 11240
},
{
"epoch": 2.1,
"learning_rate": 1.670235546038544e-05,
"loss": 2.6801,
"step": 11250
},
{
"epoch": 2.1,
"learning_rate": 1.666781791807695e-05,
"loss": 2.6199,
"step": 11260
},
{
"epoch": 2.1,
"learning_rate": 1.663328037576846e-05,
"loss": 2.5185,
"step": 11270
},
{
"epoch": 2.1,
"learning_rate": 1.6598742833459972e-05,
"loss": 2.6805,
"step": 11280
},
{
"epoch": 2.11,
"learning_rate": 1.6564205291151482e-05,
"loss": 2.6054,
"step": 11290
},
{
"epoch": 2.11,
"learning_rate": 1.6529667748842992e-05,
"loss": 2.5373,
"step": 11300
},
{
"epoch": 2.11,
"learning_rate": 1.6495130206534503e-05,
"loss": 2.6074,
"step": 11310
},
{
"epoch": 2.11,
"learning_rate": 1.6460592664226016e-05,
"loss": 2.5797,
"step": 11320
},
{
"epoch": 2.11,
"learning_rate": 1.6426055121917523e-05,
"loss": 2.6493,
"step": 11330
},
{
"epoch": 2.11,
"learning_rate": 1.6391517579609037e-05,
"loss": 2.5966,
"step": 11340
},
{
"epoch": 2.12,
"learning_rate": 1.6356980037300544e-05,
"loss": 2.6321,
"step": 11350
},
{
"epoch": 2.12,
"learning_rate": 1.6322442494992058e-05,
"loss": 2.6245,
"step": 11360
},
{
"epoch": 2.12,
"learning_rate": 1.6287904952683568e-05,
"loss": 2.6626,
"step": 11370
},
{
"epoch": 2.12,
"learning_rate": 1.6253367410375078e-05,
"loss": 2.6061,
"step": 11380
},
{
"epoch": 2.12,
"learning_rate": 1.621882986806659e-05,
"loss": 2.5854,
"step": 11390
},
{
"epoch": 2.13,
"learning_rate": 1.61842923257581e-05,
"loss": 2.6,
"step": 11400
},
{
"epoch": 2.13,
"learning_rate": 1.6149754783449613e-05,
"loss": 2.5424,
"step": 11410
},
{
"epoch": 2.13,
"learning_rate": 1.611521724114112e-05,
"loss": 2.6849,
"step": 11420
},
{
"epoch": 2.13,
"learning_rate": 1.6080679698832633e-05,
"loss": 2.5638,
"step": 11430
},
{
"epoch": 2.13,
"learning_rate": 1.6046142156524144e-05,
"loss": 2.6181,
"step": 11440
},
{
"epoch": 2.14,
"learning_rate": 1.6011604614215654e-05,
"loss": 2.5725,
"step": 11450
},
{
"epoch": 2.14,
"learning_rate": 1.5977067071907164e-05,
"loss": 2.5491,
"step": 11460
},
{
"epoch": 2.14,
"learning_rate": 1.5942529529598675e-05,
"loss": 2.6698,
"step": 11470
},
{
"epoch": 2.14,
"learning_rate": 1.5907991987290185e-05,
"loss": 2.6591,
"step": 11480
},
{
"epoch": 2.14,
"learning_rate": 1.5873454444981695e-05,
"loss": 2.669,
"step": 11490
},
{
"epoch": 2.14,
"learning_rate": 1.5838916902673205e-05,
"loss": 2.6449,
"step": 11500
},
{
"epoch": 2.15,
"learning_rate": 1.580437936036472e-05,
"loss": 2.6878,
"step": 11510
},
{
"epoch": 2.15,
"learning_rate": 1.5769841818056226e-05,
"loss": 2.5862,
"step": 11520
},
{
"epoch": 2.15,
"learning_rate": 1.573530427574774e-05,
"loss": 2.6372,
"step": 11530
},
{
"epoch": 2.15,
"learning_rate": 1.5700766733439247e-05,
"loss": 2.5786,
"step": 11540
},
{
"epoch": 2.15,
"learning_rate": 1.566622919113076e-05,
"loss": 2.525,
"step": 11550
},
{
"epoch": 2.16,
"learning_rate": 1.563169164882227e-05,
"loss": 2.5454,
"step": 11560
},
{
"epoch": 2.16,
"learning_rate": 1.559715410651378e-05,
"loss": 2.5995,
"step": 11570
},
{
"epoch": 2.16,
"learning_rate": 1.556261656420529e-05,
"loss": 2.5934,
"step": 11580
},
{
"epoch": 2.16,
"learning_rate": 1.5528079021896802e-05,
"loss": 2.5854,
"step": 11590
},
{
"epoch": 2.16,
"learning_rate": 1.5493541479588315e-05,
"loss": 2.4892,
"step": 11600
},
{
"epoch": 2.16,
"learning_rate": 1.5459003937279822e-05,
"loss": 2.5199,
"step": 11610
},
{
"epoch": 2.17,
"learning_rate": 1.5424466394971336e-05,
"loss": 2.6195,
"step": 11620
},
{
"epoch": 2.17,
"learning_rate": 1.5389928852662846e-05,
"loss": 2.5539,
"step": 11630
},
{
"epoch": 2.17,
"learning_rate": 1.5355391310354357e-05,
"loss": 2.4715,
"step": 11640
},
{
"epoch": 2.17,
"learning_rate": 1.5320853768045867e-05,
"loss": 2.636,
"step": 11650
},
{
"epoch": 2.17,
"learning_rate": 1.5286316225737377e-05,
"loss": 2.6113,
"step": 11660
},
{
"epoch": 2.18,
"learning_rate": 1.5251778683428888e-05,
"loss": 2.6125,
"step": 11670
},
{
"epoch": 2.18,
"learning_rate": 1.5217241141120398e-05,
"loss": 2.6328,
"step": 11680
},
{
"epoch": 2.18,
"learning_rate": 1.518270359881191e-05,
"loss": 2.6832,
"step": 11690
},
{
"epoch": 2.18,
"learning_rate": 1.5148166056503422e-05,
"loss": 2.6303,
"step": 11700
},
{
"epoch": 2.18,
"learning_rate": 1.511362851419493e-05,
"loss": 2.5381,
"step": 11710
},
{
"epoch": 2.19,
"learning_rate": 1.5079090971886443e-05,
"loss": 2.5419,
"step": 11720
},
{
"epoch": 2.19,
"learning_rate": 1.5044553429577951e-05,
"loss": 2.5489,
"step": 11730
},
{
"epoch": 2.19,
"learning_rate": 1.5010015887269463e-05,
"loss": 2.5821,
"step": 11740
},
{
"epoch": 2.19,
"learning_rate": 1.4975478344960972e-05,
"loss": 2.5817,
"step": 11750
},
{
"epoch": 2.19,
"learning_rate": 1.4940940802652484e-05,
"loss": 2.5954,
"step": 11760
},
{
"epoch": 2.19,
"learning_rate": 1.4906403260343996e-05,
"loss": 2.5759,
"step": 11770
},
{
"epoch": 2.2,
"learning_rate": 1.4871865718035504e-05,
"loss": 2.4825,
"step": 11780
},
{
"epoch": 2.2,
"learning_rate": 1.4837328175727016e-05,
"loss": 2.6348,
"step": 11790
},
{
"epoch": 2.2,
"learning_rate": 1.4802790633418525e-05,
"loss": 2.5029,
"step": 11800
},
{
"epoch": 2.2,
"learning_rate": 1.4768253091110037e-05,
"loss": 2.5852,
"step": 11810
},
{
"epoch": 2.2,
"learning_rate": 1.4733715548801549e-05,
"loss": 2.5978,
"step": 11820
},
{
"epoch": 2.21,
"learning_rate": 1.4699178006493058e-05,
"loss": 2.5382,
"step": 11830
},
{
"epoch": 2.21,
"learning_rate": 1.466464046418457e-05,
"loss": 2.5815,
"step": 11840
},
{
"epoch": 2.21,
"learning_rate": 1.463010292187608e-05,
"loss": 2.5992,
"step": 11850
},
{
"epoch": 2.21,
"learning_rate": 1.459556537956759e-05,
"loss": 2.5691,
"step": 11860
},
{
"epoch": 2.21,
"learning_rate": 1.45610278372591e-05,
"loss": 2.6351,
"step": 11870
},
{
"epoch": 2.22,
"learning_rate": 1.4526490294950613e-05,
"loss": 2.572,
"step": 11880
},
{
"epoch": 2.22,
"learning_rate": 1.4491952752642125e-05,
"loss": 2.5904,
"step": 11890
},
{
"epoch": 2.22,
"learning_rate": 1.4457415210333633e-05,
"loss": 2.5471,
"step": 11900
},
{
"epoch": 2.22,
"learning_rate": 1.4422877668025145e-05,
"loss": 2.4937,
"step": 11910
},
{
"epoch": 2.22,
"learning_rate": 1.4388340125716654e-05,
"loss": 2.4822,
"step": 11920
},
{
"epoch": 2.22,
"learning_rate": 1.4353802583408166e-05,
"loss": 2.5416,
"step": 11930
},
{
"epoch": 2.23,
"learning_rate": 1.4319265041099675e-05,
"loss": 2.4873,
"step": 11940
},
{
"epoch": 2.23,
"learning_rate": 1.4284727498791187e-05,
"loss": 2.4911,
"step": 11950
},
{
"epoch": 2.23,
"learning_rate": 1.4250189956482699e-05,
"loss": 2.4459,
"step": 11960
},
{
"epoch": 2.23,
"learning_rate": 1.4215652414174207e-05,
"loss": 2.499,
"step": 11970
},
{
"epoch": 2.23,
"learning_rate": 1.418111487186572e-05,
"loss": 2.4586,
"step": 11980
},
{
"epoch": 2.24,
"learning_rate": 1.4146577329557228e-05,
"loss": 2.5339,
"step": 11990
},
{
"epoch": 2.24,
"learning_rate": 1.411203978724874e-05,
"loss": 2.5919,
"step": 12000
},
{
"epoch": 2.24,
"learning_rate": 1.407750224494025e-05,
"loss": 2.5695,
"step": 12010
},
{
"epoch": 2.24,
"learning_rate": 1.404296470263176e-05,
"loss": 2.5624,
"step": 12020
},
{
"epoch": 2.24,
"learning_rate": 1.4008427160323273e-05,
"loss": 2.5149,
"step": 12030
},
{
"epoch": 2.25,
"learning_rate": 1.3973889618014783e-05,
"loss": 2.6278,
"step": 12040
},
{
"epoch": 2.25,
"learning_rate": 1.3939352075706293e-05,
"loss": 2.539,
"step": 12050
},
{
"epoch": 2.25,
"learning_rate": 1.3904814533397803e-05,
"loss": 2.4993,
"step": 12060
},
{
"epoch": 2.25,
"learning_rate": 1.3870276991089315e-05,
"loss": 2.576,
"step": 12070
},
{
"epoch": 2.25,
"learning_rate": 1.3835739448780827e-05,
"loss": 2.5318,
"step": 12080
},
{
"epoch": 2.25,
"learning_rate": 1.3801201906472336e-05,
"loss": 2.5886,
"step": 12090
},
{
"epoch": 2.26,
"learning_rate": 1.3766664364163848e-05,
"loss": 2.4935,
"step": 12100
},
{
"epoch": 2.26,
"learning_rate": 1.3732126821855357e-05,
"loss": 2.4726,
"step": 12110
},
{
"epoch": 2.26,
"learning_rate": 1.3697589279546869e-05,
"loss": 2.5471,
"step": 12120
},
{
"epoch": 2.26,
"learning_rate": 1.3663051737238377e-05,
"loss": 2.5019,
"step": 12130
},
{
"epoch": 2.26,
"learning_rate": 1.362851419492989e-05,
"loss": 2.5326,
"step": 12140
},
{
"epoch": 2.27,
"learning_rate": 1.3593976652621401e-05,
"loss": 2.5554,
"step": 12150
},
{
"epoch": 2.27,
"learning_rate": 1.355943911031291e-05,
"loss": 2.5263,
"step": 12160
},
{
"epoch": 2.27,
"learning_rate": 1.3524901568004422e-05,
"loss": 2.4685,
"step": 12170
},
{
"epoch": 2.27,
"learning_rate": 1.349036402569593e-05,
"loss": 2.5834,
"step": 12180
},
{
"epoch": 2.27,
"learning_rate": 1.3455826483387443e-05,
"loss": 2.4834,
"step": 12190
},
{
"epoch": 2.27,
"learning_rate": 1.3421288941078953e-05,
"loss": 2.5581,
"step": 12200
},
{
"epoch": 2.28,
"learning_rate": 1.3386751398770463e-05,
"loss": 2.4237,
"step": 12210
},
{
"epoch": 2.28,
"learning_rate": 1.3352213856461975e-05,
"loss": 2.5372,
"step": 12220
},
{
"epoch": 2.28,
"learning_rate": 1.3317676314153486e-05,
"loss": 2.5476,
"step": 12230
},
{
"epoch": 2.28,
"learning_rate": 1.3283138771844998e-05,
"loss": 2.5046,
"step": 12240
},
{
"epoch": 2.28,
"learning_rate": 1.3248601229536506e-05,
"loss": 2.5236,
"step": 12250
},
{
"epoch": 2.29,
"learning_rate": 1.3214063687228018e-05,
"loss": 2.4282,
"step": 12260
},
{
"epoch": 2.29,
"learning_rate": 1.317952614491953e-05,
"loss": 2.5041,
"step": 12270
},
{
"epoch": 2.29,
"learning_rate": 1.3144988602611039e-05,
"loss": 2.5305,
"step": 12280
},
{
"epoch": 2.29,
"learning_rate": 1.3110451060302551e-05,
"loss": 2.5322,
"step": 12290
},
{
"epoch": 2.29,
"learning_rate": 1.307591351799406e-05,
"loss": 2.443,
"step": 12300
},
{
"epoch": 2.3,
"learning_rate": 1.3041375975685571e-05,
"loss": 2.5025,
"step": 12310
},
{
"epoch": 2.3,
"learning_rate": 1.300683843337708e-05,
"loss": 2.6194,
"step": 12320
},
{
"epoch": 2.3,
"learning_rate": 1.2972300891068592e-05,
"loss": 2.5422,
"step": 12330
},
{
"epoch": 2.3,
"learning_rate": 1.2937763348760104e-05,
"loss": 2.487,
"step": 12340
},
{
"epoch": 2.3,
"learning_rate": 1.2903225806451613e-05,
"loss": 2.5176,
"step": 12350
},
{
"epoch": 2.3,
"learning_rate": 1.2868688264143125e-05,
"loss": 2.4831,
"step": 12360
},
{
"epoch": 2.31,
"learning_rate": 1.2834150721834633e-05,
"loss": 2.4596,
"step": 12370
},
{
"epoch": 2.31,
"learning_rate": 1.2799613179526145e-05,
"loss": 2.5229,
"step": 12380
},
{
"epoch": 2.31,
"learning_rate": 1.2765075637217656e-05,
"loss": 2.4673,
"step": 12390
},
{
"epoch": 2.31,
"learning_rate": 1.2730538094909166e-05,
"loss": 2.4803,
"step": 12400
},
{
"epoch": 2.31,
"learning_rate": 1.2696000552600678e-05,
"loss": 2.4534,
"step": 12410
},
{
"epoch": 2.32,
"learning_rate": 1.2661463010292188e-05,
"loss": 2.4959,
"step": 12420
},
{
"epoch": 2.32,
"learning_rate": 1.26269254679837e-05,
"loss": 2.523,
"step": 12430
},
{
"epoch": 2.32,
"learning_rate": 1.2592387925675209e-05,
"loss": 2.5406,
"step": 12440
},
{
"epoch": 2.32,
"learning_rate": 1.2557850383366721e-05,
"loss": 2.4748,
"step": 12450
},
{
"epoch": 2.32,
"learning_rate": 1.252331284105823e-05,
"loss": 2.5003,
"step": 12460
},
{
"epoch": 2.33,
"learning_rate": 1.2488775298749742e-05,
"loss": 2.5308,
"step": 12470
},
{
"epoch": 2.33,
"learning_rate": 1.2454237756441252e-05,
"loss": 2.5162,
"step": 12480
},
{
"epoch": 2.33,
"learning_rate": 1.2419700214132762e-05,
"loss": 2.4429,
"step": 12490
},
{
"epoch": 2.33,
"learning_rate": 1.2385162671824274e-05,
"loss": 2.5054,
"step": 12500
},
{
"epoch": 2.33,
"learning_rate": 1.2350625129515785e-05,
"loss": 2.427,
"step": 12510
},
{
"epoch": 2.33,
"learning_rate": 1.2316087587207295e-05,
"loss": 2.4848,
"step": 12520
},
{
"epoch": 2.34,
"learning_rate": 1.2281550044898805e-05,
"loss": 2.4365,
"step": 12530
},
{
"epoch": 2.34,
"learning_rate": 1.2247012502590316e-05,
"loss": 2.5547,
"step": 12540
},
{
"epoch": 2.34,
"learning_rate": 1.2212474960281826e-05,
"loss": 2.5358,
"step": 12550
},
{
"epoch": 2.34,
"learning_rate": 1.2177937417973338e-05,
"loss": 2.4882,
"step": 12560
},
{
"epoch": 2.34,
"learning_rate": 1.2143399875664848e-05,
"loss": 2.5398,
"step": 12570
},
{
"epoch": 2.35,
"learning_rate": 1.2108862333356358e-05,
"loss": 2.426,
"step": 12580
},
{
"epoch": 2.35,
"learning_rate": 1.2074324791047869e-05,
"loss": 2.5536,
"step": 12590
},
{
"epoch": 2.35,
"learning_rate": 1.203978724873938e-05,
"loss": 2.4513,
"step": 12600
},
{
"epoch": 2.35,
"learning_rate": 1.2005249706430891e-05,
"loss": 2.5256,
"step": 12610
},
{
"epoch": 2.35,
"learning_rate": 1.1970712164122401e-05,
"loss": 2.5244,
"step": 12620
},
{
"epoch": 2.36,
"learning_rate": 1.1936174621813913e-05,
"loss": 2.5293,
"step": 12630
},
{
"epoch": 2.36,
"learning_rate": 1.1901637079505424e-05,
"loss": 2.4588,
"step": 12640
},
{
"epoch": 2.36,
"learning_rate": 1.1867099537196934e-05,
"loss": 2.4325,
"step": 12650
},
{
"epoch": 2.36,
"learning_rate": 1.1832561994888444e-05,
"loss": 2.4101,
"step": 12660
},
{
"epoch": 2.36,
"learning_rate": 1.1798024452579955e-05,
"loss": 2.4884,
"step": 12670
},
{
"epoch": 2.36,
"learning_rate": 1.1763486910271465e-05,
"loss": 2.5393,
"step": 12680
},
{
"epoch": 2.37,
"learning_rate": 1.1728949367962977e-05,
"loss": 2.527,
"step": 12690
},
{
"epoch": 2.37,
"learning_rate": 1.1694411825654487e-05,
"loss": 2.5476,
"step": 12700
},
{
"epoch": 2.37,
"learning_rate": 1.1659874283345998e-05,
"loss": 2.5215,
"step": 12710
},
{
"epoch": 2.37,
"learning_rate": 1.1625336741037508e-05,
"loss": 2.4547,
"step": 12720
},
{
"epoch": 2.37,
"learning_rate": 1.1590799198729018e-05,
"loss": 2.3536,
"step": 12730
},
{
"epoch": 2.38,
"learning_rate": 1.1556261656420529e-05,
"loss": 2.4102,
"step": 12740
},
{
"epoch": 2.38,
"learning_rate": 1.1521724114112039e-05,
"loss": 2.5389,
"step": 12750
},
{
"epoch": 2.38,
"learning_rate": 1.1487186571803551e-05,
"loss": 2.4363,
"step": 12760
},
{
"epoch": 2.38,
"learning_rate": 1.1452649029495061e-05,
"loss": 2.5441,
"step": 12770
},
{
"epoch": 2.38,
"learning_rate": 1.1418111487186573e-05,
"loss": 2.4774,
"step": 12780
},
{
"epoch": 2.38,
"learning_rate": 1.1383573944878084e-05,
"loss": 2.4745,
"step": 12790
},
{
"epoch": 2.39,
"learning_rate": 1.1349036402569594e-05,
"loss": 2.4728,
"step": 12800
},
{
"epoch": 2.39,
"learning_rate": 1.1314498860261104e-05,
"loss": 2.5576,
"step": 12810
},
{
"epoch": 2.39,
"learning_rate": 1.1279961317952616e-05,
"loss": 2.5435,
"step": 12820
},
{
"epoch": 2.39,
"learning_rate": 1.1245423775644127e-05,
"loss": 2.4142,
"step": 12830
},
{
"epoch": 2.39,
"learning_rate": 1.1210886233335637e-05,
"loss": 2.4979,
"step": 12840
},
{
"epoch": 2.4,
"learning_rate": 1.1176348691027147e-05,
"loss": 2.582,
"step": 12850
},
{
"epoch": 2.4,
"learning_rate": 1.1141811148718657e-05,
"loss": 2.5088,
"step": 12860
},
{
"epoch": 2.4,
"learning_rate": 1.1107273606410168e-05,
"loss": 2.4736,
"step": 12870
},
{
"epoch": 2.4,
"learning_rate": 1.107273606410168e-05,
"loss": 2.5025,
"step": 12880
},
{
"epoch": 2.4,
"learning_rate": 1.103819852179319e-05,
"loss": 2.3659,
"step": 12890
},
{
"epoch": 2.41,
"learning_rate": 1.10036609794847e-05,
"loss": 2.4489,
"step": 12900
},
{
"epoch": 2.41,
"learning_rate": 1.096912343717621e-05,
"loss": 2.4468,
"step": 12910
},
{
"epoch": 2.41,
"learning_rate": 1.0934585894867721e-05,
"loss": 2.4301,
"step": 12920
},
{
"epoch": 2.41,
"learning_rate": 1.0900048352559231e-05,
"loss": 2.5407,
"step": 12930
},
{
"epoch": 2.41,
"learning_rate": 1.0865510810250742e-05,
"loss": 2.5414,
"step": 12940
},
{
"epoch": 2.41,
"learning_rate": 1.0830973267942254e-05,
"loss": 2.5377,
"step": 12950
},
{
"epoch": 2.42,
"learning_rate": 1.0796435725633764e-05,
"loss": 2.3887,
"step": 12960
},
{
"epoch": 2.42,
"learning_rate": 1.0761898183325276e-05,
"loss": 2.4716,
"step": 12970
},
{
"epoch": 2.42,
"learning_rate": 1.0727360641016786e-05,
"loss": 2.3875,
"step": 12980
},
{
"epoch": 2.42,
"learning_rate": 1.0692823098708297e-05,
"loss": 2.4645,
"step": 12990
},
{
"epoch": 2.42,
"learning_rate": 1.0658285556399807e-05,
"loss": 2.4588,
"step": 13000
},
{
"epoch": 2.43,
"learning_rate": 1.0623748014091319e-05,
"loss": 2.4394,
"step": 13010
},
{
"epoch": 2.43,
"learning_rate": 1.058921047178283e-05,
"loss": 2.4167,
"step": 13020
},
{
"epoch": 2.43,
"learning_rate": 1.055467292947434e-05,
"loss": 2.4317,
"step": 13030
},
{
"epoch": 2.43,
"learning_rate": 1.052013538716585e-05,
"loss": 2.46,
"step": 13040
},
{
"epoch": 2.43,
"learning_rate": 1.048559784485736e-05,
"loss": 2.5175,
"step": 13050
},
{
"epoch": 2.44,
"learning_rate": 1.045106030254887e-05,
"loss": 2.3986,
"step": 13060
},
{
"epoch": 2.44,
"learning_rate": 1.0416522760240381e-05,
"loss": 2.4969,
"step": 13070
},
{
"epoch": 2.44,
"learning_rate": 1.0381985217931893e-05,
"loss": 2.3781,
"step": 13080
},
{
"epoch": 2.44,
"learning_rate": 1.0347447675623403e-05,
"loss": 2.4569,
"step": 13090
},
{
"epoch": 2.44,
"learning_rate": 1.0312910133314914e-05,
"loss": 2.3831,
"step": 13100
},
{
"epoch": 2.44,
"learning_rate": 1.0278372591006424e-05,
"loss": 2.4431,
"step": 13110
},
{
"epoch": 2.45,
"learning_rate": 1.0243835048697934e-05,
"loss": 2.5117,
"step": 13120
},
{
"epoch": 2.45,
"learning_rate": 1.0209297506389444e-05,
"loss": 2.5629,
"step": 13130
},
{
"epoch": 2.45,
"learning_rate": 1.0174759964080956e-05,
"loss": 2.437,
"step": 13140
},
{
"epoch": 2.45,
"learning_rate": 1.0140222421772467e-05,
"loss": 2.5089,
"step": 13150
},
{
"epoch": 2.45,
"learning_rate": 1.0105684879463979e-05,
"loss": 2.5891,
"step": 13160
},
{
"epoch": 2.46,
"learning_rate": 1.0071147337155489e-05,
"loss": 2.4592,
"step": 13170
},
{
"epoch": 2.46,
"learning_rate": 1.0036609794847e-05,
"loss": 2.4947,
"step": 13180
},
{
"epoch": 2.46,
"learning_rate": 1.000207225253851e-05,
"loss": 2.3656,
"step": 13190
},
{
"epoch": 2.46,
"learning_rate": 9.96753471023002e-06,
"loss": 2.3021,
"step": 13200
},
{
"epoch": 2.46,
"learning_rate": 9.932997167921532e-06,
"loss": 2.5248,
"step": 13210
},
{
"epoch": 2.47,
"learning_rate": 9.898459625613042e-06,
"loss": 2.3746,
"step": 13220
},
{
"epoch": 2.47,
"learning_rate": 9.863922083304553e-06,
"loss": 2.4837,
"step": 13230
},
{
"epoch": 2.47,
"learning_rate": 9.829384540996063e-06,
"loss": 2.3482,
"step": 13240
},
{
"epoch": 2.47,
"learning_rate": 9.794846998687573e-06,
"loss": 2.3631,
"step": 13250
},
{
"epoch": 2.47,
"learning_rate": 9.760309456379084e-06,
"loss": 2.3923,
"step": 13260
},
{
"epoch": 2.47,
"learning_rate": 9.725771914070596e-06,
"loss": 2.465,
"step": 13270
},
{
"epoch": 2.48,
"learning_rate": 9.691234371762106e-06,
"loss": 2.5215,
"step": 13280
},
{
"epoch": 2.48,
"learning_rate": 9.656696829453616e-06,
"loss": 2.4189,
"step": 13290
},
{
"epoch": 2.48,
"learning_rate": 9.622159287145127e-06,
"loss": 2.4592,
"step": 13300
},
{
"epoch": 2.48,
"learning_rate": 9.587621744836637e-06,
"loss": 2.5317,
"step": 13310
},
{
"epoch": 2.48,
"learning_rate": 9.553084202528149e-06,
"loss": 2.5719,
"step": 13320
},
{
"epoch": 2.49,
"learning_rate": 9.51854666021966e-06,
"loss": 2.4286,
"step": 13330
},
{
"epoch": 2.49,
"learning_rate": 9.48400911791117e-06,
"loss": 2.3491,
"step": 13340
},
{
"epoch": 2.49,
"learning_rate": 9.449471575602682e-06,
"loss": 2.5172,
"step": 13350
},
{
"epoch": 2.49,
"learning_rate": 9.414934033294192e-06,
"loss": 2.4466,
"step": 13360
},
{
"epoch": 2.49,
"learning_rate": 9.380396490985702e-06,
"loss": 2.4197,
"step": 13370
},
{
"epoch": 2.49,
"learning_rate": 9.345858948677212e-06,
"loss": 2.5182,
"step": 13380
},
{
"epoch": 2.5,
"learning_rate": 9.311321406368723e-06,
"loss": 2.5291,
"step": 13390
},
{
"epoch": 2.5,
"learning_rate": 9.276783864060235e-06,
"loss": 2.5132,
"step": 13400
},
{
"epoch": 2.5,
"learning_rate": 9.242246321751745e-06,
"loss": 2.4365,
"step": 13410
},
{
"epoch": 2.5,
"learning_rate": 9.207708779443255e-06,
"loss": 2.5625,
"step": 13420
},
{
"epoch": 2.5,
"learning_rate": 9.173171237134766e-06,
"loss": 2.4663,
"step": 13430
},
{
"epoch": 2.51,
"learning_rate": 9.138633694826276e-06,
"loss": 2.5276,
"step": 13440
},
{
"epoch": 2.51,
"learning_rate": 9.104096152517786e-06,
"loss": 2.4061,
"step": 13450
},
{
"epoch": 2.51,
"learning_rate": 9.069558610209298e-06,
"loss": 2.4691,
"step": 13460
},
{
"epoch": 2.51,
"learning_rate": 9.035021067900809e-06,
"loss": 2.4038,
"step": 13470
},
{
"epoch": 2.51,
"learning_rate": 9.000483525592319e-06,
"loss": 2.4281,
"step": 13480
},
{
"epoch": 2.52,
"learning_rate": 8.96594598328383e-06,
"loss": 2.4752,
"step": 13490
},
{
"epoch": 2.52,
"learning_rate": 8.93140844097534e-06,
"loss": 2.3682,
"step": 13500
},
{
"epoch": 2.52,
"learning_rate": 8.896870898666852e-06,
"loss": 2.4398,
"step": 13510
},
{
"epoch": 2.52,
"learning_rate": 8.862333356358362e-06,
"loss": 2.4714,
"step": 13520
},
{
"epoch": 2.52,
"learning_rate": 8.827795814049874e-06,
"loss": 2.4158,
"step": 13530
},
{
"epoch": 2.52,
"learning_rate": 8.793258271741384e-06,
"loss": 2.4668,
"step": 13540
},
{
"epoch": 2.53,
"learning_rate": 8.758720729432895e-06,
"loss": 2.3691,
"step": 13550
},
{
"epoch": 2.53,
"learning_rate": 8.724183187124405e-06,
"loss": 2.3835,
"step": 13560
},
{
"epoch": 2.53,
"learning_rate": 8.689645644815915e-06,
"loss": 2.4404,
"step": 13570
},
{
"epoch": 2.53,
"learning_rate": 8.655108102507426e-06,
"loss": 2.4065,
"step": 13580
},
{
"epoch": 2.53,
"learning_rate": 8.620570560198938e-06,
"loss": 2.4134,
"step": 13590
},
{
"epoch": 2.54,
"learning_rate": 8.586033017890448e-06,
"loss": 2.3334,
"step": 13600
},
{
"epoch": 2.54,
"learning_rate": 8.551495475581958e-06,
"loss": 2.4672,
"step": 13610
},
{
"epoch": 2.54,
"learning_rate": 8.516957933273469e-06,
"loss": 2.3859,
"step": 13620
},
{
"epoch": 2.54,
"learning_rate": 8.482420390964979e-06,
"loss": 2.4169,
"step": 13630
},
{
"epoch": 2.54,
"learning_rate": 8.44788284865649e-06,
"loss": 2.4103,
"step": 13640
},
{
"epoch": 2.55,
"learning_rate": 8.413345306348e-06,
"loss": 2.492,
"step": 13650
},
{
"epoch": 2.55,
"learning_rate": 8.378807764039511e-06,
"loss": 2.4176,
"step": 13660
},
{
"epoch": 2.55,
"learning_rate": 8.344270221731022e-06,
"loss": 2.4864,
"step": 13670
},
{
"epoch": 2.55,
"learning_rate": 8.309732679422532e-06,
"loss": 2.412,
"step": 13680
},
{
"epoch": 2.55,
"learning_rate": 8.275195137114042e-06,
"loss": 2.2933,
"step": 13690
},
{
"epoch": 2.55,
"learning_rate": 8.240657594805554e-06,
"loss": 2.3668,
"step": 13700
},
{
"epoch": 2.56,
"learning_rate": 8.206120052497065e-06,
"loss": 2.4555,
"step": 13710
},
{
"epoch": 2.56,
"learning_rate": 8.171582510188577e-06,
"loss": 2.3584,
"step": 13720
},
{
"epoch": 2.56,
"learning_rate": 8.137044967880087e-06,
"loss": 2.3607,
"step": 13730
},
{
"epoch": 2.56,
"learning_rate": 8.102507425571597e-06,
"loss": 2.4085,
"step": 13740
},
{
"epoch": 2.56,
"learning_rate": 8.067969883263108e-06,
"loss": 2.3381,
"step": 13750
},
{
"epoch": 2.57,
"learning_rate": 8.033432340954618e-06,
"loss": 2.4618,
"step": 13760
},
{
"epoch": 2.57,
"learning_rate": 7.998894798646128e-06,
"loss": 2.3399,
"step": 13770
},
{
"epoch": 2.57,
"learning_rate": 7.96435725633764e-06,
"loss": 2.4523,
"step": 13780
},
{
"epoch": 2.57,
"learning_rate": 7.92981971402915e-06,
"loss": 2.387,
"step": 13790
},
{
"epoch": 2.57,
"learning_rate": 7.895282171720661e-06,
"loss": 2.3244,
"step": 13800
},
{
"epoch": 2.58,
"learning_rate": 7.860744629412171e-06,
"loss": 2.4542,
"step": 13810
},
{
"epoch": 2.58,
"learning_rate": 7.826207087103682e-06,
"loss": 2.4163,
"step": 13820
},
{
"epoch": 2.58,
"learning_rate": 7.791669544795192e-06,
"loss": 2.4133,
"step": 13830
},
{
"epoch": 2.58,
"learning_rate": 7.757132002486702e-06,
"loss": 2.5119,
"step": 13840
},
{
"epoch": 2.58,
"learning_rate": 7.722594460178214e-06,
"loss": 2.398,
"step": 13850
},
{
"epoch": 2.58,
"learning_rate": 7.688056917869725e-06,
"loss": 2.3494,
"step": 13860
},
{
"epoch": 2.59,
"learning_rate": 7.653519375561235e-06,
"loss": 2.3333,
"step": 13870
},
{
"epoch": 2.59,
"learning_rate": 7.618981833252746e-06,
"loss": 2.3259,
"step": 13880
},
{
"epoch": 2.59,
"learning_rate": 7.584444290944256e-06,
"loss": 2.3831,
"step": 13890
},
{
"epoch": 2.59,
"learning_rate": 7.549906748635767e-06,
"loss": 2.3466,
"step": 13900
},
{
"epoch": 2.59,
"learning_rate": 7.515369206327279e-06,
"loss": 2.3265,
"step": 13910
},
{
"epoch": 2.6,
"learning_rate": 7.480831664018789e-06,
"loss": 2.5178,
"step": 13920
},
{
"epoch": 2.6,
"learning_rate": 7.446294121710299e-06,
"loss": 2.434,
"step": 13930
},
{
"epoch": 2.6,
"learning_rate": 7.4117565794018105e-06,
"loss": 2.373,
"step": 13940
},
{
"epoch": 2.6,
"learning_rate": 7.377219037093321e-06,
"loss": 2.4323,
"step": 13950
},
{
"epoch": 2.6,
"learning_rate": 7.342681494784831e-06,
"loss": 2.4116,
"step": 13960
},
{
"epoch": 2.61,
"learning_rate": 7.308143952476341e-06,
"loss": 2.4483,
"step": 13970
},
{
"epoch": 2.61,
"learning_rate": 7.273606410167853e-06,
"loss": 2.316,
"step": 13980
},
{
"epoch": 2.61,
"learning_rate": 7.239068867859364e-06,
"loss": 2.4437,
"step": 13990
},
{
"epoch": 2.61,
"learning_rate": 7.204531325550874e-06,
"loss": 2.2788,
"step": 14000
},
{
"epoch": 2.61,
"learning_rate": 7.169993783242384e-06,
"loss": 2.3947,
"step": 14010
},
{
"epoch": 2.61,
"learning_rate": 7.1354562409338955e-06,
"loss": 2.3904,
"step": 14020
},
{
"epoch": 2.62,
"learning_rate": 7.100918698625406e-06,
"loss": 2.3955,
"step": 14030
},
{
"epoch": 2.62,
"learning_rate": 7.066381156316918e-06,
"loss": 2.3703,
"step": 14040
},
{
"epoch": 2.62,
"learning_rate": 7.031843614008428e-06,
"loss": 2.3459,
"step": 14050
},
{
"epoch": 2.62,
"learning_rate": 6.9973060716999385e-06,
"loss": 2.3317,
"step": 14060
},
{
"epoch": 2.62,
"learning_rate": 6.962768529391449e-06,
"loss": 2.4926,
"step": 14070
},
{
"epoch": 2.63,
"learning_rate": 6.928230987082959e-06,
"loss": 2.3021,
"step": 14080
},
{
"epoch": 2.63,
"learning_rate": 6.8936934447744694e-06,
"loss": 2.4277,
"step": 14090
},
{
"epoch": 2.63,
"learning_rate": 6.85915590246598e-06,
"loss": 2.4472,
"step": 14100
},
{
"epoch": 2.63,
"learning_rate": 6.824618360157492e-06,
"loss": 2.3815,
"step": 14110
},
{
"epoch": 2.63,
"learning_rate": 6.790080817849002e-06,
"loss": 2.4714,
"step": 14120
},
{
"epoch": 2.63,
"learning_rate": 6.755543275540513e-06,
"loss": 2.3671,
"step": 14130
},
{
"epoch": 2.64,
"learning_rate": 6.7210057332320235e-06,
"loss": 2.3541,
"step": 14140
},
{
"epoch": 2.64,
"learning_rate": 6.686468190923534e-06,
"loss": 2.3399,
"step": 14150
},
{
"epoch": 2.64,
"learning_rate": 6.651930648615044e-06,
"loss": 2.4081,
"step": 14160
},
{
"epoch": 2.64,
"learning_rate": 6.617393106306556e-06,
"loss": 2.4018,
"step": 14170
},
{
"epoch": 2.64,
"learning_rate": 6.5828555639980665e-06,
"loss": 2.4288,
"step": 14180
},
{
"epoch": 2.65,
"learning_rate": 6.548318021689577e-06,
"loss": 2.3876,
"step": 14190
},
{
"epoch": 2.65,
"learning_rate": 6.513780479381087e-06,
"loss": 2.3145,
"step": 14200
},
{
"epoch": 2.65,
"learning_rate": 6.479242937072598e-06,
"loss": 2.4065,
"step": 14210
},
{
"epoch": 2.65,
"learning_rate": 6.444705394764109e-06,
"loss": 2.4055,
"step": 14220
},
{
"epoch": 2.65,
"learning_rate": 6.410167852455621e-06,
"loss": 2.3118,
"step": 14230
},
{
"epoch": 2.66,
"learning_rate": 6.375630310147131e-06,
"loss": 2.36,
"step": 14240
},
{
"epoch": 2.66,
"learning_rate": 6.341092767838641e-06,
"loss": 2.3578,
"step": 14250
},
{
"epoch": 2.66,
"learning_rate": 6.3065552255301516e-06,
"loss": 2.3756,
"step": 14260
},
{
"epoch": 2.66,
"learning_rate": 6.272017683221662e-06,
"loss": 2.4196,
"step": 14270
},
{
"epoch": 2.66,
"learning_rate": 6.237480140913173e-06,
"loss": 2.4409,
"step": 14280
},
{
"epoch": 2.66,
"learning_rate": 6.202942598604683e-06,
"loss": 2.3316,
"step": 14290
},
{
"epoch": 2.67,
"learning_rate": 6.1684050562961945e-06,
"loss": 2.306,
"step": 14300
},
{
"epoch": 2.67,
"learning_rate": 6.133867513987706e-06,
"loss": 2.3856,
"step": 14310
},
{
"epoch": 2.67,
"learning_rate": 6.099329971679216e-06,
"loss": 2.3944,
"step": 14320
},
{
"epoch": 2.67,
"learning_rate": 6.064792429370726e-06,
"loss": 2.4568,
"step": 14330
},
{
"epoch": 2.67,
"learning_rate": 6.030254887062237e-06,
"loss": 2.3327,
"step": 14340
},
{
"epoch": 2.68,
"learning_rate": 5.995717344753748e-06,
"loss": 2.3498,
"step": 14350
},
{
"epoch": 2.68,
"learning_rate": 5.961179802445258e-06,
"loss": 2.4181,
"step": 14360
},
{
"epoch": 2.68,
"learning_rate": 5.926642260136768e-06,
"loss": 2.398,
"step": 14370
},
{
"epoch": 2.68,
"learning_rate": 5.89210471782828e-06,
"loss": 2.4317,
"step": 14380
},
{
"epoch": 2.68,
"learning_rate": 5.85756717551979e-06,
"loss": 2.4137,
"step": 14390
},
{
"epoch": 2.69,
"learning_rate": 5.823029633211301e-06,
"loss": 2.4382,
"step": 14400
},
{
"epoch": 2.69,
"learning_rate": 5.788492090902812e-06,
"loss": 2.3696,
"step": 14410
},
{
"epoch": 2.69,
"learning_rate": 5.7539545485943225e-06,
"loss": 2.4141,
"step": 14420
},
{
"epoch": 2.69,
"learning_rate": 5.719417006285833e-06,
"loss": 2.3535,
"step": 14430
},
{
"epoch": 2.69,
"learning_rate": 5.684879463977344e-06,
"loss": 2.408,
"step": 14440
},
{
"epoch": 2.69,
"learning_rate": 5.650341921668854e-06,
"loss": 2.354,
"step": 14450
},
{
"epoch": 2.7,
"learning_rate": 5.615804379360365e-06,
"loss": 2.4954,
"step": 14460
},
{
"epoch": 2.7,
"learning_rate": 5.581266837051876e-06,
"loss": 2.4316,
"step": 14470
},
{
"epoch": 2.7,
"learning_rate": 5.546729294743386e-06,
"loss": 2.3022,
"step": 14480
},
{
"epoch": 2.7,
"learning_rate": 5.512191752434897e-06,
"loss": 2.2843,
"step": 14490
},
{
"epoch": 2.7,
"learning_rate": 5.477654210126408e-06,
"loss": 2.4304,
"step": 14500
},
{
"epoch": 2.71,
"learning_rate": 5.443116667817919e-06,
"loss": 2.2816,
"step": 14510
},
{
"epoch": 2.71,
"learning_rate": 5.408579125509429e-06,
"loss": 2.3768,
"step": 14520
},
{
"epoch": 2.71,
"learning_rate": 5.374041583200939e-06,
"loss": 2.4476,
"step": 14530
},
{
"epoch": 2.71,
"learning_rate": 5.3395040408924506e-06,
"loss": 2.439,
"step": 14540
},
{
"epoch": 2.71,
"learning_rate": 5.304966498583961e-06,
"loss": 2.3349,
"step": 14550
},
{
"epoch": 2.72,
"learning_rate": 5.270428956275471e-06,
"loss": 2.542,
"step": 14560
},
{
"epoch": 2.72,
"learning_rate": 5.235891413966982e-06,
"loss": 2.3421,
"step": 14570
},
{
"epoch": 2.72,
"learning_rate": 5.201353871658493e-06,
"loss": 2.3747,
"step": 14580
},
{
"epoch": 2.72,
"learning_rate": 5.166816329350004e-06,
"loss": 2.3729,
"step": 14590
},
{
"epoch": 2.72,
"learning_rate": 5.132278787041515e-06,
"loss": 2.3492,
"step": 14600
},
{
"epoch": 2.72,
"learning_rate": 5.097741244733025e-06,
"loss": 2.2954,
"step": 14610
},
{
"epoch": 2.73,
"learning_rate": 5.063203702424536e-06,
"loss": 2.3548,
"step": 14620
},
{
"epoch": 2.73,
"learning_rate": 5.028666160116046e-06,
"loss": 2.2816,
"step": 14630
},
{
"epoch": 2.73,
"learning_rate": 4.994128617807557e-06,
"loss": 2.4028,
"step": 14640
},
{
"epoch": 2.73,
"learning_rate": 4.959591075499067e-06,
"loss": 2.3788,
"step": 14650
},
{
"epoch": 2.73,
"learning_rate": 4.925053533190578e-06,
"loss": 2.4394,
"step": 14660
},
{
"epoch": 2.74,
"learning_rate": 4.890515990882089e-06,
"loss": 2.2951,
"step": 14670
},
{
"epoch": 2.74,
"learning_rate": 4.8559784485736e-06,
"loss": 2.2777,
"step": 14680
},
{
"epoch": 2.74,
"learning_rate": 4.82144090626511e-06,
"loss": 2.3185,
"step": 14690
},
{
"epoch": 2.74,
"learning_rate": 4.7869033639566215e-06,
"loss": 2.4007,
"step": 14700
},
{
"epoch": 2.74,
"learning_rate": 4.752365821648132e-06,
"loss": 2.3751,
"step": 14710
},
{
"epoch": 2.74,
"learning_rate": 4.717828279339642e-06,
"loss": 2.3223,
"step": 14720
},
{
"epoch": 2.75,
"learning_rate": 4.683290737031153e-06,
"loss": 2.3298,
"step": 14730
},
{
"epoch": 2.75,
"learning_rate": 4.648753194722664e-06,
"loss": 2.3654,
"step": 14740
},
{
"epoch": 2.75,
"learning_rate": 4.614215652414174e-06,
"loss": 2.4975,
"step": 14750
},
{
"epoch": 2.75,
"learning_rate": 4.579678110105685e-06,
"loss": 2.4081,
"step": 14760
},
{
"epoch": 2.75,
"learning_rate": 4.545140567797196e-06,
"loss": 2.3517,
"step": 14770
},
{
"epoch": 2.76,
"learning_rate": 4.510603025488707e-06,
"loss": 2.4114,
"step": 14780
},
{
"epoch": 2.76,
"learning_rate": 4.476065483180217e-06,
"loss": 2.2955,
"step": 14790
},
{
"epoch": 2.76,
"learning_rate": 4.441527940871728e-06,
"loss": 2.2924,
"step": 14800
},
{
"epoch": 2.76,
"learning_rate": 4.406990398563238e-06,
"loss": 2.3289,
"step": 14810
},
{
"epoch": 2.76,
"learning_rate": 4.372452856254749e-06,
"loss": 2.4274,
"step": 14820
},
{
"epoch": 2.77,
"learning_rate": 4.33791531394626e-06,
"loss": 2.2682,
"step": 14830
},
{
"epoch": 2.77,
"learning_rate": 4.30337777163777e-06,
"loss": 2.3307,
"step": 14840
},
{
"epoch": 2.77,
"learning_rate": 4.2688402293292805e-06,
"loss": 2.3643,
"step": 14850
},
{
"epoch": 2.77,
"learning_rate": 4.234302687020792e-06,
"loss": 2.4233,
"step": 14860
},
{
"epoch": 2.77,
"learning_rate": 4.199765144712303e-06,
"loss": 2.4175,
"step": 14870
},
{
"epoch": 2.77,
"learning_rate": 4.165227602403813e-06,
"loss": 2.3321,
"step": 14880
},
{
"epoch": 2.78,
"learning_rate": 4.130690060095324e-06,
"loss": 2.4373,
"step": 14890
},
{
"epoch": 2.78,
"learning_rate": 4.096152517786835e-06,
"loss": 2.3562,
"step": 14900
},
{
"epoch": 2.78,
"learning_rate": 4.061614975478345e-06,
"loss": 2.3348,
"step": 14910
},
{
"epoch": 2.78,
"learning_rate": 4.027077433169856e-06,
"loss": 2.3721,
"step": 14920
},
{
"epoch": 2.78,
"learning_rate": 3.992539890861366e-06,
"loss": 2.3379,
"step": 14930
},
{
"epoch": 2.79,
"learning_rate": 3.958002348552877e-06,
"loss": 2.3698,
"step": 14940
},
{
"epoch": 2.79,
"learning_rate": 3.923464806244388e-06,
"loss": 2.4562,
"step": 14950
},
{
"epoch": 2.79,
"learning_rate": 3.888927263935899e-06,
"loss": 2.4839,
"step": 14960
},
{
"epoch": 2.79,
"learning_rate": 3.854389721627409e-06,
"loss": 2.3623,
"step": 14970
},
{
"epoch": 2.79,
"learning_rate": 3.81985217931892e-06,
"loss": 2.3421,
"step": 14980
},
{
"epoch": 2.8,
"learning_rate": 3.785314637010431e-06,
"loss": 2.3138,
"step": 14990
},
{
"epoch": 2.8,
"learning_rate": 3.750777094701941e-06,
"loss": 2.3843,
"step": 15000
},
{
"epoch": 2.8,
"learning_rate": 3.7162395523934515e-06,
"loss": 2.3591,
"step": 15010
},
{
"epoch": 2.8,
"learning_rate": 3.6817020100849626e-06,
"loss": 2.3636,
"step": 15020
},
{
"epoch": 2.8,
"learning_rate": 3.6471644677764734e-06,
"loss": 2.4267,
"step": 15030
},
{
"epoch": 2.8,
"learning_rate": 3.6126269254679837e-06,
"loss": 2.373,
"step": 15040
},
{
"epoch": 2.81,
"learning_rate": 3.578089383159495e-06,
"loss": 2.5028,
"step": 15050
},
{
"epoch": 2.81,
"learning_rate": 3.543551840851005e-06,
"loss": 2.3799,
"step": 15060
},
{
"epoch": 2.81,
"learning_rate": 3.509014298542516e-06,
"loss": 2.4805,
"step": 15070
},
{
"epoch": 2.81,
"learning_rate": 3.474476756234026e-06,
"loss": 2.2899,
"step": 15080
},
{
"epoch": 2.81,
"learning_rate": 3.4399392139255374e-06,
"loss": 2.3221,
"step": 15090
},
{
"epoch": 2.82,
"learning_rate": 3.4054016716170477e-06,
"loss": 2.3465,
"step": 15100
},
{
"epoch": 2.82,
"learning_rate": 3.3708641293085584e-06,
"loss": 2.3942,
"step": 15110
},
{
"epoch": 2.82,
"learning_rate": 3.3363265870000696e-06,
"loss": 2.4674,
"step": 15120
},
{
"epoch": 2.82,
"learning_rate": 3.30178904469158e-06,
"loss": 2.4301,
"step": 15130
},
{
"epoch": 2.82,
"learning_rate": 3.2672515023830902e-06,
"loss": 2.4088,
"step": 15140
},
{
"epoch": 2.83,
"learning_rate": 3.2327139600746014e-06,
"loss": 2.3602,
"step": 15150
},
{
"epoch": 2.83,
"learning_rate": 3.198176417766112e-06,
"loss": 2.3013,
"step": 15160
},
{
"epoch": 2.83,
"learning_rate": 3.1636388754576224e-06,
"loss": 2.3958,
"step": 15170
},
{
"epoch": 2.83,
"learning_rate": 3.1291013331491336e-06,
"loss": 2.3123,
"step": 15180
},
{
"epoch": 2.83,
"learning_rate": 3.094563790840644e-06,
"loss": 2.3162,
"step": 15190
},
{
"epoch": 2.83,
"learning_rate": 3.0600262485321547e-06,
"loss": 2.3614,
"step": 15200
},
{
"epoch": 2.84,
"learning_rate": 3.0254887062236654e-06,
"loss": 2.3247,
"step": 15210
},
{
"epoch": 2.84,
"learning_rate": 2.990951163915176e-06,
"loss": 2.3145,
"step": 15220
},
{
"epoch": 2.84,
"learning_rate": 2.9564136216066864e-06,
"loss": 2.3681,
"step": 15230
},
{
"epoch": 2.84,
"learning_rate": 2.921876079298197e-06,
"loss": 2.2981,
"step": 15240
},
{
"epoch": 2.84,
"learning_rate": 2.887338536989708e-06,
"loss": 2.346,
"step": 15250
},
{
"epoch": 2.85,
"learning_rate": 2.8528009946812187e-06,
"loss": 2.4164,
"step": 15260
},
{
"epoch": 2.85,
"learning_rate": 2.8182634523727294e-06,
"loss": 2.4174,
"step": 15270
},
{
"epoch": 2.85,
"learning_rate": 2.7837259100642397e-06,
"loss": 2.3599,
"step": 15280
},
{
"epoch": 2.85,
"learning_rate": 2.7491883677557505e-06,
"loss": 2.278,
"step": 15290
},
{
"epoch": 2.85,
"learning_rate": 2.7146508254472616e-06,
"loss": 2.3583,
"step": 15300
},
{
"epoch": 2.85,
"learning_rate": 2.680113283138772e-06,
"loss": 2.4094,
"step": 15310
},
{
"epoch": 2.86,
"learning_rate": 2.6455757408302827e-06,
"loss": 2.4016,
"step": 15320
},
{
"epoch": 2.86,
"learning_rate": 2.6110381985217934e-06,
"loss": 2.3733,
"step": 15330
},
{
"epoch": 2.86,
"learning_rate": 2.5765006562133037e-06,
"loss": 2.3657,
"step": 15340
},
{
"epoch": 2.86,
"learning_rate": 2.541963113904815e-06,
"loss": 2.3585,
"step": 15350
},
{
"epoch": 2.86,
"learning_rate": 2.507425571596325e-06,
"loss": 2.3034,
"step": 15360
},
{
"epoch": 2.87,
"learning_rate": 2.472888029287836e-06,
"loss": 2.399,
"step": 15370
},
{
"epoch": 2.87,
"learning_rate": 2.4383504869793467e-06,
"loss": 2.3786,
"step": 15380
},
{
"epoch": 2.87,
"learning_rate": 2.4038129446708574e-06,
"loss": 2.3629,
"step": 15390
},
{
"epoch": 2.87,
"learning_rate": 2.369275402362368e-06,
"loss": 2.3671,
"step": 15400
},
{
"epoch": 2.87,
"learning_rate": 2.334737860053879e-06,
"loss": 2.2268,
"step": 15410
},
{
"epoch": 2.88,
"learning_rate": 2.300200317745389e-06,
"loss": 2.3121,
"step": 15420
},
{
"epoch": 2.88,
"learning_rate": 2.2656627754369e-06,
"loss": 2.3288,
"step": 15430
},
{
"epoch": 2.88,
"learning_rate": 2.2311252331284107e-06,
"loss": 2.3856,
"step": 15440
},
{
"epoch": 2.88,
"learning_rate": 2.1965876908199214e-06,
"loss": 2.3113,
"step": 15450
},
{
"epoch": 2.88,
"learning_rate": 2.162050148511432e-06,
"loss": 2.2406,
"step": 15460
},
{
"epoch": 2.88,
"learning_rate": 2.1275126062029425e-06,
"loss": 2.353,
"step": 15470
},
{
"epoch": 2.89,
"learning_rate": 2.0929750638944532e-06,
"loss": 2.3918,
"step": 15480
},
{
"epoch": 2.89,
"learning_rate": 2.058437521585964e-06,
"loss": 2.4184,
"step": 15490
},
{
"epoch": 2.89,
"learning_rate": 2.0238999792774747e-06,
"loss": 2.3828,
"step": 15500
},
{
"epoch": 2.89,
"learning_rate": 1.9893624369689854e-06,
"loss": 2.4124,
"step": 15510
},
{
"epoch": 2.89,
"learning_rate": 1.9548248946604957e-06,
"loss": 2.3288,
"step": 15520
},
{
"epoch": 2.9,
"learning_rate": 1.920287352352007e-06,
"loss": 2.2583,
"step": 15530
},
{
"epoch": 2.9,
"learning_rate": 1.8857498100435174e-06,
"loss": 2.4088,
"step": 15540
},
{
"epoch": 2.9,
"learning_rate": 1.851212267735028e-06,
"loss": 2.2754,
"step": 15550
},
{
"epoch": 2.9,
"learning_rate": 1.8166747254265387e-06,
"loss": 2.3621,
"step": 15560
},
{
"epoch": 2.9,
"learning_rate": 1.7821371831180492e-06,
"loss": 2.3717,
"step": 15570
},
{
"epoch": 2.91,
"learning_rate": 1.74759964080956e-06,
"loss": 2.4536,
"step": 15580
},
{
"epoch": 2.91,
"learning_rate": 1.713062098501071e-06,
"loss": 2.3785,
"step": 15590
},
{
"epoch": 2.91,
"learning_rate": 1.6785245561925812e-06,
"loss": 2.2907,
"step": 15600
},
{
"epoch": 2.91,
"learning_rate": 1.6439870138840922e-06,
"loss": 2.3086,
"step": 15610
},
{
"epoch": 2.91,
"learning_rate": 1.609449471575603e-06,
"loss": 2.358,
"step": 15620
},
{
"epoch": 2.91,
"learning_rate": 1.5749119292671134e-06,
"loss": 2.3363,
"step": 15630
},
{
"epoch": 2.92,
"learning_rate": 1.5403743869586242e-06,
"loss": 2.3688,
"step": 15640
},
{
"epoch": 2.92,
"learning_rate": 1.5058368446501347e-06,
"loss": 2.3412,
"step": 15650
},
{
"epoch": 2.92,
"learning_rate": 1.4712993023416455e-06,
"loss": 2.3828,
"step": 15660
},
{
"epoch": 2.92,
"learning_rate": 1.436761760033156e-06,
"loss": 2.2863,
"step": 15670
},
{
"epoch": 2.92,
"learning_rate": 1.402224217724667e-06,
"loss": 2.5111,
"step": 15680
},
{
"epoch": 2.93,
"learning_rate": 1.3676866754161775e-06,
"loss": 2.4224,
"step": 15690
},
{
"epoch": 2.93,
"learning_rate": 1.3331491331076882e-06,
"loss": 2.4222,
"step": 15700
},
{
"epoch": 2.93,
"learning_rate": 1.2986115907991987e-06,
"loss": 2.3345,
"step": 15710
},
{
"epoch": 2.93,
"learning_rate": 1.2640740484907095e-06,
"loss": 2.2492,
"step": 15720
},
{
"epoch": 2.93,
"learning_rate": 1.2295365061822202e-06,
"loss": 2.4186,
"step": 15730
},
{
"epoch": 2.94,
"learning_rate": 1.1949989638737307e-06,
"loss": 2.328,
"step": 15740
},
{
"epoch": 2.94,
"learning_rate": 1.1604614215652415e-06,
"loss": 2.3137,
"step": 15750
},
{
"epoch": 2.94,
"learning_rate": 1.1259238792567522e-06,
"loss": 2.3326,
"step": 15760
},
{
"epoch": 2.94,
"learning_rate": 1.091386336948263e-06,
"loss": 2.3862,
"step": 15770
},
{
"epoch": 2.94,
"learning_rate": 1.0568487946397735e-06,
"loss": 2.3108,
"step": 15780
},
{
"epoch": 2.94,
"learning_rate": 1.022311252331284e-06,
"loss": 2.26,
"step": 15790
},
{
"epoch": 2.95,
"learning_rate": 9.87773710022795e-07,
"loss": 2.4293,
"step": 15800
},
{
"epoch": 2.95,
"learning_rate": 9.532361677143055e-07,
"loss": 2.3243,
"step": 15810
},
{
"epoch": 2.95,
"learning_rate": 9.186986254058161e-07,
"loss": 2.3935,
"step": 15820
},
{
"epoch": 2.95,
"learning_rate": 8.841610830973267e-07,
"loss": 2.3129,
"step": 15830
},
{
"epoch": 2.95,
"learning_rate": 8.496235407888376e-07,
"loss": 2.3353,
"step": 15840
},
{
"epoch": 2.96,
"learning_rate": 8.150859984803482e-07,
"loss": 2.2725,
"step": 15850
},
{
"epoch": 2.96,
"learning_rate": 7.805484561718589e-07,
"loss": 2.378,
"step": 15860
},
{
"epoch": 2.96,
"learning_rate": 7.460109138633696e-07,
"loss": 2.3231,
"step": 15870
},
{
"epoch": 2.96,
"learning_rate": 7.114733715548802e-07,
"loss": 2.3203,
"step": 15880
},
{
"epoch": 2.96,
"learning_rate": 6.769358292463909e-07,
"loss": 2.3593,
"step": 15890
},
{
"epoch": 2.96,
"learning_rate": 6.423982869379015e-07,
"loss": 2.3439,
"step": 15900
},
{
"epoch": 2.97,
"learning_rate": 6.078607446294122e-07,
"loss": 2.3775,
"step": 15910
},
{
"epoch": 2.97,
"learning_rate": 5.733232023209229e-07,
"loss": 2.3511,
"step": 15920
},
{
"epoch": 2.97,
"learning_rate": 5.387856600124336e-07,
"loss": 2.3449,
"step": 15930
},
{
"epoch": 2.97,
"learning_rate": 5.042481177039442e-07,
"loss": 2.3427,
"step": 15940
},
{
"epoch": 2.97,
"learning_rate": 4.697105753954549e-07,
"loss": 2.2693,
"step": 15950
},
{
"epoch": 2.98,
"learning_rate": 4.3517303308696555e-07,
"loss": 2.3398,
"step": 15960
},
{
"epoch": 2.98,
"learning_rate": 4.006354907784763e-07,
"loss": 2.2743,
"step": 15970
},
{
"epoch": 2.98,
"learning_rate": 3.6609794846998687e-07,
"loss": 2.2086,
"step": 15980
},
{
"epoch": 2.98,
"learning_rate": 3.3156040616149755e-07,
"loss": 2.3393,
"step": 15990
},
{
"epoch": 2.98,
"learning_rate": 2.9702286385300824e-07,
"loss": 2.3861,
"step": 16000
},
{
"epoch": 2.99,
"learning_rate": 2.624853215445189e-07,
"loss": 2.2871,
"step": 16010
},
{
"epoch": 2.99,
"learning_rate": 2.2794777923602959e-07,
"loss": 2.3119,
"step": 16020
},
{
"epoch": 2.99,
"learning_rate": 1.9341023692754024e-07,
"loss": 2.385,
"step": 16030
},
{
"epoch": 2.99,
"learning_rate": 1.5887269461905093e-07,
"loss": 2.2872,
"step": 16040
},
{
"epoch": 2.99,
"learning_rate": 1.243351523105616e-07,
"loss": 2.3681,
"step": 16050
},
{
"epoch": 2.99,
"learning_rate": 8.979761000207225e-08,
"loss": 2.3491,
"step": 16060
},
{
"epoch": 3.0,
"learning_rate": 5.526006769358292e-08,
"loss": 2.3968,
"step": 16070
},
{
"epoch": 3.0,
"learning_rate": 2.0722525385093597e-08,
"loss": 2.4475,
"step": 16080
},
{
"epoch": 3.0,
"eval_accuracy": 0.5851995594482614,
"eval_loss": 1.9156352281570435,
"eval_runtime": 8294.1564,
"eval_samples_per_second": 9.195,
"eval_steps_per_second": 0.287,
"step": 16086
},
{
"epoch": 3.0,
"step": 16086,
"total_flos": 1.6352487334240263e+20,
"train_loss": 3.7719367721047283,
"train_runtime": 628821.6026,
"train_samples_per_second": 3.275,
"train_steps_per_second": 0.026
}
],
"logging_steps": 10,
"max_steps": 16086,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.6352487334240263e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}