Deepsword-34B-Chat / trainer_state.json
TriadParty's picture
Upload 12 files
6cb25fb
raw
history blame
No virus
109 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.9979035639413,
"eval_steps": 500,
"global_step": 8940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.9999907384105782e-05,
"loss": 1.9931,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.9999629537566803e-05,
"loss": 2.0507,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 2.9999166463814147e-05,
"loss": 1.9219,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 2.9998518168566207e-05,
"loss": 1.7246,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 2.9997684659828642e-05,
"loss": 1.7798,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 2.999666594789427e-05,
"loss": 1.6984,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 2.9995462045342955e-05,
"loss": 1.9176,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 2.999407296704142e-05,
"loss": 1.8109,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 2.9992498730143104e-05,
"loss": 1.9987,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 2.9990739354087918e-05,
"loss": 1.853,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 2.998879486060202e-05,
"loss": 1.8294,
"step": 110
},
{
"epoch": 0.07,
"learning_rate": 2.9986665273697548e-05,
"loss": 1.8909,
"step": 120
},
{
"epoch": 0.07,
"learning_rate": 2.998435061967231e-05,
"loss": 1.677,
"step": 130
},
{
"epoch": 0.08,
"learning_rate": 2.9981850927109472e-05,
"loss": 1.7677,
"step": 140
},
{
"epoch": 0.08,
"learning_rate": 2.997916622687721e-05,
"loss": 1.7973,
"step": 150
},
{
"epoch": 0.09,
"learning_rate": 2.9976296552128305e-05,
"loss": 1.8208,
"step": 160
},
{
"epoch": 0.1,
"learning_rate": 2.9973241938299756e-05,
"loss": 1.969,
"step": 170
},
{
"epoch": 0.1,
"learning_rate": 2.9970002423112342e-05,
"loss": 1.9945,
"step": 180
},
{
"epoch": 0.11,
"learning_rate": 2.9966578046570134e-05,
"loss": 1.9049,
"step": 190
},
{
"epoch": 0.11,
"learning_rate": 2.996296885096003e-05,
"loss": 1.7058,
"step": 200
},
{
"epoch": 0.12,
"learning_rate": 2.9959174880851214e-05,
"loss": 1.9533,
"step": 210
},
{
"epoch": 0.12,
"learning_rate": 2.9955196183094604e-05,
"loss": 1.8319,
"step": 220
},
{
"epoch": 0.13,
"learning_rate": 2.9951032806822297e-05,
"loss": 1.9198,
"step": 230
},
{
"epoch": 0.13,
"learning_rate": 2.9946684803446928e-05,
"loss": 1.9084,
"step": 240
},
{
"epoch": 0.14,
"learning_rate": 2.9942152226661065e-05,
"loss": 1.6764,
"step": 250
},
{
"epoch": 0.15,
"learning_rate": 2.9937435132436517e-05,
"loss": 1.8921,
"step": 260
},
{
"epoch": 0.15,
"learning_rate": 2.9932533579023685e-05,
"loss": 1.8521,
"step": 270
},
{
"epoch": 0.16,
"learning_rate": 2.9927447626950795e-05,
"loss": 1.8514,
"step": 280
},
{
"epoch": 0.16,
"learning_rate": 2.9922177339023182e-05,
"loss": 1.7181,
"step": 290
},
{
"epoch": 0.17,
"learning_rate": 2.9916722780322504e-05,
"loss": 1.5153,
"step": 300
},
{
"epoch": 0.17,
"learning_rate": 2.9911084018205935e-05,
"loss": 1.8034,
"step": 310
},
{
"epoch": 0.18,
"learning_rate": 2.9905261122305344e-05,
"loss": 1.717,
"step": 320
},
{
"epoch": 0.18,
"learning_rate": 2.9899254164526427e-05,
"loss": 1.7627,
"step": 330
},
{
"epoch": 0.19,
"learning_rate": 2.9893063219047815e-05,
"loss": 2.0163,
"step": 340
},
{
"epoch": 0.2,
"learning_rate": 2.9886688362320173e-05,
"loss": 1.8683,
"step": 350
},
{
"epoch": 0.2,
"learning_rate": 2.988012967306524e-05,
"loss": 1.872,
"step": 360
},
{
"epoch": 0.21,
"learning_rate": 2.9873387232274864e-05,
"loss": 1.7667,
"step": 370
},
{
"epoch": 0.21,
"learning_rate": 2.9866461123210005e-05,
"loss": 1.8722,
"step": 380
},
{
"epoch": 0.22,
"learning_rate": 2.9859351431399698e-05,
"loss": 1.7925,
"step": 390
},
{
"epoch": 0.22,
"learning_rate": 2.985205824464001e-05,
"loss": 1.8615,
"step": 400
},
{
"epoch": 0.23,
"learning_rate": 2.9844581652992936e-05,
"loss": 1.8086,
"step": 410
},
{
"epoch": 0.23,
"learning_rate": 2.983692174878531e-05,
"loss": 1.6497,
"step": 420
},
{
"epoch": 0.24,
"learning_rate": 2.982907862660765e-05,
"loss": 1.7696,
"step": 430
},
{
"epoch": 0.25,
"learning_rate": 2.9821052383312987e-05,
"loss": 1.8549,
"step": 440
},
{
"epoch": 0.25,
"learning_rate": 2.981284311801569e-05,
"loss": 1.8394,
"step": 450
},
{
"epoch": 0.26,
"learning_rate": 2.980445093209021e-05,
"loss": 1.7328,
"step": 460
},
{
"epoch": 0.26,
"learning_rate": 2.9795875929169853e-05,
"loss": 1.7978,
"step": 470
},
{
"epoch": 0.27,
"learning_rate": 2.9787118215145502e-05,
"loss": 1.747,
"step": 480
},
{
"epoch": 0.27,
"learning_rate": 2.977817789816429e-05,
"loss": 1.5575,
"step": 490
},
{
"epoch": 0.28,
"learning_rate": 2.976905508862828e-05,
"loss": 1.7842,
"step": 500
},
{
"epoch": 0.29,
"learning_rate": 2.9759749899193087e-05,
"loss": 1.7003,
"step": 510
},
{
"epoch": 0.29,
"learning_rate": 2.9750262444766502e-05,
"loss": 1.8354,
"step": 520
},
{
"epoch": 0.3,
"learning_rate": 2.9740592842507077e-05,
"loss": 1.7622,
"step": 530
},
{
"epoch": 0.3,
"learning_rate": 2.9730741211822654e-05,
"loss": 1.6064,
"step": 540
},
{
"epoch": 0.31,
"learning_rate": 2.972070767436891e-05,
"loss": 1.8158,
"step": 550
},
{
"epoch": 0.31,
"learning_rate": 2.9710492354047857e-05,
"loss": 1.5377,
"step": 560
},
{
"epoch": 0.32,
"learning_rate": 2.9700095377006294e-05,
"loss": 1.7267,
"step": 570
},
{
"epoch": 0.32,
"learning_rate": 2.968951687163426e-05,
"loss": 1.8174,
"step": 580
},
{
"epoch": 0.33,
"learning_rate": 2.9678756968563467e-05,
"loss": 1.7955,
"step": 590
},
{
"epoch": 0.34,
"learning_rate": 2.9667815800665637e-05,
"loss": 1.6887,
"step": 600
},
{
"epoch": 0.34,
"learning_rate": 2.9656693503050924e-05,
"loss": 1.8191,
"step": 610
},
{
"epoch": 0.35,
"learning_rate": 2.9645390213066193e-05,
"loss": 1.6998,
"step": 620
},
{
"epoch": 0.35,
"learning_rate": 2.963390607029335e-05,
"loss": 1.8392,
"step": 630
},
{
"epoch": 0.36,
"learning_rate": 2.9622241216547623e-05,
"loss": 1.7178,
"step": 640
},
{
"epoch": 0.36,
"learning_rate": 2.9610395795875786e-05,
"loss": 1.7564,
"step": 650
},
{
"epoch": 0.37,
"learning_rate": 2.9598369954554405e-05,
"loss": 1.8589,
"step": 660
},
{
"epoch": 0.37,
"learning_rate": 2.9586163841088023e-05,
"loss": 1.7673,
"step": 670
},
{
"epoch": 0.38,
"learning_rate": 2.957377760620732e-05,
"loss": 1.6346,
"step": 680
},
{
"epoch": 0.39,
"learning_rate": 2.9561211402867255e-05,
"loss": 1.778,
"step": 690
},
{
"epoch": 0.39,
"learning_rate": 2.9548465386245185e-05,
"loss": 1.7146,
"step": 700
},
{
"epoch": 0.4,
"learning_rate": 2.953553971373894e-05,
"loss": 1.8665,
"step": 710
},
{
"epoch": 0.4,
"learning_rate": 2.952243454496488e-05,
"loss": 1.9009,
"step": 720
},
{
"epoch": 0.41,
"learning_rate": 2.9509150041755938e-05,
"loss": 1.5969,
"step": 730
},
{
"epoch": 0.41,
"learning_rate": 2.9495686368159592e-05,
"loss": 1.8997,
"step": 740
},
{
"epoch": 0.42,
"learning_rate": 2.9482043690435862e-05,
"loss": 1.7278,
"step": 750
},
{
"epoch": 0.42,
"learning_rate": 2.946822217705526e-05,
"loss": 1.728,
"step": 760
},
{
"epoch": 0.43,
"learning_rate": 2.9454221998696694e-05,
"loss": 1.73,
"step": 770
},
{
"epoch": 0.44,
"learning_rate": 2.9440043328245366e-05,
"loss": 1.8922,
"step": 780
},
{
"epoch": 0.44,
"learning_rate": 2.9425686340790645e-05,
"loss": 1.778,
"step": 790
},
{
"epoch": 0.45,
"learning_rate": 2.9411151213623894e-05,
"loss": 1.8411,
"step": 800
},
{
"epoch": 0.45,
"learning_rate": 2.939643812623629e-05,
"loss": 1.5569,
"step": 810
},
{
"epoch": 0.46,
"learning_rate": 2.938154726031659e-05,
"loss": 1.717,
"step": 820
},
{
"epoch": 0.46,
"learning_rate": 2.9366478799748926e-05,
"loss": 1.8671,
"step": 830
},
{
"epoch": 0.47,
"learning_rate": 2.9351232930610473e-05,
"loss": 1.7678,
"step": 840
},
{
"epoch": 0.48,
"learning_rate": 2.9335809841169212e-05,
"loss": 1.8159,
"step": 850
},
{
"epoch": 0.48,
"learning_rate": 2.932020972188157e-05,
"loss": 1.6383,
"step": 860
},
{
"epoch": 0.49,
"learning_rate": 2.9304432765390085e-05,
"loss": 1.7186,
"step": 870
},
{
"epoch": 0.49,
"learning_rate": 2.9288479166521014e-05,
"loss": 1.7309,
"step": 880
},
{
"epoch": 0.5,
"learning_rate": 2.927234912228193e-05,
"loss": 1.9257,
"step": 890
},
{
"epoch": 0.5,
"learning_rate": 2.9256042831859295e-05,
"loss": 1.8056,
"step": 900
},
{
"epoch": 0.51,
"learning_rate": 2.9239560496616004e-05,
"loss": 1.6982,
"step": 910
},
{
"epoch": 0.51,
"learning_rate": 2.9222902320088882e-05,
"loss": 1.8193,
"step": 920
},
{
"epoch": 0.52,
"learning_rate": 2.9206068507986184e-05,
"loss": 1.6496,
"step": 930
},
{
"epoch": 0.53,
"learning_rate": 2.9189059268185057e-05,
"loss": 1.6039,
"step": 940
},
{
"epoch": 0.53,
"learning_rate": 2.9171874810728956e-05,
"loss": 1.6262,
"step": 950
},
{
"epoch": 0.54,
"learning_rate": 2.9154515347825065e-05,
"loss": 1.8033,
"step": 960
},
{
"epoch": 0.54,
"learning_rate": 2.913698109384168e-05,
"loss": 1.74,
"step": 970
},
{
"epoch": 0.55,
"learning_rate": 2.9119272265305546e-05,
"loss": 1.5725,
"step": 980
},
{
"epoch": 0.55,
"learning_rate": 2.9101389080899197e-05,
"loss": 1.8037,
"step": 990
},
{
"epoch": 0.56,
"learning_rate": 2.9083331761458247e-05,
"loss": 1.6694,
"step": 1000
},
{
"epoch": 0.56,
"learning_rate": 2.9065100529968675e-05,
"loss": 1.7803,
"step": 1010
},
{
"epoch": 0.57,
"learning_rate": 2.904669561156404e-05,
"loss": 1.8956,
"step": 1020
},
{
"epoch": 0.58,
"learning_rate": 2.902811723352275e-05,
"loss": 1.7192,
"step": 1030
},
{
"epoch": 0.58,
"learning_rate": 2.9009365625265215e-05,
"loss": 1.8229,
"step": 1040
},
{
"epoch": 0.59,
"learning_rate": 2.899044101835103e-05,
"loss": 1.5561,
"step": 1050
},
{
"epoch": 0.59,
"learning_rate": 2.8971343646476114e-05,
"loss": 1.5352,
"step": 1060
},
{
"epoch": 0.6,
"learning_rate": 2.8952073745469824e-05,
"loss": 1.8296,
"step": 1070
},
{
"epoch": 0.6,
"learning_rate": 2.893263155329204e-05,
"loss": 1.6643,
"step": 1080
},
{
"epoch": 0.61,
"learning_rate": 2.8913017310030236e-05,
"loss": 1.9615,
"step": 1090
},
{
"epoch": 0.61,
"learning_rate": 2.8893231257896502e-05,
"loss": 1.8237,
"step": 1100
},
{
"epoch": 0.62,
"learning_rate": 2.887327364122455e-05,
"loss": 1.8205,
"step": 1110
},
{
"epoch": 0.63,
"learning_rate": 2.8853144706466725e-05,
"loss": 1.6237,
"step": 1120
},
{
"epoch": 0.63,
"learning_rate": 2.883284470219093e-05,
"loss": 1.8204,
"step": 1130
},
{
"epoch": 0.64,
"learning_rate": 2.881237387907757e-05,
"loss": 1.903,
"step": 1140
},
{
"epoch": 0.64,
"learning_rate": 2.8791732489916455e-05,
"loss": 1.7786,
"step": 1150
},
{
"epoch": 0.65,
"learning_rate": 2.8770920789603687e-05,
"loss": 1.636,
"step": 1160
},
{
"epoch": 0.65,
"learning_rate": 2.874993903513849e-05,
"loss": 1.7493,
"step": 1170
},
{
"epoch": 0.66,
"learning_rate": 2.8728787485620068e-05,
"loss": 1.6591,
"step": 1180
},
{
"epoch": 0.67,
"learning_rate": 2.8707466402244362e-05,
"loss": 1.7473,
"step": 1190
},
{
"epoch": 0.67,
"learning_rate": 2.868597604830088e-05,
"loss": 1.5405,
"step": 1200
},
{
"epoch": 0.68,
"learning_rate": 2.866431668916939e-05,
"loss": 1.7913,
"step": 1210
},
{
"epoch": 0.68,
"learning_rate": 2.864248859231669e-05,
"loss": 1.6366,
"step": 1220
},
{
"epoch": 0.69,
"learning_rate": 2.8620492027293268e-05,
"loss": 1.8807,
"step": 1230
},
{
"epoch": 0.69,
"learning_rate": 2.859832726573e-05,
"loss": 1.8165,
"step": 1240
},
{
"epoch": 0.7,
"learning_rate": 2.857599458133478e-05,
"loss": 1.7687,
"step": 1250
},
{
"epoch": 0.7,
"learning_rate": 2.855349424988915e-05,
"loss": 1.6081,
"step": 1260
},
{
"epoch": 0.71,
"learning_rate": 2.8530826549244873e-05,
"loss": 1.6237,
"step": 1270
},
{
"epoch": 0.72,
"learning_rate": 2.8507991759320545e-05,
"loss": 1.8295,
"step": 1280
},
{
"epoch": 0.72,
"learning_rate": 2.848499016209809e-05,
"loss": 1.6061,
"step": 1290
},
{
"epoch": 0.73,
"learning_rate": 2.8461822041619312e-05,
"loss": 1.7933,
"step": 1300
},
{
"epoch": 0.73,
"learning_rate": 2.8438487683982364e-05,
"loss": 1.6181,
"step": 1310
},
{
"epoch": 0.74,
"learning_rate": 2.841498737733824e-05,
"loss": 1.7074,
"step": 1320
},
{
"epoch": 0.74,
"learning_rate": 2.839132141188719e-05,
"loss": 1.7032,
"step": 1330
},
{
"epoch": 0.75,
"learning_rate": 2.8367490079875154e-05,
"loss": 1.6583,
"step": 1340
},
{
"epoch": 0.75,
"learning_rate": 2.8343493675590156e-05,
"loss": 1.756,
"step": 1350
},
{
"epoch": 0.76,
"learning_rate": 2.831933249535865e-05,
"loss": 1.8585,
"step": 1360
},
{
"epoch": 0.77,
"learning_rate": 2.829500683754187e-05,
"loss": 1.832,
"step": 1370
},
{
"epoch": 0.77,
"learning_rate": 2.827051700253217e-05,
"loss": 1.505,
"step": 1380
},
{
"epoch": 0.78,
"learning_rate": 2.8245863292749266e-05,
"loss": 1.7991,
"step": 1390
},
{
"epoch": 0.78,
"learning_rate": 2.8221046012636558e-05,
"loss": 1.7818,
"step": 1400
},
{
"epoch": 0.79,
"learning_rate": 2.819606546865731e-05,
"loss": 1.6368,
"step": 1410
},
{
"epoch": 0.79,
"learning_rate": 2.817092196929091e-05,
"loss": 1.7841,
"step": 1420
},
{
"epoch": 0.8,
"learning_rate": 2.8145615825029062e-05,
"loss": 1.5794,
"step": 1430
},
{
"epoch": 0.81,
"learning_rate": 2.8120147348371912e-05,
"loss": 2.0193,
"step": 1440
},
{
"epoch": 0.81,
"learning_rate": 2.809451685382423e-05,
"loss": 1.7968,
"step": 1450
},
{
"epoch": 0.82,
"learning_rate": 2.8068724657891507e-05,
"loss": 1.7328,
"step": 1460
},
{
"epoch": 0.82,
"learning_rate": 2.8042771079076044e-05,
"loss": 1.8429,
"step": 1470
},
{
"epoch": 0.83,
"learning_rate": 2.801665643787303e-05,
"loss": 1.7821,
"step": 1480
},
{
"epoch": 0.83,
"learning_rate": 2.7990381056766583e-05,
"loss": 1.7093,
"step": 1490
},
{
"epoch": 0.84,
"learning_rate": 2.7963945260225748e-05,
"loss": 1.7474,
"step": 1500
},
{
"epoch": 0.84,
"learning_rate": 2.7937349374700527e-05,
"loss": 1.6828,
"step": 1510
},
{
"epoch": 0.85,
"learning_rate": 2.7910593728617813e-05,
"loss": 1.7136,
"step": 1520
},
{
"epoch": 0.86,
"learning_rate": 2.7883678652377348e-05,
"loss": 1.9265,
"step": 1530
},
{
"epoch": 0.86,
"learning_rate": 2.7856604478347655e-05,
"loss": 1.7746,
"step": 1540
},
{
"epoch": 0.87,
"learning_rate": 2.7829371540861896e-05,
"loss": 1.9113,
"step": 1550
},
{
"epoch": 0.87,
"learning_rate": 2.7801980176213798e-05,
"loss": 1.7417,
"step": 1560
},
{
"epoch": 0.88,
"learning_rate": 2.7774430722653455e-05,
"loss": 1.6811,
"step": 1570
},
{
"epoch": 0.88,
"learning_rate": 2.7746723520383174e-05,
"loss": 1.6879,
"step": 1580
},
{
"epoch": 0.89,
"learning_rate": 2.771885891155326e-05,
"loss": 1.8756,
"step": 1590
},
{
"epoch": 0.89,
"learning_rate": 2.76908372402578e-05,
"loss": 1.6813,
"step": 1600
},
{
"epoch": 0.9,
"learning_rate": 2.7662658852530412e-05,
"loss": 1.7613,
"step": 1610
},
{
"epoch": 0.91,
"learning_rate": 2.763432409633998e-05,
"loss": 1.801,
"step": 1620
},
{
"epoch": 0.91,
"learning_rate": 2.760583332158634e-05,
"loss": 1.8535,
"step": 1630
},
{
"epoch": 0.92,
"learning_rate": 2.7577186880095966e-05,
"loss": 1.6062,
"step": 1640
},
{
"epoch": 0.92,
"learning_rate": 2.7548385125617626e-05,
"loss": 1.6124,
"step": 1650
},
{
"epoch": 0.93,
"learning_rate": 2.7519428413818034e-05,
"loss": 1.7667,
"step": 1660
},
{
"epoch": 0.93,
"learning_rate": 2.7490317102277412e-05,
"loss": 1.9633,
"step": 1670
},
{
"epoch": 0.94,
"learning_rate": 2.7461051550485116e-05,
"loss": 1.734,
"step": 1680
},
{
"epoch": 0.94,
"learning_rate": 2.7431632119835187e-05,
"loss": 1.5784,
"step": 1690
},
{
"epoch": 0.95,
"learning_rate": 2.740205917362186e-05,
"loss": 1.6318,
"step": 1700
},
{
"epoch": 0.96,
"learning_rate": 2.7372333077035128e-05,
"loss": 1.6347,
"step": 1710
},
{
"epoch": 0.96,
"learning_rate": 2.7342454197156194e-05,
"loss": 1.7411,
"step": 1720
},
{
"epoch": 0.97,
"learning_rate": 2.7312422902952934e-05,
"loss": 1.5086,
"step": 1730
},
{
"epoch": 0.97,
"learning_rate": 2.728223956527539e-05,
"loss": 1.6674,
"step": 1740
},
{
"epoch": 0.98,
"learning_rate": 2.7251904556851125e-05,
"loss": 1.6486,
"step": 1750
},
{
"epoch": 0.98,
"learning_rate": 2.722141825228066e-05,
"loss": 1.6771,
"step": 1760
},
{
"epoch": 0.99,
"learning_rate": 2.719078102803286e-05,
"loss": 1.7241,
"step": 1770
},
{
"epoch": 1.0,
"learning_rate": 2.7159993262440228e-05,
"loss": 1.9123,
"step": 1780
},
{
"epoch": 1.0,
"learning_rate": 2.7129055335694296e-05,
"loss": 1.8134,
"step": 1790
},
{
"epoch": 1.01,
"learning_rate": 2.7097967629840906e-05,
"loss": 1.4213,
"step": 1800
},
{
"epoch": 1.01,
"learning_rate": 2.706673052877547e-05,
"loss": 1.5441,
"step": 1810
},
{
"epoch": 1.02,
"learning_rate": 2.703534441823827e-05,
"loss": 1.4331,
"step": 1820
},
{
"epoch": 1.02,
"learning_rate": 2.7003809685809657e-05,
"loss": 1.4868,
"step": 1830
},
{
"epoch": 1.03,
"learning_rate": 2.6972126720905293e-05,
"loss": 1.3508,
"step": 1840
},
{
"epoch": 1.03,
"learning_rate": 2.694029591477133e-05,
"loss": 1.5357,
"step": 1850
},
{
"epoch": 1.04,
"learning_rate": 2.6908317660479583e-05,
"loss": 1.5517,
"step": 1860
},
{
"epoch": 1.05,
"learning_rate": 2.6876192352922654e-05,
"loss": 1.404,
"step": 1870
},
{
"epoch": 1.05,
"learning_rate": 2.68439203888091e-05,
"loss": 1.5658,
"step": 1880
},
{
"epoch": 1.06,
"learning_rate": 2.6811502166658487e-05,
"loss": 1.3443,
"step": 1890
},
{
"epoch": 1.06,
"learning_rate": 2.6778938086796512e-05,
"loss": 1.4317,
"step": 1900
},
{
"epoch": 1.07,
"learning_rate": 2.6746228551350013e-05,
"loss": 1.5423,
"step": 1910
},
{
"epoch": 1.07,
"learning_rate": 2.6713373964242043e-05,
"loss": 1.5985,
"step": 1920
},
{
"epoch": 1.08,
"learning_rate": 2.668037473118687e-05,
"loss": 1.4155,
"step": 1930
},
{
"epoch": 1.08,
"learning_rate": 2.6647231259684946e-05,
"loss": 1.3675,
"step": 1940
},
{
"epoch": 1.09,
"learning_rate": 2.6613943959017917e-05,
"loss": 1.5824,
"step": 1950
},
{
"epoch": 1.1,
"learning_rate": 2.6580513240243524e-05,
"loss": 1.4854,
"step": 1960
},
{
"epoch": 1.1,
"learning_rate": 2.654693951619056e-05,
"loss": 1.4632,
"step": 1970
},
{
"epoch": 1.11,
"learning_rate": 2.651322320145375e-05,
"loss": 1.6141,
"step": 1980
},
{
"epoch": 1.11,
"learning_rate": 2.6479364712388652e-05,
"loss": 1.4791,
"step": 1990
},
{
"epoch": 1.12,
"learning_rate": 2.6445364467106494e-05,
"loss": 1.3545,
"step": 2000
},
{
"epoch": 1.12,
"learning_rate": 2.6411222885469025e-05,
"loss": 1.4002,
"step": 2010
},
{
"epoch": 1.13,
"learning_rate": 2.637694038908333e-05,
"loss": 1.4607,
"step": 2020
},
{
"epoch": 1.13,
"learning_rate": 2.6342517401296618e-05,
"loss": 1.3421,
"step": 2030
},
{
"epoch": 1.14,
"learning_rate": 2.630795434719099e-05,
"loss": 1.5387,
"step": 2040
},
{
"epoch": 1.15,
"learning_rate": 2.6273251653578202e-05,
"loss": 1.4617,
"step": 2050
},
{
"epoch": 1.15,
"learning_rate": 2.623840974899439e-05,
"loss": 1.2332,
"step": 2060
},
{
"epoch": 1.16,
"learning_rate": 2.620342906369477e-05,
"loss": 1.402,
"step": 2070
},
{
"epoch": 1.16,
"learning_rate": 2.616831002964834e-05,
"loss": 1.512,
"step": 2080
},
{
"epoch": 1.17,
"learning_rate": 2.613305308053253e-05,
"loss": 1.5325,
"step": 2090
},
{
"epoch": 1.17,
"learning_rate": 2.609765865172786e-05,
"loss": 1.5493,
"step": 2100
},
{
"epoch": 1.18,
"learning_rate": 2.6062127180312545e-05,
"loss": 1.4947,
"step": 2110
},
{
"epoch": 1.19,
"learning_rate": 2.6026459105057127e-05,
"loss": 1.4473,
"step": 2120
},
{
"epoch": 1.19,
"learning_rate": 2.599065486641902e-05,
"loss": 1.5214,
"step": 2130
},
{
"epoch": 1.2,
"learning_rate": 2.5954714906537116e-05,
"loss": 1.4677,
"step": 2140
},
{
"epoch": 1.2,
"learning_rate": 2.591863966922627e-05,
"loss": 1.5503,
"step": 2150
},
{
"epoch": 1.21,
"learning_rate": 2.5882429599971872e-05,
"loss": 1.4827,
"step": 2160
},
{
"epoch": 1.21,
"learning_rate": 2.5846085145924314e-05,
"loss": 1.6534,
"step": 2170
},
{
"epoch": 1.22,
"learning_rate": 2.580960675589347e-05,
"loss": 1.5245,
"step": 2180
},
{
"epoch": 1.22,
"learning_rate": 2.577299488034318e-05,
"loss": 1.3599,
"step": 2190
},
{
"epoch": 1.23,
"learning_rate": 2.573624997138565e-05,
"loss": 1.4521,
"step": 2200
},
{
"epoch": 1.24,
"learning_rate": 2.569937248277589e-05,
"loss": 1.5231,
"step": 2210
},
{
"epoch": 1.24,
"learning_rate": 2.5662362869906123e-05,
"loss": 1.4871,
"step": 2220
},
{
"epoch": 1.25,
"learning_rate": 2.562522158980012e-05,
"loss": 1.3648,
"step": 2230
},
{
"epoch": 1.25,
"learning_rate": 2.558794910110761e-05,
"loss": 1.4033,
"step": 2240
},
{
"epoch": 1.26,
"learning_rate": 2.5550545864098562e-05,
"loss": 1.306,
"step": 2250
},
{
"epoch": 1.26,
"learning_rate": 2.5513012340657557e-05,
"loss": 1.3426,
"step": 2260
},
{
"epoch": 1.27,
"learning_rate": 2.5475348994278025e-05,
"loss": 1.4083,
"step": 2270
},
{
"epoch": 1.27,
"learning_rate": 2.5437556290056575e-05,
"loss": 1.385,
"step": 2280
},
{
"epoch": 1.28,
"learning_rate": 2.5399634694687224e-05,
"loss": 1.3602,
"step": 2290
},
{
"epoch": 1.29,
"learning_rate": 2.5361584676455627e-05,
"loss": 1.351,
"step": 2300
},
{
"epoch": 1.29,
"learning_rate": 2.5323406705233308e-05,
"loss": 1.2498,
"step": 2310
},
{
"epoch": 1.3,
"learning_rate": 2.5285101252471874e-05,
"loss": 1.4985,
"step": 2320
},
{
"epoch": 1.3,
"learning_rate": 2.5246668791197148e-05,
"loss": 1.3095,
"step": 2330
},
{
"epoch": 1.31,
"learning_rate": 2.5208109796003364e-05,
"loss": 1.3414,
"step": 2340
},
{
"epoch": 1.31,
"learning_rate": 2.5169424743047306e-05,
"loss": 1.5398,
"step": 2350
},
{
"epoch": 1.32,
"learning_rate": 2.5130614110042415e-05,
"loss": 1.469,
"step": 2360
},
{
"epoch": 1.32,
"learning_rate": 2.5091678376252874e-05,
"loss": 1.453,
"step": 2370
},
{
"epoch": 1.33,
"learning_rate": 2.5052618022487733e-05,
"loss": 1.4264,
"step": 2380
},
{
"epoch": 1.34,
"learning_rate": 2.5013433531094934e-05,
"loss": 1.5988,
"step": 2390
},
{
"epoch": 1.34,
"learning_rate": 2.4974125385955374e-05,
"loss": 1.3332,
"step": 2400
},
{
"epoch": 1.35,
"learning_rate": 2.4934694072476913e-05,
"loss": 1.38,
"step": 2410
},
{
"epoch": 1.35,
"learning_rate": 2.4895140077588412e-05,
"loss": 1.5836,
"step": 2420
},
{
"epoch": 1.36,
"learning_rate": 2.4855463889733682e-05,
"loss": 1.4058,
"step": 2430
},
{
"epoch": 1.36,
"learning_rate": 2.481566599886546e-05,
"loss": 1.3771,
"step": 2440
},
{
"epoch": 1.37,
"learning_rate": 2.47757468964394e-05,
"loss": 1.2675,
"step": 2450
},
{
"epoch": 1.38,
"learning_rate": 2.473570707540793e-05,
"loss": 1.4936,
"step": 2460
},
{
"epoch": 1.38,
"learning_rate": 2.4695547030214237e-05,
"loss": 1.5591,
"step": 2470
},
{
"epoch": 1.39,
"learning_rate": 2.4655267256786126e-05,
"loss": 1.4071,
"step": 2480
},
{
"epoch": 1.39,
"learning_rate": 2.4614868252529886e-05,
"loss": 1.5266,
"step": 2490
},
{
"epoch": 1.4,
"learning_rate": 2.4574350516324176e-05,
"loss": 1.4467,
"step": 2500
},
{
"epoch": 1.4,
"learning_rate": 2.453371454851384e-05,
"loss": 1.415,
"step": 2510
},
{
"epoch": 1.41,
"learning_rate": 2.4492960850903757e-05,
"loss": 1.386,
"step": 2520
},
{
"epoch": 1.41,
"learning_rate": 2.4452089926752597e-05,
"loss": 1.4706,
"step": 2530
},
{
"epoch": 1.42,
"learning_rate": 2.4411102280766658e-05,
"loss": 1.4687,
"step": 2540
},
{
"epoch": 1.43,
"learning_rate": 2.436999841909361e-05,
"loss": 1.5284,
"step": 2550
},
{
"epoch": 1.43,
"learning_rate": 2.432877884931623e-05,
"loss": 1.2576,
"step": 2560
},
{
"epoch": 1.44,
"learning_rate": 2.428744408044617e-05,
"loss": 1.4134,
"step": 2570
},
{
"epoch": 1.44,
"learning_rate": 2.4245994622917636e-05,
"loss": 1.3715,
"step": 2580
},
{
"epoch": 1.45,
"learning_rate": 2.4204430988581104e-05,
"loss": 1.642,
"step": 2590
},
{
"epoch": 1.45,
"learning_rate": 2.4162753690696998e-05,
"loss": 1.4369,
"step": 2600
},
{
"epoch": 1.46,
"learning_rate": 2.4120963243929348e-05,
"loss": 1.3596,
"step": 2610
},
{
"epoch": 1.46,
"learning_rate": 2.4079060164339427e-05,
"loss": 1.4818,
"step": 2620
},
{
"epoch": 1.47,
"learning_rate": 2.4037044969379397e-05,
"loss": 1.5069,
"step": 2630
},
{
"epoch": 1.48,
"learning_rate": 2.3994918177885906e-05,
"loss": 1.467,
"step": 2640
},
{
"epoch": 1.48,
"learning_rate": 2.3952680310073684e-05,
"loss": 1.403,
"step": 2650
},
{
"epoch": 1.49,
"learning_rate": 2.391033188752911e-05,
"loss": 1.4376,
"step": 2660
},
{
"epoch": 1.49,
"learning_rate": 2.386787343320379e-05,
"loss": 1.3847,
"step": 2670
},
{
"epoch": 1.5,
"learning_rate": 2.38253054714081e-05,
"loss": 1.4773,
"step": 2680
},
{
"epoch": 1.5,
"learning_rate": 2.3782628527804666e-05,
"loss": 1.6497,
"step": 2690
},
{
"epoch": 1.51,
"learning_rate": 2.3739843129401932e-05,
"loss": 1.2788,
"step": 2700
},
{
"epoch": 1.52,
"learning_rate": 2.3696949804547632e-05,
"loss": 1.5292,
"step": 2710
},
{
"epoch": 1.52,
"learning_rate": 2.365394908292224e-05,
"loss": 1.3128,
"step": 2720
},
{
"epoch": 1.53,
"learning_rate": 2.361084149553247e-05,
"loss": 1.5073,
"step": 2730
},
{
"epoch": 1.53,
"learning_rate": 2.356762757470468e-05,
"loss": 1.5516,
"step": 2740
},
{
"epoch": 1.54,
"learning_rate": 2.3524307854078337e-05,
"loss": 1.5336,
"step": 2750
},
{
"epoch": 1.54,
"learning_rate": 2.3480882868599383e-05,
"loss": 1.5017,
"step": 2760
},
{
"epoch": 1.55,
"learning_rate": 2.3437353154513675e-05,
"loss": 1.4366,
"step": 2770
},
{
"epoch": 1.55,
"learning_rate": 2.3393719249360335e-05,
"loss": 1.3542,
"step": 2780
},
{
"epoch": 1.56,
"learning_rate": 2.3349981691965104e-05,
"loss": 1.3029,
"step": 2790
},
{
"epoch": 1.57,
"learning_rate": 2.3306141022433728e-05,
"loss": 1.5828,
"step": 2800
},
{
"epoch": 1.57,
"learning_rate": 2.3262197782145237e-05,
"loss": 1.3533,
"step": 2810
},
{
"epoch": 1.58,
"learning_rate": 2.3218152513745306e-05,
"loss": 1.624,
"step": 2820
},
{
"epoch": 1.58,
"learning_rate": 2.3174005761139516e-05,
"loss": 1.399,
"step": 2830
},
{
"epoch": 1.59,
"learning_rate": 2.3129758069486665e-05,
"loss": 1.4804,
"step": 2840
},
{
"epoch": 1.59,
"learning_rate": 2.308540998519203e-05,
"loss": 1.4307,
"step": 2850
},
{
"epoch": 1.6,
"learning_rate": 2.3040962055900598e-05,
"loss": 1.1812,
"step": 2860
},
{
"epoch": 1.6,
"learning_rate": 2.299641483049034e-05,
"loss": 1.4941,
"step": 2870
},
{
"epoch": 1.61,
"learning_rate": 2.2951768859065405e-05,
"loss": 1.5981,
"step": 2880
},
{
"epoch": 1.62,
"learning_rate": 2.2907024692949328e-05,
"loss": 1.4082,
"step": 2890
},
{
"epoch": 1.62,
"learning_rate": 2.2862182884678243e-05,
"loss": 1.5103,
"step": 2900
},
{
"epoch": 1.63,
"learning_rate": 2.2817243987994043e-05,
"loss": 1.5133,
"step": 2910
},
{
"epoch": 1.63,
"learning_rate": 2.2772208557837526e-05,
"loss": 1.3188,
"step": 2920
},
{
"epoch": 1.64,
"learning_rate": 2.2727077150341594e-05,
"loss": 1.4041,
"step": 2930
},
{
"epoch": 1.64,
"learning_rate": 2.2681850322824324e-05,
"loss": 1.3602,
"step": 2940
},
{
"epoch": 1.65,
"learning_rate": 2.2636528633782136e-05,
"loss": 1.4814,
"step": 2950
},
{
"epoch": 1.65,
"learning_rate": 2.2591112642882853e-05,
"loss": 1.5465,
"step": 2960
},
{
"epoch": 1.66,
"learning_rate": 2.254560291095883e-05,
"loss": 1.595,
"step": 2970
},
{
"epoch": 1.67,
"learning_rate": 2.25e-05,
"loss": 1.3574,
"step": 2980
},
{
"epoch": 1.67,
"learning_rate": 2.2454304473146947e-05,
"loss": 1.3198,
"step": 2990
},
{
"epoch": 1.68,
"learning_rate": 2.2408516894683952e-05,
"loss": 1.5806,
"step": 3000
},
{
"epoch": 1.68,
"learning_rate": 2.2362637830032016e-05,
"loss": 1.4193,
"step": 3010
},
{
"epoch": 1.69,
"learning_rate": 2.2316667845741885e-05,
"loss": 1.3187,
"step": 3020
},
{
"epoch": 1.69,
"learning_rate": 2.2270607509487047e-05,
"loss": 1.4614,
"step": 3030
},
{
"epoch": 1.7,
"learning_rate": 2.2224457390056747e-05,
"loss": 1.433,
"step": 3040
},
{
"epoch": 1.71,
"learning_rate": 2.2178218057348914e-05,
"loss": 1.5312,
"step": 3050
},
{
"epoch": 1.71,
"learning_rate": 2.2131890082363176e-05,
"loss": 1.5062,
"step": 3060
},
{
"epoch": 1.72,
"learning_rate": 2.2085474037193775e-05,
"loss": 1.3538,
"step": 3070
},
{
"epoch": 1.72,
"learning_rate": 2.2038970495022515e-05,
"loss": 1.3529,
"step": 3080
},
{
"epoch": 1.73,
"learning_rate": 2.199238003011168e-05,
"loss": 1.5783,
"step": 3090
},
{
"epoch": 1.73,
"learning_rate": 2.194570321779695e-05,
"loss": 1.2123,
"step": 3100
},
{
"epoch": 1.74,
"learning_rate": 2.1898940634480282e-05,
"loss": 1.5476,
"step": 3110
},
{
"epoch": 1.74,
"learning_rate": 2.185209285762281e-05,
"loss": 1.2515,
"step": 3120
},
{
"epoch": 1.75,
"learning_rate": 2.1805160465737695e-05,
"loss": 1.4989,
"step": 3130
},
{
"epoch": 1.76,
"learning_rate": 2.1758144038383e-05,
"loss": 1.4593,
"step": 3140
},
{
"epoch": 1.76,
"learning_rate": 2.171104415615452e-05,
"loss": 1.3734,
"step": 3150
},
{
"epoch": 1.77,
"learning_rate": 2.166386140067861e-05,
"loss": 1.4711,
"step": 3160
},
{
"epoch": 1.77,
"learning_rate": 2.1616596354605025e-05,
"loss": 1.6208,
"step": 3170
},
{
"epoch": 1.78,
"learning_rate": 2.1569249601599697e-05,
"loss": 1.5184,
"step": 3180
},
{
"epoch": 1.78,
"learning_rate": 2.1521821726337537e-05,
"loss": 1.3588,
"step": 3190
},
{
"epoch": 1.79,
"learning_rate": 2.147431331449522e-05,
"loss": 1.2866,
"step": 3200
},
{
"epoch": 1.79,
"learning_rate": 2.1426724952743962e-05,
"loss": 1.2663,
"step": 3210
},
{
"epoch": 1.8,
"learning_rate": 2.1379057228742246e-05,
"loss": 1.4232,
"step": 3220
},
{
"epoch": 1.81,
"learning_rate": 2.1331310731128595e-05,
"loss": 1.4431,
"step": 3230
},
{
"epoch": 1.81,
"learning_rate": 2.128348604951428e-05,
"loss": 1.2133,
"step": 3240
},
{
"epoch": 1.82,
"learning_rate": 2.1235583774476055e-05,
"loss": 1.3886,
"step": 3250
},
{
"epoch": 1.82,
"learning_rate": 2.1187604497548862e-05,
"loss": 1.4493,
"step": 3260
},
{
"epoch": 1.83,
"learning_rate": 2.1139548811218522e-05,
"loss": 1.4091,
"step": 3270
},
{
"epoch": 1.83,
"learning_rate": 2.1091417308914406e-05,
"loss": 1.529,
"step": 3280
},
{
"epoch": 1.84,
"learning_rate": 2.1043210585002142e-05,
"loss": 1.4315,
"step": 3290
},
{
"epoch": 1.84,
"learning_rate": 2.0994929234776232e-05,
"loss": 1.4574,
"step": 3300
},
{
"epoch": 1.85,
"learning_rate": 2.0946573854452746e-05,
"loss": 1.6205,
"step": 3310
},
{
"epoch": 1.86,
"learning_rate": 2.089814504116191e-05,
"loss": 1.4905,
"step": 3320
},
{
"epoch": 1.86,
"learning_rate": 2.084964339294078e-05,
"loss": 1.4739,
"step": 3330
},
{
"epoch": 1.87,
"learning_rate": 2.0801069508725815e-05,
"loss": 1.3977,
"step": 3340
},
{
"epoch": 1.87,
"learning_rate": 2.0752423988345518e-05,
"loss": 1.3952,
"step": 3350
},
{
"epoch": 1.88,
"learning_rate": 2.0703707432513006e-05,
"loss": 1.2229,
"step": 3360
},
{
"epoch": 1.88,
"learning_rate": 2.0654920442818595e-05,
"loss": 1.2975,
"step": 3370
},
{
"epoch": 1.89,
"learning_rate": 2.0606063621722374e-05,
"loss": 1.44,
"step": 3380
},
{
"epoch": 1.9,
"learning_rate": 2.0557137572546765e-05,
"loss": 1.5033,
"step": 3390
},
{
"epoch": 1.9,
"learning_rate": 2.050814289946908e-05,
"loss": 1.4125,
"step": 3400
},
{
"epoch": 1.91,
"learning_rate": 2.0459080207514043e-05,
"loss": 1.3961,
"step": 3410
},
{
"epoch": 1.91,
"learning_rate": 2.0409950102546334e-05,
"loss": 1.4267,
"step": 3420
},
{
"epoch": 1.92,
"learning_rate": 2.0360753191263094e-05,
"loss": 1.3989,
"step": 3430
},
{
"epoch": 1.92,
"learning_rate": 2.0311490081186458e-05,
"loss": 1.5078,
"step": 3440
},
{
"epoch": 1.93,
"learning_rate": 2.0262161380656014e-05,
"loss": 1.4087,
"step": 3450
},
{
"epoch": 1.93,
"learning_rate": 2.021276769882133e-05,
"loss": 1.5574,
"step": 3460
},
{
"epoch": 1.94,
"learning_rate": 2.016330964563441e-05,
"loss": 1.441,
"step": 3470
},
{
"epoch": 1.95,
"learning_rate": 2.0113787831842155e-05,
"loss": 1.3556,
"step": 3480
},
{
"epoch": 1.95,
"learning_rate": 2.0064202868978848e-05,
"loss": 1.2099,
"step": 3490
},
{
"epoch": 1.96,
"learning_rate": 2.0014555369358576e-05,
"loss": 1.4836,
"step": 3500
},
{
"epoch": 1.96,
"learning_rate": 1.9964845946067682e-05,
"loss": 1.5642,
"step": 3510
},
{
"epoch": 1.97,
"learning_rate": 1.9915075212957192e-05,
"loss": 1.4846,
"step": 3520
},
{
"epoch": 1.97,
"learning_rate": 1.9865243784635234e-05,
"loss": 1.4596,
"step": 3530
},
{
"epoch": 1.98,
"learning_rate": 1.9815352276459447e-05,
"loss": 1.4857,
"step": 3540
},
{
"epoch": 1.98,
"learning_rate": 1.976540130452938e-05,
"loss": 1.2959,
"step": 3550
},
{
"epoch": 1.99,
"learning_rate": 1.9715391485678893e-05,
"loss": 1.4679,
"step": 3560
},
{
"epoch": 2.0,
"learning_rate": 1.966532343746854e-05,
"loss": 1.5293,
"step": 3570
},
{
"epoch": 2.0,
"learning_rate": 1.961519777817791e-05,
"loss": 1.1697,
"step": 3580
},
{
"epoch": 2.01,
"learning_rate": 1.956501512679805e-05,
"loss": 1.0897,
"step": 3590
},
{
"epoch": 2.01,
"learning_rate": 1.9514776103023783e-05,
"loss": 0.9986,
"step": 3600
},
{
"epoch": 2.02,
"learning_rate": 1.9464481327246044e-05,
"loss": 1.1953,
"step": 3610
},
{
"epoch": 2.02,
"learning_rate": 1.9414131420544258e-05,
"loss": 1.2769,
"step": 3620
},
{
"epoch": 2.03,
"learning_rate": 1.9363727004678644e-05,
"loss": 1.0053,
"step": 3630
},
{
"epoch": 2.03,
"learning_rate": 1.9313268702082536e-05,
"loss": 0.893,
"step": 3640
},
{
"epoch": 2.04,
"learning_rate": 1.9262757135854714e-05,
"loss": 1.266,
"step": 3650
},
{
"epoch": 2.05,
"learning_rate": 1.921219292975169e-05,
"loss": 0.9813,
"step": 3660
},
{
"epoch": 2.05,
"learning_rate": 1.916157670818003e-05,
"loss": 1.2662,
"step": 3670
},
{
"epoch": 2.06,
"learning_rate": 1.9110909096188606e-05,
"loss": 1.0087,
"step": 3680
},
{
"epoch": 2.06,
"learning_rate": 1.9060190719460912e-05,
"loss": 0.9801,
"step": 3690
},
{
"epoch": 2.07,
"learning_rate": 1.900942220430733e-05,
"loss": 1.0824,
"step": 3700
},
{
"epoch": 2.07,
"learning_rate": 1.8958604177657374e-05,
"loss": 1.1608,
"step": 3710
},
{
"epoch": 2.08,
"learning_rate": 1.890773726705198e-05,
"loss": 1.2136,
"step": 3720
},
{
"epoch": 2.09,
"learning_rate": 1.885682210063574e-05,
"loss": 1.0815,
"step": 3730
},
{
"epoch": 2.09,
"learning_rate": 1.880585930714914e-05,
"loss": 1.2484,
"step": 3740
},
{
"epoch": 2.1,
"learning_rate": 1.8754849515920798e-05,
"loss": 1.1277,
"step": 3750
},
{
"epoch": 2.1,
"learning_rate": 1.8703793356859717e-05,
"loss": 0.9475,
"step": 3760
},
{
"epoch": 2.11,
"learning_rate": 1.8652691460447466e-05,
"loss": 1.1215,
"step": 3770
},
{
"epoch": 2.11,
"learning_rate": 1.8601544457730426e-05,
"loss": 1.1936,
"step": 3780
},
{
"epoch": 2.12,
"learning_rate": 1.855035298031198e-05,
"loss": 0.9637,
"step": 3790
},
{
"epoch": 2.12,
"learning_rate": 1.849911766034472e-05,
"loss": 0.9681,
"step": 3800
},
{
"epoch": 2.13,
"learning_rate": 1.8447839130522654e-05,
"loss": 1.0851,
"step": 3810
},
{
"epoch": 2.14,
"learning_rate": 1.8396518024073356e-05,
"loss": 1.0022,
"step": 3820
},
{
"epoch": 2.14,
"learning_rate": 1.834515497475019e-05,
"loss": 0.8679,
"step": 3830
},
{
"epoch": 2.15,
"learning_rate": 1.8293750616824443e-05,
"loss": 0.9723,
"step": 3840
},
{
"epoch": 2.15,
"learning_rate": 1.824230558507754e-05,
"loss": 1.1052,
"step": 3850
},
{
"epoch": 2.16,
"learning_rate": 1.819082051479315e-05,
"loss": 1.0025,
"step": 3860
},
{
"epoch": 2.16,
"learning_rate": 1.81392960417494e-05,
"loss": 0.9817,
"step": 3870
},
{
"epoch": 2.17,
"learning_rate": 1.8087732802210956e-05,
"loss": 0.9456,
"step": 3880
},
{
"epoch": 2.17,
"learning_rate": 1.8036131432921236e-05,
"loss": 1.221,
"step": 3890
},
{
"epoch": 2.18,
"learning_rate": 1.7984492571094497e-05,
"loss": 1.0591,
"step": 3900
},
{
"epoch": 2.19,
"learning_rate": 1.7932816854407988e-05,
"loss": 1.12,
"step": 3910
},
{
"epoch": 2.19,
"learning_rate": 1.788110492099407e-05,
"loss": 0.8899,
"step": 3920
},
{
"epoch": 2.2,
"learning_rate": 1.7829357409432347e-05,
"loss": 1.1492,
"step": 3930
},
{
"epoch": 2.2,
"learning_rate": 1.777757495874174e-05,
"loss": 0.9886,
"step": 3940
},
{
"epoch": 2.21,
"learning_rate": 1.7725758208372657e-05,
"loss": 1.1356,
"step": 3950
},
{
"epoch": 2.21,
"learning_rate": 1.7673907798199054e-05,
"loss": 1.1823,
"step": 3960
},
{
"epoch": 2.22,
"learning_rate": 1.7622024368510548e-05,
"loss": 1.0527,
"step": 3970
},
{
"epoch": 2.23,
"learning_rate": 1.75701085600045e-05,
"loss": 1.1962,
"step": 3980
},
{
"epoch": 2.23,
"learning_rate": 1.7518161013778113e-05,
"loss": 1.139,
"step": 3990
},
{
"epoch": 2.24,
"learning_rate": 1.7466182371320518e-05,
"loss": 1.0645,
"step": 4000
},
{
"epoch": 2.24,
"learning_rate": 1.741417327450483e-05,
"loss": 1.2509,
"step": 4010
},
{
"epoch": 2.25,
"learning_rate": 1.7362134365580268e-05,
"loss": 0.8846,
"step": 4020
},
{
"epoch": 2.25,
"learning_rate": 1.7310066287164165e-05,
"loss": 1.23,
"step": 4030
},
{
"epoch": 2.26,
"learning_rate": 1.7257969682234073e-05,
"loss": 1.088,
"step": 4040
},
{
"epoch": 2.26,
"learning_rate": 1.7205845194119813e-05,
"loss": 1.0684,
"step": 4050
},
{
"epoch": 2.27,
"learning_rate": 1.7153693466495538e-05,
"loss": 1.0745,
"step": 4060
},
{
"epoch": 2.28,
"learning_rate": 1.7101515143371748e-05,
"loss": 0.9667,
"step": 4070
},
{
"epoch": 2.28,
"learning_rate": 1.7049310869087402e-05,
"loss": 1.0551,
"step": 4080
},
{
"epoch": 2.29,
"learning_rate": 1.6997081288301895e-05,
"loss": 1.1669,
"step": 4090
},
{
"epoch": 2.29,
"learning_rate": 1.6944827045987148e-05,
"loss": 1.052,
"step": 4100
},
{
"epoch": 2.3,
"learning_rate": 1.6892548787419598e-05,
"loss": 1.0022,
"step": 4110
},
{
"epoch": 2.3,
"learning_rate": 1.6840247158172277e-05,
"loss": 1.1195,
"step": 4120
},
{
"epoch": 2.31,
"learning_rate": 1.67879228041068e-05,
"loss": 1.2091,
"step": 4130
},
{
"epoch": 2.31,
"learning_rate": 1.673557637136542e-05,
"loss": 1.2096,
"step": 4140
},
{
"epoch": 2.32,
"learning_rate": 1.6683208506363023e-05,
"loss": 1.0987,
"step": 4150
},
{
"epoch": 2.33,
"learning_rate": 1.663081985577916e-05,
"loss": 1.0734,
"step": 4160
},
{
"epoch": 2.33,
"learning_rate": 1.6578411066550065e-05,
"loss": 1.1041,
"step": 4170
},
{
"epoch": 2.34,
"learning_rate": 1.652598278586065e-05,
"loss": 1.0299,
"step": 4180
},
{
"epoch": 2.34,
"learning_rate": 1.647353566113653e-05,
"loss": 0.9826,
"step": 4190
},
{
"epoch": 2.35,
"learning_rate": 1.6421070340036026e-05,
"loss": 1.1207,
"step": 4200
},
{
"epoch": 2.35,
"learning_rate": 1.6368587470442145e-05,
"loss": 1.0136,
"step": 4210
},
{
"epoch": 2.36,
"learning_rate": 1.631608770045461e-05,
"loss": 0.936,
"step": 4220
},
{
"epoch": 2.36,
"learning_rate": 1.626357167838185e-05,
"loss": 0.923,
"step": 4230
},
{
"epoch": 2.37,
"learning_rate": 1.6211040052732958e-05,
"loss": 1.0497,
"step": 4240
},
{
"epoch": 2.38,
"learning_rate": 1.6158493472209747e-05,
"loss": 1.2218,
"step": 4250
},
{
"epoch": 2.38,
"learning_rate": 1.6105932585698687e-05,
"loss": 0.9943,
"step": 4260
},
{
"epoch": 2.39,
"learning_rate": 1.605335804226291e-05,
"loss": 1.3231,
"step": 4270
},
{
"epoch": 2.39,
"learning_rate": 1.6000770491134195e-05,
"loss": 0.9965,
"step": 4280
},
{
"epoch": 2.4,
"learning_rate": 1.5948170581704954e-05,
"loss": 1.0948,
"step": 4290
},
{
"epoch": 2.4,
"learning_rate": 1.5895558963520207e-05,
"loss": 1.1235,
"step": 4300
},
{
"epoch": 2.41,
"learning_rate": 1.5842936286269562e-05,
"loss": 1.159,
"step": 4310
},
{
"epoch": 2.42,
"learning_rate": 1.5790303199779194e-05,
"loss": 1.2239,
"step": 4320
},
{
"epoch": 2.42,
"learning_rate": 1.5737660354003818e-05,
"loss": 1.1273,
"step": 4330
},
{
"epoch": 2.43,
"learning_rate": 1.568500839901867e-05,
"loss": 1.1047,
"step": 4340
},
{
"epoch": 2.43,
"learning_rate": 1.5632347985011458e-05,
"loss": 1.174,
"step": 4350
},
{
"epoch": 2.44,
"learning_rate": 1.5579679762274377e-05,
"loss": 1.0841,
"step": 4360
},
{
"epoch": 2.44,
"learning_rate": 1.552700438119601e-05,
"loss": 0.9872,
"step": 4370
},
{
"epoch": 2.45,
"learning_rate": 1.5474322492253382e-05,
"loss": 1.2699,
"step": 4380
},
{
"epoch": 2.45,
"learning_rate": 1.5421634746003853e-05,
"loss": 1.2508,
"step": 4390
},
{
"epoch": 2.46,
"learning_rate": 1.5368941793077115e-05,
"loss": 0.9948,
"step": 4400
},
{
"epoch": 2.47,
"learning_rate": 1.5316244284167168e-05,
"loss": 1.1606,
"step": 4410
},
{
"epoch": 2.47,
"learning_rate": 1.5263542870024263e-05,
"loss": 1.0425,
"step": 4420
},
{
"epoch": 2.48,
"learning_rate": 1.5210838201446885e-05,
"loss": 0.9227,
"step": 4430
},
{
"epoch": 2.48,
"learning_rate": 1.5158130929273695e-05,
"loss": 1.2139,
"step": 4440
},
{
"epoch": 2.49,
"learning_rate": 1.5105421704375514e-05,
"loss": 1.0629,
"step": 4450
},
{
"epoch": 2.49,
"learning_rate": 1.505271117764728e-05,
"loss": 1.093,
"step": 4460
},
{
"epoch": 2.5,
"learning_rate": 1.5e-05,
"loss": 1.0341,
"step": 4470
},
{
"epoch": 2.5,
"learning_rate": 1.494728882235272e-05,
"loss": 0.9888,
"step": 4480
},
{
"epoch": 2.51,
"learning_rate": 1.4894578295624485e-05,
"loss": 1.0607,
"step": 4490
},
{
"epoch": 2.52,
"learning_rate": 1.4841869070726307e-05,
"loss": 1.1264,
"step": 4500
},
{
"epoch": 2.52,
"learning_rate": 1.4789161798553122e-05,
"loss": 1.075,
"step": 4510
},
{
"epoch": 2.53,
"learning_rate": 1.473645712997574e-05,
"loss": 0.9039,
"step": 4520
},
{
"epoch": 2.53,
"learning_rate": 1.4683755715832838e-05,
"loss": 0.9022,
"step": 4530
},
{
"epoch": 2.54,
"learning_rate": 1.4631058206922886e-05,
"loss": 1.0198,
"step": 4540
},
{
"epoch": 2.54,
"learning_rate": 1.457836525399615e-05,
"loss": 0.9282,
"step": 4550
},
{
"epoch": 2.55,
"learning_rate": 1.4525677507746615e-05,
"loss": 1.0936,
"step": 4560
},
{
"epoch": 2.55,
"learning_rate": 1.447299561880399e-05,
"loss": 1.1499,
"step": 4570
},
{
"epoch": 2.56,
"learning_rate": 1.4420320237725632e-05,
"loss": 0.9929,
"step": 4580
},
{
"epoch": 2.57,
"learning_rate": 1.4367652014988546e-05,
"loss": 1.0801,
"step": 4590
},
{
"epoch": 2.57,
"learning_rate": 1.4314991600981336e-05,
"loss": 1.1018,
"step": 4600
},
{
"epoch": 2.58,
"learning_rate": 1.4262339645996185e-05,
"loss": 1.1583,
"step": 4610
},
{
"epoch": 2.58,
"learning_rate": 1.4209696800220807e-05,
"loss": 1.1983,
"step": 4620
},
{
"epoch": 2.59,
"learning_rate": 1.4157063713730438e-05,
"loss": 1.0392,
"step": 4630
},
{
"epoch": 2.59,
"learning_rate": 1.4104441036479797e-05,
"loss": 0.9127,
"step": 4640
},
{
"epoch": 2.6,
"learning_rate": 1.4051829418295048e-05,
"loss": 1.0644,
"step": 4650
},
{
"epoch": 2.61,
"learning_rate": 1.399922950886581e-05,
"loss": 1.062,
"step": 4660
},
{
"epoch": 2.61,
"learning_rate": 1.3946641957737092e-05,
"loss": 1.0935,
"step": 4670
},
{
"epoch": 2.62,
"learning_rate": 1.3894067414301315e-05,
"loss": 1.0234,
"step": 4680
},
{
"epoch": 2.62,
"learning_rate": 1.3841506527790252e-05,
"loss": 1.1424,
"step": 4690
},
{
"epoch": 2.63,
"learning_rate": 1.3788959947267042e-05,
"loss": 1.0066,
"step": 4700
},
{
"epoch": 2.63,
"learning_rate": 1.3736428321618159e-05,
"loss": 1.0277,
"step": 4710
},
{
"epoch": 2.64,
"learning_rate": 1.3683912299545391e-05,
"loss": 1.058,
"step": 4720
},
{
"epoch": 2.64,
"learning_rate": 1.3631412529557857e-05,
"loss": 0.9107,
"step": 4730
},
{
"epoch": 2.65,
"learning_rate": 1.3578929659963977e-05,
"loss": 1.1074,
"step": 4740
},
{
"epoch": 2.66,
"learning_rate": 1.3526464338863467e-05,
"loss": 0.9071,
"step": 4750
},
{
"epoch": 2.66,
"learning_rate": 1.347401721413935e-05,
"loss": 1.1601,
"step": 4760
},
{
"epoch": 2.67,
"learning_rate": 1.342158893344994e-05,
"loss": 1.131,
"step": 4770
},
{
"epoch": 2.67,
"learning_rate": 1.336918014422084e-05,
"loss": 0.9435,
"step": 4780
},
{
"epoch": 2.68,
"learning_rate": 1.3316791493636981e-05,
"loss": 1.0465,
"step": 4790
},
{
"epoch": 2.68,
"learning_rate": 1.3264423628634583e-05,
"loss": 1.3012,
"step": 4800
},
{
"epoch": 2.69,
"learning_rate": 1.3212077195893198e-05,
"loss": 1.148,
"step": 4810
},
{
"epoch": 2.69,
"learning_rate": 1.3159752841827724e-05,
"loss": 1.1589,
"step": 4820
},
{
"epoch": 2.7,
"learning_rate": 1.3107451212580401e-05,
"loss": 0.8191,
"step": 4830
},
{
"epoch": 2.71,
"learning_rate": 1.3055172954012856e-05,
"loss": 1.0242,
"step": 4840
},
{
"epoch": 2.71,
"learning_rate": 1.3002918711698107e-05,
"loss": 1.0947,
"step": 4850
},
{
"epoch": 2.72,
"learning_rate": 1.2950689130912599e-05,
"loss": 0.9649,
"step": 4860
},
{
"epoch": 2.72,
"learning_rate": 1.2898484856628251e-05,
"loss": 1.1755,
"step": 4870
},
{
"epoch": 2.73,
"learning_rate": 1.2846306533504465e-05,
"loss": 1.2437,
"step": 4880
},
{
"epoch": 2.73,
"learning_rate": 1.2794154805880186e-05,
"loss": 0.9776,
"step": 4890
},
{
"epoch": 2.74,
"learning_rate": 1.2742030317765933e-05,
"loss": 1.0467,
"step": 4900
},
{
"epoch": 2.74,
"learning_rate": 1.2689933712835843e-05,
"loss": 1.2041,
"step": 4910
},
{
"epoch": 2.75,
"learning_rate": 1.2637865634419735e-05,
"loss": 1.1177,
"step": 4920
},
{
"epoch": 2.76,
"learning_rate": 1.258582672549517e-05,
"loss": 0.7324,
"step": 4930
},
{
"epoch": 2.76,
"learning_rate": 1.2533817628679485e-05,
"loss": 1.2101,
"step": 4940
},
{
"epoch": 2.77,
"learning_rate": 1.2481838986221887e-05,
"loss": 1.2195,
"step": 4950
},
{
"epoch": 2.77,
"learning_rate": 1.24298914399955e-05,
"loss": 0.9909,
"step": 4960
},
{
"epoch": 2.78,
"learning_rate": 1.2377975631489455e-05,
"loss": 0.9634,
"step": 4970
},
{
"epoch": 2.78,
"learning_rate": 1.2326092201800948e-05,
"loss": 0.9202,
"step": 4980
},
{
"epoch": 2.79,
"learning_rate": 1.2274241791627344e-05,
"loss": 0.9118,
"step": 4990
},
{
"epoch": 2.8,
"learning_rate": 1.2222425041258267e-05,
"loss": 1.128,
"step": 5000
},
{
"epoch": 2.8,
"learning_rate": 1.2170642590567659e-05,
"loss": 1.0011,
"step": 5010
},
{
"epoch": 2.81,
"learning_rate": 1.2118895079005929e-05,
"loss": 1.0674,
"step": 5020
},
{
"epoch": 2.81,
"learning_rate": 1.2067183145592016e-05,
"loss": 1.1944,
"step": 5030
},
{
"epoch": 2.82,
"learning_rate": 1.2015507428905509e-05,
"loss": 1.0382,
"step": 5040
},
{
"epoch": 2.82,
"learning_rate": 1.1963868567078766e-05,
"loss": 0.9095,
"step": 5050
},
{
"epoch": 2.83,
"learning_rate": 1.1912267197789047e-05,
"loss": 0.9054,
"step": 5060
},
{
"epoch": 2.83,
"learning_rate": 1.1860703958250604e-05,
"loss": 1.1068,
"step": 5070
},
{
"epoch": 2.84,
"learning_rate": 1.1809179485206847e-05,
"loss": 1.1493,
"step": 5080
},
{
"epoch": 2.85,
"learning_rate": 1.1757694414922458e-05,
"loss": 1.0363,
"step": 5090
},
{
"epoch": 2.85,
"learning_rate": 1.1706249383175558e-05,
"loss": 0.8985,
"step": 5100
},
{
"epoch": 2.86,
"learning_rate": 1.1654845025249815e-05,
"loss": 1.2631,
"step": 5110
},
{
"epoch": 2.86,
"learning_rate": 1.1603481975926643e-05,
"loss": 0.9992,
"step": 5120
},
{
"epoch": 2.87,
"learning_rate": 1.1552160869477348e-05,
"loss": 0.974,
"step": 5130
},
{
"epoch": 2.87,
"learning_rate": 1.1500882339655278e-05,
"loss": 1.0951,
"step": 5140
},
{
"epoch": 2.88,
"learning_rate": 1.144964701968802e-05,
"loss": 1.2146,
"step": 5150
},
{
"epoch": 2.88,
"learning_rate": 1.1398455542269578e-05,
"loss": 1.0782,
"step": 5160
},
{
"epoch": 2.89,
"learning_rate": 1.1347308539552538e-05,
"loss": 0.9803,
"step": 5170
},
{
"epoch": 2.9,
"learning_rate": 1.1296206643140284e-05,
"loss": 0.9446,
"step": 5180
},
{
"epoch": 2.9,
"learning_rate": 1.1245150484079204e-05,
"loss": 0.8893,
"step": 5190
},
{
"epoch": 2.91,
"learning_rate": 1.1194140692850863e-05,
"loss": 0.9835,
"step": 5200
},
{
"epoch": 2.91,
"learning_rate": 1.1143177899364261e-05,
"loss": 1.0186,
"step": 5210
},
{
"epoch": 2.92,
"learning_rate": 1.1092262732948017e-05,
"loss": 0.7689,
"step": 5220
},
{
"epoch": 2.92,
"learning_rate": 1.1041395822342632e-05,
"loss": 1.0269,
"step": 5230
},
{
"epoch": 2.93,
"learning_rate": 1.0990577795692676e-05,
"loss": 0.8962,
"step": 5240
},
{
"epoch": 2.94,
"learning_rate": 1.093980928053909e-05,
"loss": 1.0122,
"step": 5250
},
{
"epoch": 2.94,
"learning_rate": 1.0889090903811397e-05,
"loss": 1.0469,
"step": 5260
},
{
"epoch": 2.95,
"learning_rate": 1.0838423291819975e-05,
"loss": 0.9124,
"step": 5270
},
{
"epoch": 2.95,
"learning_rate": 1.0787807070248306e-05,
"loss": 0.7852,
"step": 5280
},
{
"epoch": 2.96,
"learning_rate": 1.0737242864145287e-05,
"loss": 0.9694,
"step": 5290
},
{
"epoch": 2.96,
"learning_rate": 1.068673129791747e-05,
"loss": 1.0638,
"step": 5300
},
{
"epoch": 2.97,
"learning_rate": 1.063627299532136e-05,
"loss": 1.0425,
"step": 5310
},
{
"epoch": 2.97,
"learning_rate": 1.0585868579455745e-05,
"loss": 1.1105,
"step": 5320
},
{
"epoch": 2.98,
"learning_rate": 1.0535518672753957e-05,
"loss": 1.0176,
"step": 5330
},
{
"epoch": 2.99,
"learning_rate": 1.0485223896976221e-05,
"loss": 0.9901,
"step": 5340
},
{
"epoch": 2.99,
"learning_rate": 1.043498487320195e-05,
"loss": 1.0419,
"step": 5350
},
{
"epoch": 3.0,
"learning_rate": 1.0384802221822098e-05,
"loss": 1.1056,
"step": 5360
},
{
"epoch": 3.0,
"learning_rate": 1.0334676562531469e-05,
"loss": 1.034,
"step": 5370
},
{
"epoch": 3.01,
"learning_rate": 1.028460851432111e-05,
"loss": 0.774,
"step": 5380
},
{
"epoch": 3.01,
"learning_rate": 1.0234598695470621e-05,
"loss": 0.6653,
"step": 5390
},
{
"epoch": 3.02,
"learning_rate": 1.0184647723540557e-05,
"loss": 0.8275,
"step": 5400
},
{
"epoch": 3.02,
"learning_rate": 1.0134756215364765e-05,
"loss": 0.8653,
"step": 5410
},
{
"epoch": 3.03,
"learning_rate": 1.0084924787042809e-05,
"loss": 0.8393,
"step": 5420
},
{
"epoch": 3.04,
"learning_rate": 1.0035154053932322e-05,
"loss": 0.9842,
"step": 5430
},
{
"epoch": 3.04,
"learning_rate": 9.985444630641426e-06,
"loss": 0.7353,
"step": 5440
},
{
"epoch": 3.05,
"learning_rate": 9.935797131021156e-06,
"loss": 0.8148,
"step": 5450
},
{
"epoch": 3.05,
"learning_rate": 9.886212168157848e-06,
"loss": 0.6374,
"step": 5460
},
{
"epoch": 3.06,
"learning_rate": 9.836690354365593e-06,
"loss": 0.7298,
"step": 5470
},
{
"epoch": 3.06,
"learning_rate": 9.787232301178669e-06,
"loss": 1.0081,
"step": 5480
},
{
"epoch": 3.07,
"learning_rate": 9.737838619343992e-06,
"loss": 0.7143,
"step": 5490
},
{
"epoch": 3.07,
"learning_rate": 9.688509918813547e-06,
"loss": 0.7084,
"step": 5500
},
{
"epoch": 3.08,
"learning_rate": 9.639246808736909e-06,
"loss": 0.7417,
"step": 5510
},
{
"epoch": 3.09,
"learning_rate": 9.590049897453668e-06,
"loss": 0.6705,
"step": 5520
},
{
"epoch": 3.09,
"learning_rate": 9.540919792485957e-06,
"loss": 0.6495,
"step": 5530
},
{
"epoch": 3.1,
"learning_rate": 9.491857100530919e-06,
"loss": 0.7477,
"step": 5540
},
{
"epoch": 3.1,
"learning_rate": 9.442862427453234e-06,
"loss": 1.0138,
"step": 5550
},
{
"epoch": 3.11,
"learning_rate": 9.393936378277632e-06,
"loss": 0.8123,
"step": 5560
},
{
"epoch": 3.11,
"learning_rate": 9.34507955718141e-06,
"loss": 0.8901,
"step": 5570
},
{
"epoch": 3.12,
"learning_rate": 9.296292567486997e-06,
"loss": 0.7812,
"step": 5580
},
{
"epoch": 3.13,
"learning_rate": 9.247576011654484e-06,
"loss": 0.6551,
"step": 5590
},
{
"epoch": 3.13,
"learning_rate": 9.198930491274188e-06,
"loss": 0.6861,
"step": 5600
},
{
"epoch": 3.14,
"learning_rate": 9.150356607059226e-06,
"loss": 0.9263,
"step": 5610
},
{
"epoch": 3.14,
"learning_rate": 9.10185495883809e-06,
"loss": 0.981,
"step": 5620
},
{
"epoch": 3.15,
"learning_rate": 9.053426145547259e-06,
"loss": 0.8589,
"step": 5630
},
{
"epoch": 3.15,
"learning_rate": 9.005070765223768e-06,
"loss": 0.6899,
"step": 5640
},
{
"epoch": 3.16,
"learning_rate": 8.95678941499786e-06,
"loss": 0.8632,
"step": 5650
},
{
"epoch": 3.16,
"learning_rate": 8.908582691085593e-06,
"loss": 0.7516,
"step": 5660
},
{
"epoch": 3.17,
"learning_rate": 8.860451188781479e-06,
"loss": 0.759,
"step": 5670
},
{
"epoch": 3.18,
"learning_rate": 8.812395502451139e-06,
"loss": 0.5798,
"step": 5680
},
{
"epoch": 3.18,
"learning_rate": 8.764416225523948e-06,
"loss": 0.9403,
"step": 5690
},
{
"epoch": 3.19,
"learning_rate": 8.716513950485725e-06,
"loss": 0.7402,
"step": 5700
},
{
"epoch": 3.19,
"learning_rate": 8.66868926887141e-06,
"loss": 0.6167,
"step": 5710
},
{
"epoch": 3.2,
"learning_rate": 8.620942771257755e-06,
"loss": 0.6868,
"step": 5720
},
{
"epoch": 3.2,
"learning_rate": 8.573275047256042e-06,
"loss": 0.8338,
"step": 5730
},
{
"epoch": 3.21,
"learning_rate": 8.525686685504781e-06,
"loss": 0.7377,
"step": 5740
},
{
"epoch": 3.21,
"learning_rate": 8.478178273662465e-06,
"loss": 0.585,
"step": 5750
},
{
"epoch": 3.22,
"learning_rate": 8.430750398400309e-06,
"loss": 0.732,
"step": 5760
},
{
"epoch": 3.23,
"learning_rate": 8.383403645394975e-06,
"loss": 0.8296,
"step": 5770
},
{
"epoch": 3.23,
"learning_rate": 8.336138599321391e-06,
"loss": 0.6214,
"step": 5780
},
{
"epoch": 3.24,
"learning_rate": 8.288955843845484e-06,
"loss": 0.6419,
"step": 5790
},
{
"epoch": 3.24,
"learning_rate": 8.241855961617e-06,
"loss": 0.7092,
"step": 5800
},
{
"epoch": 3.25,
"learning_rate": 8.194839534262308e-06,
"loss": 0.7304,
"step": 5810
},
{
"epoch": 3.25,
"learning_rate": 8.147907142377198e-06,
"loss": 0.6233,
"step": 5820
},
{
"epoch": 3.26,
"learning_rate": 8.10105936551972e-06,
"loss": 0.8835,
"step": 5830
},
{
"epoch": 3.26,
"learning_rate": 8.054296782203052e-06,
"loss": 0.8074,
"step": 5840
},
{
"epoch": 3.27,
"learning_rate": 8.007619969888318e-06,
"loss": 0.8599,
"step": 5850
},
{
"epoch": 3.28,
"learning_rate": 7.961029504977487e-06,
"loss": 0.7917,
"step": 5860
},
{
"epoch": 3.28,
"learning_rate": 7.914525962806226e-06,
"loss": 0.6366,
"step": 5870
},
{
"epoch": 3.29,
"learning_rate": 7.868109917636823e-06,
"loss": 0.5526,
"step": 5880
},
{
"epoch": 3.29,
"learning_rate": 7.821781942651086e-06,
"loss": 0.7957,
"step": 5890
},
{
"epoch": 3.3,
"learning_rate": 7.77554260994326e-06,
"loss": 0.7298,
"step": 5900
},
{
"epoch": 3.3,
"learning_rate": 7.729392490512952e-06,
"loss": 0.5928,
"step": 5910
},
{
"epoch": 3.31,
"learning_rate": 7.683332154258117e-06,
"loss": 0.9263,
"step": 5920
},
{
"epoch": 3.32,
"learning_rate": 7.637362169967983e-06,
"loss": 0.9067,
"step": 5930
},
{
"epoch": 3.32,
"learning_rate": 7.591483105316048e-06,
"loss": 0.7889,
"step": 5940
},
{
"epoch": 3.33,
"learning_rate": 7.545695526853057e-06,
"loss": 0.8889,
"step": 5950
},
{
"epoch": 3.33,
"learning_rate": 7.500000000000004e-06,
"loss": 0.7118,
"step": 5960
},
{
"epoch": 3.34,
"learning_rate": 7.454397089041173e-06,
"loss": 0.7987,
"step": 5970
},
{
"epoch": 3.34,
"learning_rate": 7.408887357117147e-06,
"loss": 0.8916,
"step": 5980
},
{
"epoch": 3.35,
"learning_rate": 7.36347136621787e-06,
"loss": 0.6258,
"step": 5990
},
{
"epoch": 3.35,
"learning_rate": 7.3181496771756754e-06,
"loss": 0.7727,
"step": 6000
},
{
"epoch": 3.36,
"learning_rate": 7.272922849658405e-06,
"loss": 0.881,
"step": 6010
},
{
"epoch": 3.37,
"learning_rate": 7.227791442162475e-06,
"loss": 0.8175,
"step": 6020
},
{
"epoch": 3.37,
"learning_rate": 7.182756012005967e-06,
"loss": 0.6031,
"step": 6030
},
{
"epoch": 3.38,
"learning_rate": 7.137817115321759e-06,
"loss": 0.8563,
"step": 6040
},
{
"epoch": 3.38,
"learning_rate": 7.092975307050673e-06,
"loss": 0.7655,
"step": 6050
},
{
"epoch": 3.39,
"learning_rate": 7.048231140934595e-06,
"loss": 0.7223,
"step": 6060
},
{
"epoch": 3.39,
"learning_rate": 7.0035851695096615e-06,
"loss": 0.6133,
"step": 6070
},
{
"epoch": 3.4,
"learning_rate": 6.959037944099402e-06,
"loss": 0.6826,
"step": 6080
},
{
"epoch": 3.4,
"learning_rate": 6.914590014807977e-06,
"loss": 0.6777,
"step": 6090
},
{
"epoch": 3.41,
"learning_rate": 6.870241930513338e-06,
"loss": 0.7286,
"step": 6100
},
{
"epoch": 3.42,
"learning_rate": 6.825994238860491e-06,
"loss": 0.6593,
"step": 6110
},
{
"epoch": 3.42,
"learning_rate": 6.781847486254698e-06,
"loss": 0.5628,
"step": 6120
},
{
"epoch": 3.43,
"learning_rate": 6.737802217854763e-06,
"loss": 0.7376,
"step": 6130
},
{
"epoch": 3.43,
"learning_rate": 6.6938589775662705e-06,
"loss": 0.7518,
"step": 6140
},
{
"epoch": 3.44,
"learning_rate": 6.650018308034893e-06,
"loss": 0.7183,
"step": 6150
},
{
"epoch": 3.44,
"learning_rate": 6.6062807506396696e-06,
"loss": 0.682,
"step": 6160
},
{
"epoch": 3.45,
"learning_rate": 6.562646845486326e-06,
"loss": 0.8939,
"step": 6170
},
{
"epoch": 3.45,
"learning_rate": 6.519117131400617e-06,
"loss": 0.8305,
"step": 6180
},
{
"epoch": 3.46,
"learning_rate": 6.475692145921664e-06,
"loss": 0.6763,
"step": 6190
},
{
"epoch": 3.47,
"learning_rate": 6.432372425295321e-06,
"loss": 0.8523,
"step": 6200
},
{
"epoch": 3.47,
"learning_rate": 6.389158504467531e-06,
"loss": 0.7089,
"step": 6210
},
{
"epoch": 3.48,
"learning_rate": 6.346050917077762e-06,
"loss": 0.682,
"step": 6220
},
{
"epoch": 3.48,
"learning_rate": 6.30305019545237e-06,
"loss": 0.9184,
"step": 6230
},
{
"epoch": 3.49,
"learning_rate": 6.260156870598072e-06,
"loss": 0.9937,
"step": 6240
},
{
"epoch": 3.49,
"learning_rate": 6.217371472195339e-06,
"loss": 0.834,
"step": 6250
},
{
"epoch": 3.5,
"learning_rate": 6.174694528591902e-06,
"loss": 0.7776,
"step": 6260
},
{
"epoch": 3.51,
"learning_rate": 6.132126566796203e-06,
"loss": 0.8525,
"step": 6270
},
{
"epoch": 3.51,
"learning_rate": 6.089668112470892e-06,
"loss": 0.7621,
"step": 6280
},
{
"epoch": 3.52,
"learning_rate": 6.047319689926323e-06,
"loss": 0.754,
"step": 6290
},
{
"epoch": 3.52,
"learning_rate": 6.005081822114096e-06,
"loss": 0.5099,
"step": 6300
},
{
"epoch": 3.53,
"learning_rate": 5.9629550306206025e-06,
"loss": 0.8952,
"step": 6310
},
{
"epoch": 3.53,
"learning_rate": 5.920939835660572e-06,
"loss": 0.6263,
"step": 6320
},
{
"epoch": 3.54,
"learning_rate": 5.879036756070654e-06,
"loss": 0.7957,
"step": 6330
},
{
"epoch": 3.54,
"learning_rate": 5.8372463093030006e-06,
"loss": 0.7933,
"step": 6340
},
{
"epoch": 3.55,
"learning_rate": 5.795569011418897e-06,
"loss": 0.7915,
"step": 6350
},
{
"epoch": 3.56,
"learning_rate": 5.754005377082365e-06,
"loss": 0.5351,
"step": 6360
},
{
"epoch": 3.56,
"learning_rate": 5.712555919553834e-06,
"loss": 0.8146,
"step": 6370
},
{
"epoch": 3.57,
"learning_rate": 5.671221150683772e-06,
"loss": 0.7158,
"step": 6380
},
{
"epoch": 3.57,
"learning_rate": 5.6300015809063935e-06,
"loss": 0.7009,
"step": 6390
},
{
"epoch": 3.58,
"learning_rate": 5.588897719233341e-06,
"loss": 0.8588,
"step": 6400
},
{
"epoch": 3.58,
"learning_rate": 5.547910073247408e-06,
"loss": 0.7597,
"step": 6410
},
{
"epoch": 3.59,
"learning_rate": 5.507039149096251e-06,
"loss": 0.7043,
"step": 6420
},
{
"epoch": 3.59,
"learning_rate": 5.466285451486161e-06,
"loss": 0.7282,
"step": 6430
},
{
"epoch": 3.6,
"learning_rate": 5.425649483675827e-06,
"loss": 0.7333,
"step": 6440
},
{
"epoch": 3.61,
"learning_rate": 5.385131747470118e-06,
"loss": 0.7429,
"step": 6450
},
{
"epoch": 3.61,
"learning_rate": 5.3447327432138755e-06,
"loss": 0.7519,
"step": 6460
},
{
"epoch": 3.62,
"learning_rate": 5.304452969785761e-06,
"loss": 0.6528,
"step": 6470
},
{
"epoch": 3.62,
"learning_rate": 5.264292924592074e-06,
"loss": 0.6022,
"step": 6480
},
{
"epoch": 3.63,
"learning_rate": 5.2242531035606044e-06,
"loss": 0.7947,
"step": 6490
},
{
"epoch": 3.63,
"learning_rate": 5.184334001134542e-06,
"loss": 0.7925,
"step": 6500
},
{
"epoch": 3.64,
"learning_rate": 5.144536110266324e-06,
"loss": 0.6298,
"step": 6510
},
{
"epoch": 3.65,
"learning_rate": 5.104859922411587e-06,
"loss": 0.8203,
"step": 6520
},
{
"epoch": 3.65,
"learning_rate": 5.065305927523083e-06,
"loss": 0.7585,
"step": 6530
},
{
"epoch": 3.66,
"learning_rate": 5.025874614044631e-06,
"loss": 0.681,
"step": 6540
},
{
"epoch": 3.66,
"learning_rate": 4.986566468905072e-06,
"loss": 0.8582,
"step": 6550
},
{
"epoch": 3.67,
"learning_rate": 4.9473819775122716e-06,
"loss": 0.873,
"step": 6560
},
{
"epoch": 3.67,
"learning_rate": 4.908321623747127e-06,
"loss": 0.741,
"step": 6570
},
{
"epoch": 3.68,
"learning_rate": 4.86938588995759e-06,
"loss": 0.708,
"step": 6580
},
{
"epoch": 3.68,
"learning_rate": 4.830575256952693e-06,
"loss": 0.8091,
"step": 6590
},
{
"epoch": 3.69,
"learning_rate": 4.791890203996634e-06,
"loss": 0.8071,
"step": 6600
},
{
"epoch": 3.7,
"learning_rate": 4.753331208802857e-06,
"loss": 0.8301,
"step": 6610
},
{
"epoch": 3.7,
"learning_rate": 4.714898747528129e-06,
"loss": 0.8096,
"step": 6620
},
{
"epoch": 3.71,
"learning_rate": 4.676593294766693e-06,
"loss": 0.4324,
"step": 6630
},
{
"epoch": 3.71,
"learning_rate": 4.638415323544376e-06,
"loss": 0.8962,
"step": 6640
},
{
"epoch": 3.72,
"learning_rate": 4.600365305312776e-06,
"loss": 0.7619,
"step": 6650
},
{
"epoch": 3.72,
"learning_rate": 4.562443709943424e-06,
"loss": 0.6719,
"step": 6660
},
{
"epoch": 3.73,
"learning_rate": 4.5246510057219746e-06,
"loss": 0.685,
"step": 6670
},
{
"epoch": 3.73,
"learning_rate": 4.4869876593424484e-06,
"loss": 1.1371,
"step": 6680
},
{
"epoch": 3.74,
"learning_rate": 4.4494541359014375e-06,
"loss": 0.889,
"step": 6690
},
{
"epoch": 3.75,
"learning_rate": 4.412050898892393e-06,
"loss": 0.609,
"step": 6700
},
{
"epoch": 3.75,
"learning_rate": 4.374778410199882e-06,
"loss": 0.7881,
"step": 6710
},
{
"epoch": 3.76,
"learning_rate": 4.337637130093879e-06,
"loss": 0.8446,
"step": 6720
},
{
"epoch": 3.76,
"learning_rate": 4.300627517224106e-06,
"loss": 0.7776,
"step": 6730
},
{
"epoch": 3.77,
"learning_rate": 4.263750028614352e-06,
"loss": 0.8431,
"step": 6740
},
{
"epoch": 3.77,
"learning_rate": 4.227005119656819e-06,
"loss": 0.5648,
"step": 6750
},
{
"epoch": 3.78,
"learning_rate": 4.1903932441065315e-06,
"loss": 0.7488,
"step": 6760
},
{
"epoch": 3.78,
"learning_rate": 4.153914854075689e-06,
"loss": 0.7767,
"step": 6770
},
{
"epoch": 3.79,
"learning_rate": 4.117570400028129e-06,
"loss": 0.9048,
"step": 6780
},
{
"epoch": 3.8,
"learning_rate": 4.081360330773733e-06,
"loss": 0.5816,
"step": 6790
},
{
"epoch": 3.8,
"learning_rate": 4.045285093462887e-06,
"loss": 0.6289,
"step": 6800
},
{
"epoch": 3.81,
"learning_rate": 4.009345133580981e-06,
"loss": 0.8942,
"step": 6810
},
{
"epoch": 3.81,
"learning_rate": 3.973540894942876e-06,
"loss": 0.8111,
"step": 6820
},
{
"epoch": 3.82,
"learning_rate": 3.937872819687455e-06,
"loss": 0.7009,
"step": 6830
},
{
"epoch": 3.82,
"learning_rate": 3.902341348272144e-06,
"loss": 0.7661,
"step": 6840
},
{
"epoch": 3.83,
"learning_rate": 3.86694691946747e-06,
"loss": 0.8373,
"step": 6850
},
{
"epoch": 3.84,
"learning_rate": 3.831689970351659e-06,
"loss": 0.7097,
"step": 6860
},
{
"epoch": 3.84,
"learning_rate": 3.7965709363052332e-06,
"loss": 0.7518,
"step": 6870
},
{
"epoch": 3.85,
"learning_rate": 3.7615902510056175e-06,
"loss": 0.5933,
"step": 6880
},
{
"epoch": 3.85,
"learning_rate": 3.7267483464218034e-06,
"loss": 0.6463,
"step": 6890
},
{
"epoch": 3.86,
"learning_rate": 3.6920456528090147e-06,
"loss": 0.8285,
"step": 6900
},
{
"epoch": 3.86,
"learning_rate": 3.657482598703385e-06,
"loss": 0.7768,
"step": 6910
},
{
"epoch": 3.87,
"learning_rate": 3.6230596109166738e-06,
"loss": 0.8577,
"step": 6920
},
{
"epoch": 3.87,
"learning_rate": 3.5887771145309766e-06,
"loss": 0.8913,
"step": 6930
},
{
"epoch": 3.88,
"learning_rate": 3.5546355328935113e-06,
"loss": 0.8783,
"step": 6940
},
{
"epoch": 3.89,
"learning_rate": 3.5206352876113507e-06,
"loss": 0.8534,
"step": 6950
},
{
"epoch": 3.89,
"learning_rate": 3.4867767985462513e-06,
"loss": 0.4958,
"step": 6960
},
{
"epoch": 3.9,
"learning_rate": 3.4530604838094454e-06,
"loss": 0.5908,
"step": 6970
},
{
"epoch": 3.9,
"learning_rate": 3.4194867597564783e-06,
"loss": 0.604,
"step": 6980
},
{
"epoch": 3.91,
"learning_rate": 3.3860560409820833e-06,
"loss": 0.7594,
"step": 6990
},
{
"epoch": 3.91,
"learning_rate": 3.352768740315055e-06,
"loss": 0.8048,
"step": 7000
},
{
"epoch": 3.92,
"learning_rate": 3.3196252688131386e-06,
"loss": 0.6781,
"step": 7010
},
{
"epoch": 3.92,
"learning_rate": 3.28662603575796e-06,
"loss": 0.7134,
"step": 7020
},
{
"epoch": 3.93,
"learning_rate": 3.2537714486499892e-06,
"loss": 0.7228,
"step": 7030
},
{
"epoch": 3.94,
"learning_rate": 3.2210619132034893e-06,
"loss": 0.6406,
"step": 7040
},
{
"epoch": 3.94,
"learning_rate": 3.188497833341513e-06,
"loss": 0.7482,
"step": 7050
},
{
"epoch": 3.95,
"learning_rate": 3.1560796111909017e-06,
"loss": 0.8413,
"step": 7060
},
{
"epoch": 3.95,
"learning_rate": 3.1238076470773493e-06,
"loss": 0.8051,
"step": 7070
},
{
"epoch": 3.96,
"learning_rate": 3.0916823395204204e-06,
"loss": 0.8283,
"step": 7080
},
{
"epoch": 3.96,
"learning_rate": 3.0597040852286714e-06,
"loss": 0.6651,
"step": 7090
},
{
"epoch": 3.97,
"learning_rate": 3.027873279094709e-06,
"loss": 1.0971,
"step": 7100
},
{
"epoch": 3.97,
"learning_rate": 2.9961903141903453e-06,
"loss": 0.7016,
"step": 7110
},
{
"epoch": 3.98,
"learning_rate": 2.964655581761732e-06,
"loss": 0.7017,
"step": 7120
},
{
"epoch": 3.99,
"learning_rate": 2.9332694712245313e-06,
"loss": 0.7302,
"step": 7130
},
{
"epoch": 3.99,
"learning_rate": 2.9020323701590984e-06,
"loss": 0.6876,
"step": 7140
},
{
"epoch": 4.0,
"learning_rate": 2.8709446643057036e-06,
"loss": 0.7673,
"step": 7150
},
{
"epoch": 4.0,
"learning_rate": 2.8400067375597737e-06,
"loss": 0.6402,
"step": 7160
},
{
"epoch": 4.01,
"learning_rate": 2.8092189719671423e-06,
"loss": 0.4801,
"step": 7170
},
{
"epoch": 4.01,
"learning_rate": 2.7785817477193393e-06,
"loss": 0.621,
"step": 7180
},
{
"epoch": 4.02,
"learning_rate": 2.7480954431488763e-06,
"loss": 0.6656,
"step": 7190
},
{
"epoch": 4.03,
"learning_rate": 2.7177604347246133e-06,
"loss": 0.4288,
"step": 7200
},
{
"epoch": 4.03,
"learning_rate": 2.6875770970470655e-06,
"loss": 0.4964,
"step": 7210
},
{
"epoch": 4.04,
"learning_rate": 2.657545802843815e-06,
"loss": 0.6245,
"step": 7220
},
{
"epoch": 4.04,
"learning_rate": 2.627666922964874e-06,
"loss": 0.5853,
"step": 7230
},
{
"epoch": 4.05,
"learning_rate": 2.597940826378141e-06,
"loss": 0.5975,
"step": 7240
},
{
"epoch": 4.05,
"learning_rate": 2.5683678801648163e-06,
"loss": 0.8613,
"step": 7250
},
{
"epoch": 4.06,
"learning_rate": 2.5389484495148845e-06,
"loss": 0.4891,
"step": 7260
},
{
"epoch": 4.06,
"learning_rate": 2.5096828977225924e-06,
"loss": 0.6346,
"step": 7270
},
{
"epoch": 4.07,
"learning_rate": 2.480571586181969e-06,
"loss": 0.7335,
"step": 7280
},
{
"epoch": 4.08,
"learning_rate": 2.4516148743823713e-06,
"loss": 0.6687,
"step": 7290
},
{
"epoch": 4.08,
"learning_rate": 2.422813119904035e-06,
"loss": 0.5421,
"step": 7300
},
{
"epoch": 4.09,
"learning_rate": 2.3941666784136624e-06,
"loss": 0.7315,
"step": 7310
},
{
"epoch": 4.09,
"learning_rate": 2.365675903660019e-06,
"loss": 0.6212,
"step": 7320
},
{
"epoch": 4.1,
"learning_rate": 2.3373411474695895e-06,
"loss": 0.7525,
"step": 7330
},
{
"epoch": 4.1,
"learning_rate": 2.309162759742203e-06,
"loss": 0.7307,
"step": 7340
},
{
"epoch": 4.11,
"learning_rate": 2.281141088446746e-06,
"loss": 0.6237,
"step": 7350
},
{
"epoch": 4.11,
"learning_rate": 2.2532764796168293e-06,
"loss": 0.7105,
"step": 7360
},
{
"epoch": 4.12,
"learning_rate": 2.2255692773465442e-06,
"loss": 0.7834,
"step": 7370
},
{
"epoch": 4.13,
"learning_rate": 2.1980198237862003e-06,
"loss": 0.4241,
"step": 7380
},
{
"epoch": 4.13,
"learning_rate": 2.1706284591381058e-06,
"loss": 0.5714,
"step": 7390
},
{
"epoch": 4.14,
"learning_rate": 2.143395521652352e-06,
"loss": 0.4715,
"step": 7400
},
{
"epoch": 4.14,
"learning_rate": 2.1163213476226535e-06,
"loss": 0.5136,
"step": 7410
},
{
"epoch": 4.15,
"learning_rate": 2.0894062713821875e-06,
"loss": 0.5505,
"step": 7420
},
{
"epoch": 4.15,
"learning_rate": 2.062650625299474e-06,
"loss": 0.5112,
"step": 7430
},
{
"epoch": 4.16,
"learning_rate": 2.036054739774252e-06,
"loss": 0.4989,
"step": 7440
},
{
"epoch": 4.16,
"learning_rate": 2.0096189432334194e-06,
"loss": 0.788,
"step": 7450
},
{
"epoch": 4.17,
"learning_rate": 1.983343562126969e-06,
"loss": 0.6772,
"step": 7460
},
{
"epoch": 4.18,
"learning_rate": 1.957228920923958e-06,
"loss": 0.5829,
"step": 7470
},
{
"epoch": 4.18,
"learning_rate": 1.931275342108497e-06,
"loss": 0.5513,
"step": 7480
},
{
"epoch": 4.19,
"learning_rate": 1.9054831461757704e-06,
"loss": 0.5969,
"step": 7490
},
{
"epoch": 4.19,
"learning_rate": 1.8798526516280873e-06,
"loss": 0.6546,
"step": 7500
},
{
"epoch": 4.2,
"learning_rate": 1.8543841749709377e-06,
"loss": 0.7873,
"step": 7510
},
{
"epoch": 4.2,
"learning_rate": 1.8290780307090893e-06,
"loss": 0.6751,
"step": 7520
},
{
"epoch": 4.21,
"learning_rate": 1.8039345313426958e-06,
"loss": 0.7515,
"step": 7530
},
{
"epoch": 4.22,
"learning_rate": 1.7789539873634465e-06,
"loss": 0.7192,
"step": 7540
},
{
"epoch": 4.22,
"learning_rate": 1.7541367072507314e-06,
"loss": 0.6749,
"step": 7550
},
{
"epoch": 4.23,
"learning_rate": 1.7294829974678338e-06,
"loss": 0.6236,
"step": 7560
},
{
"epoch": 4.23,
"learning_rate": 1.7049931624581294e-06,
"loss": 0.6538,
"step": 7570
},
{
"epoch": 4.24,
"learning_rate": 1.6806675046413523e-06,
"loss": 0.8152,
"step": 7580
},
{
"epoch": 4.24,
"learning_rate": 1.6565063244098416e-06,
"loss": 0.7328,
"step": 7590
},
{
"epoch": 4.25,
"learning_rate": 1.6325099201248434e-06,
"loss": 0.4668,
"step": 7600
},
{
"epoch": 4.25,
"learning_rate": 1.6086785881128125e-06,
"loss": 0.731,
"step": 7610
},
{
"epoch": 4.26,
"learning_rate": 1.5850126226617611e-06,
"loss": 0.759,
"step": 7620
},
{
"epoch": 4.27,
"learning_rate": 1.5615123160176354e-06,
"loss": 0.6618,
"step": 7630
},
{
"epoch": 4.27,
"learning_rate": 1.538177958380692e-06,
"loss": 0.5365,
"step": 7640
},
{
"epoch": 4.28,
"learning_rate": 1.5150098379019113e-06,
"loss": 0.7881,
"step": 7650
},
{
"epoch": 4.28,
"learning_rate": 1.4920082406794577e-06,
"loss": 0.5379,
"step": 7660
},
{
"epoch": 4.29,
"learning_rate": 1.4691734507551285e-06,
"loss": 0.5792,
"step": 7670
},
{
"epoch": 4.29,
"learning_rate": 1.4465057501108547e-06,
"loss": 0.6737,
"step": 7680
},
{
"epoch": 4.3,
"learning_rate": 1.4240054186652214e-06,
"loss": 0.6142,
"step": 7690
},
{
"epoch": 4.3,
"learning_rate": 1.4016727342700013e-06,
"loss": 0.5541,
"step": 7700
},
{
"epoch": 4.31,
"learning_rate": 1.3795079727067327e-06,
"loss": 0.5935,
"step": 7710
},
{
"epoch": 4.32,
"learning_rate": 1.35751140768331e-06,
"loss": 0.6752,
"step": 7720
},
{
"epoch": 4.32,
"learning_rate": 1.335683310830611e-06,
"loss": 0.6772,
"step": 7730
},
{
"epoch": 4.33,
"learning_rate": 1.314023951699126e-06,
"loss": 0.5887,
"step": 7740
},
{
"epoch": 4.33,
"learning_rate": 1.2925335977556401e-06,
"loss": 0.5533,
"step": 7750
},
{
"epoch": 4.34,
"learning_rate": 1.2712125143799352e-06,
"loss": 0.4973,
"step": 7760
},
{
"epoch": 4.34,
"learning_rate": 1.2500609648615098e-06,
"loss": 0.6422,
"step": 7770
},
{
"epoch": 4.35,
"learning_rate": 1.2290792103963134e-06,
"loss": 0.6261,
"step": 7780
},
{
"epoch": 4.35,
"learning_rate": 1.208267510083545e-06,
"loss": 0.5058,
"step": 7790
},
{
"epoch": 4.36,
"learning_rate": 1.1876261209224315e-06,
"loss": 0.5909,
"step": 7800
},
{
"epoch": 4.37,
"learning_rate": 1.1671552978090706e-06,
"loss": 0.5333,
"step": 7810
},
{
"epoch": 4.37,
"learning_rate": 1.146855293533276e-06,
"loss": 0.502,
"step": 7820
},
{
"epoch": 4.38,
"learning_rate": 1.1267263587754494e-06,
"loss": 0.6833,
"step": 7830
},
{
"epoch": 4.38,
"learning_rate": 1.1067687421034988e-06,
"loss": 0.8602,
"step": 7840
},
{
"epoch": 4.39,
"learning_rate": 1.086982689969761e-06,
"loss": 0.56,
"step": 7850
},
{
"epoch": 4.39,
"learning_rate": 1.0673684467079593e-06,
"loss": 0.5161,
"step": 7860
},
{
"epoch": 4.4,
"learning_rate": 1.047926254530177e-06,
"loss": 0.7071,
"step": 7870
},
{
"epoch": 4.41,
"learning_rate": 1.028656353523888e-06,
"loss": 0.625,
"step": 7880
},
{
"epoch": 4.41,
"learning_rate": 1.009558981648972e-06,
"loss": 0.371,
"step": 7890
},
{
"epoch": 4.42,
"learning_rate": 9.906343747347884e-07,
"loss": 0.5811,
"step": 7900
},
{
"epoch": 4.42,
"learning_rate": 9.718827664772534e-07,
"loss": 0.626,
"step": 7910
},
{
"epoch": 4.43,
"learning_rate": 9.533043884359616e-07,
"loss": 0.4953,
"step": 7920
},
{
"epoch": 4.43,
"learning_rate": 9.348994700313307e-07,
"loss": 0.7068,
"step": 7930
},
{
"epoch": 4.44,
"learning_rate": 9.166682385417518e-07,
"loss": 0.6365,
"step": 7940
},
{
"epoch": 4.44,
"learning_rate": 8.986109191008035e-07,
"loss": 0.6267,
"step": 7950
},
{
"epoch": 4.45,
"learning_rate": 8.807277346944536e-07,
"loss": 0.5179,
"step": 7960
},
{
"epoch": 4.46,
"learning_rate": 8.630189061583204e-07,
"loss": 0.7016,
"step": 7970
},
{
"epoch": 4.46,
"learning_rate": 8.454846521749359e-07,
"loss": 0.4857,
"step": 7980
},
{
"epoch": 4.47,
"learning_rate": 8.281251892710484e-07,
"loss": 0.5935,
"step": 7990
},
{
"epoch": 4.47,
"learning_rate": 8.109407318149458e-07,
"loss": 0.6689,
"step": 8000
},
{
"epoch": 4.48,
"learning_rate": 7.939314920138152e-07,
"loss": 0.5584,
"step": 8010
},
{
"epoch": 4.48,
"learning_rate": 7.770976799111173e-07,
"loss": 0.803,
"step": 8020
},
{
"epoch": 4.49,
"learning_rate": 7.604395033839978e-07,
"loss": 0.5971,
"step": 8030
},
{
"epoch": 4.49,
"learning_rate": 7.439571681407054e-07,
"loss": 0.5327,
"step": 8040
},
{
"epoch": 4.5,
"learning_rate": 7.276508777180723e-07,
"loss": 0.5802,
"step": 8050
},
{
"epoch": 4.51,
"learning_rate": 7.115208334789902e-07,
"loss": 0.5295,
"step": 8060
},
{
"epoch": 4.51,
"learning_rate": 6.955672346099179e-07,
"loss": 0.5187,
"step": 8070
},
{
"epoch": 4.52,
"learning_rate": 6.797902781184323e-07,
"loss": 0.583,
"step": 8080
},
{
"epoch": 4.52,
"learning_rate": 6.64190158830792e-07,
"loss": 0.6176,
"step": 8090
},
{
"epoch": 4.53,
"learning_rate": 6.487670693895303e-07,
"loss": 0.6096,
"step": 8100
},
{
"epoch": 4.53,
"learning_rate": 6.335212002510782e-07,
"loss": 0.6362,
"step": 8110
},
{
"epoch": 4.54,
"learning_rate": 6.184527396834083e-07,
"loss": 0.6799,
"step": 8120
},
{
"epoch": 4.55,
"learning_rate": 6.035618737637111e-07,
"loss": 0.645,
"step": 8130
},
{
"epoch": 4.55,
"learning_rate": 5.888487863761044e-07,
"loss": 0.7412,
"step": 8140
},
{
"epoch": 4.56,
"learning_rate": 5.74313659209354e-07,
"loss": 0.6077,
"step": 8150
},
{
"epoch": 4.56,
"learning_rate": 5.599566717546351e-07,
"loss": 0.5758,
"step": 8160
},
{
"epoch": 4.57,
"learning_rate": 5.457780013033092e-07,
"loss": 0.6381,
"step": 8170
},
{
"epoch": 4.57,
"learning_rate": 5.317778229447412e-07,
"loss": 0.5576,
"step": 8180
},
{
"epoch": 4.58,
"learning_rate": 5.179563095641404e-07,
"loss": 0.4126,
"step": 8190
},
{
"epoch": 4.58,
"learning_rate": 5.043136318404129e-07,
"loss": 0.6127,
"step": 8200
},
{
"epoch": 4.59,
"learning_rate": 4.908499582440645e-07,
"loss": 0.5565,
"step": 8210
},
{
"epoch": 4.6,
"learning_rate": 4.775654550351194e-07,
"loss": 0.6685,
"step": 8220
},
{
"epoch": 4.6,
"learning_rate": 4.6446028626106143e-07,
"loss": 0.5998,
"step": 8230
},
{
"epoch": 4.61,
"learning_rate": 4.515346137548193e-07,
"loss": 0.4885,
"step": 8240
},
{
"epoch": 4.61,
"learning_rate": 4.387885971327499e-07,
"loss": 0.7275,
"step": 8250
},
{
"epoch": 4.62,
"learning_rate": 4.2622239379268457e-07,
"loss": 0.6938,
"step": 8260
},
{
"epoch": 4.62,
"learning_rate": 4.1383615891197765e-07,
"loss": 0.7283,
"step": 8270
},
{
"epoch": 4.63,
"learning_rate": 4.016300454455946e-07,
"loss": 0.6842,
"step": 8280
},
{
"epoch": 4.63,
"learning_rate": 3.8960420412421484e-07,
"loss": 0.4978,
"step": 8290
},
{
"epoch": 4.64,
"learning_rate": 3.777587834523788e-07,
"loss": 0.5604,
"step": 8300
},
{
"epoch": 4.65,
"learning_rate": 3.66093929706649e-07,
"loss": 0.6231,
"step": 8310
},
{
"epoch": 4.65,
"learning_rate": 3.546097869338083e-07,
"loss": 0.5676,
"step": 8320
},
{
"epoch": 4.66,
"learning_rate": 3.433064969490779e-07,
"loss": 0.5001,
"step": 8330
},
{
"epoch": 4.66,
"learning_rate": 3.3218419933436217e-07,
"loss": 0.5672,
"step": 8340
},
{
"epoch": 4.67,
"learning_rate": 3.21243031436535e-07,
"loss": 0.4714,
"step": 8350
},
{
"epoch": 4.67,
"learning_rate": 3.104831283657378e-07,
"loss": 0.5244,
"step": 8360
},
{
"epoch": 4.68,
"learning_rate": 2.9990462299370747e-07,
"loss": 0.4943,
"step": 8370
},
{
"epoch": 4.68,
"learning_rate": 2.8950764595214284e-07,
"loss": 0.6879,
"step": 8380
},
{
"epoch": 4.69,
"learning_rate": 2.792923256310892e-07,
"loss": 0.5334,
"step": 8390
},
{
"epoch": 4.7,
"learning_rate": 2.6925878817734786e-07,
"loss": 0.4144,
"step": 8400
},
{
"epoch": 4.7,
"learning_rate": 2.594071574929241e-07,
"loss": 0.5978,
"step": 8410
},
{
"epoch": 4.71,
"learning_rate": 2.4973755523349684e-07,
"loss": 0.6409,
"step": 8420
},
{
"epoch": 4.71,
"learning_rate": 2.4025010080691465e-07,
"loss": 0.4919,
"step": 8430
},
{
"epoch": 4.72,
"learning_rate": 2.3094491137172046e-07,
"loss": 0.5487,
"step": 8440
},
{
"epoch": 4.72,
"learning_rate": 2.218221018357075e-07,
"loss": 0.5602,
"step": 8450
},
{
"epoch": 4.73,
"learning_rate": 2.128817848544956e-07,
"loss": 0.5135,
"step": 8460
},
{
"epoch": 4.74,
"learning_rate": 2.0412407083014562e-07,
"loss": 0.7505,
"step": 8470
},
{
"epoch": 4.74,
"learning_rate": 1.955490679097921e-07,
"loss": 0.5345,
"step": 8480
},
{
"epoch": 4.75,
"learning_rate": 1.871568819843128e-07,
"loss": 0.579,
"step": 8490
},
{
"epoch": 4.75,
"learning_rate": 1.7894761668701297e-07,
"loss": 0.7364,
"step": 8500
},
{
"epoch": 4.76,
"learning_rate": 1.7092137339235149e-07,
"loss": 0.5175,
"step": 8510
},
{
"epoch": 4.76,
"learning_rate": 1.6307825121469165e-07,
"loss": 0.5939,
"step": 8520
},
{
"epoch": 4.77,
"learning_rate": 1.554183470070658e-07,
"loss": 0.6569,
"step": 8530
},
{
"epoch": 4.77,
"learning_rate": 1.4794175535999266e-07,
"loss": 0.6375,
"step": 8540
},
{
"epoch": 4.78,
"learning_rate": 1.406485686003034e-07,
"loss": 0.5261,
"step": 8550
},
{
"epoch": 4.79,
"learning_rate": 1.3353887678999588e-07,
"loss": 0.5118,
"step": 8560
},
{
"epoch": 4.79,
"learning_rate": 1.266127677251372e-07,
"loss": 0.708,
"step": 8570
},
{
"epoch": 4.8,
"learning_rate": 1.198703269347612e-07,
"loss": 0.6055,
"step": 8580
},
{
"epoch": 4.8,
"learning_rate": 1.1331163767982766e-07,
"loss": 0.6106,
"step": 8590
},
{
"epoch": 4.81,
"learning_rate": 1.0693678095218484e-07,
"loss": 0.6911,
"step": 8600
},
{
"epoch": 4.81,
"learning_rate": 1.0074583547357352e-07,
"loss": 0.5702,
"step": 8610
},
{
"epoch": 4.82,
"learning_rate": 9.473887769465617e-08,
"loss": 0.5446,
"step": 8620
},
{
"epoch": 4.82,
"learning_rate": 8.891598179406602e-08,
"loss": 0.7544,
"step": 8630
},
{
"epoch": 4.83,
"learning_rate": 8.327721967749779e-08,
"loss": 0.4747,
"step": 8640
},
{
"epoch": 4.84,
"learning_rate": 7.782266097681845e-08,
"loss": 0.38,
"step": 8650
},
{
"epoch": 4.84,
"learning_rate": 7.255237304920448e-08,
"loss": 0.6897,
"step": 8660
},
{
"epoch": 4.85,
"learning_rate": 6.746642097631595e-08,
"loss": 0.68,
"step": 8670
},
{
"epoch": 4.85,
"learning_rate": 6.256486756348212e-08,
"loss": 0.6287,
"step": 8680
},
{
"epoch": 4.86,
"learning_rate": 5.784777333893876e-08,
"loss": 0.6572,
"step": 8690
},
{
"epoch": 4.86,
"learning_rate": 5.331519655307371e-08,
"loss": 0.4205,
"step": 8700
},
{
"epoch": 4.87,
"learning_rate": 4.8967193177704154e-08,
"loss": 0.6278,
"step": 8710
},
{
"epoch": 4.87,
"learning_rate": 4.4803816905397147e-08,
"loss": 0.7136,
"step": 8720
},
{
"epoch": 4.88,
"learning_rate": 4.082511914879017e-08,
"loss": 0.4985,
"step": 8730
},
{
"epoch": 4.89,
"learning_rate": 3.7031149039971624e-08,
"loss": 0.7084,
"step": 8740
},
{
"epoch": 4.89,
"learning_rate": 3.342195342986798e-08,
"loss": 0.7644,
"step": 8750
},
{
"epoch": 4.9,
"learning_rate": 2.9997576887660915e-08,
"loss": 0.667,
"step": 8760
},
{
"epoch": 4.9,
"learning_rate": 2.6758061700244417e-08,
"loss": 0.6198,
"step": 8770
},
{
"epoch": 4.91,
"learning_rate": 2.3703447871698538e-08,
"loss": 0.4983,
"step": 8780
},
{
"epoch": 4.91,
"learning_rate": 2.0833773122791445e-08,
"loss": 0.5163,
"step": 8790
},
{
"epoch": 4.92,
"learning_rate": 1.814907289052814e-08,
"loss": 0.4993,
"step": 8800
},
{
"epoch": 4.93,
"learning_rate": 1.5649380327692475e-08,
"loss": 0.5254,
"step": 8810
},
{
"epoch": 4.93,
"learning_rate": 1.3334726302454136e-08,
"loss": 0.6785,
"step": 8820
},
{
"epoch": 4.94,
"learning_rate": 1.1205139397980624e-08,
"loss": 0.5321,
"step": 8830
},
{
"epoch": 4.94,
"learning_rate": 9.260645912084198e-09,
"loss": 0.6349,
"step": 8840
},
{
"epoch": 4.95,
"learning_rate": 7.501269856897142e-09,
"loss": 0.5274,
"step": 8850
},
{
"epoch": 4.95,
"learning_rate": 5.927032958580325e-09,
"loss": 0.5635,
"step": 8860
},
{
"epoch": 4.96,
"learning_rate": 4.537954657046761e-09,
"loss": 0.4729,
"step": 8870
},
{
"epoch": 4.96,
"learning_rate": 3.3340521057284577e-09,
"loss": 0.5237,
"step": 8880
},
{
"epoch": 4.97,
"learning_rate": 2.31534017135826e-09,
"loss": 0.6055,
"step": 8890
},
{
"epoch": 4.98,
"learning_rate": 1.4818314337916584e-09,
"loss": 0.5299,
"step": 8900
},
{
"epoch": 4.98,
"learning_rate": 8.335361858519131e-10,
"loss": 0.616,
"step": 8910
},
{
"epoch": 4.99,
"learning_rate": 3.704624331951623e-10,
"loss": 0.6275,
"step": 8920
},
{
"epoch": 4.99,
"learning_rate": 9.261589422215888e-11,
"loss": 0.7436,
"step": 8930
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 0.6342,
"step": 8940
},
{
"epoch": 5.0,
"step": 8940,
"total_flos": 8.111099524901732e+18,
"train_loss": 1.126360292909396,
"train_runtime": 83987.1568,
"train_samples_per_second": 0.852,
"train_steps_per_second": 0.106
}
],
"logging_steps": 10,
"max_steps": 8940,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1000,
"total_flos": 8.111099524901732e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}