Baichuan-13B-Chat-sft-super / trainer_state.json
wangrongsheng's picture
update model
6da17dd
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.999555522631306,
"global_step": 6748,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9999729068921297e-05,
"loss": 1.8898,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 4.9998916281557476e-05,
"loss": 1.7273,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.999756165552527e-05,
"loss": 1.6799,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 4.999566522018553e-05,
"loss": 1.6431,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.999322701664249e-05,
"loss": 1.6153,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 4.9990247097742984e-05,
"loss": 1.5933,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 4.9986725528075205e-05,
"loss": 1.5913,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 4.998266238396737e-05,
"loss": 1.5434,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 4.997805775348605e-05,
"loss": 1.5304,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 4.997291173643424e-05,
"loss": 1.5531,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.996722444434921e-05,
"loss": 1.5446,
"step": 110
},
{
"epoch": 0.04,
"learning_rate": 4.99609960005001e-05,
"loss": 1.5352,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 4.995422653988524e-05,
"loss": 1.5303,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 4.994691620922919e-05,
"loss": 1.5449,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 4.993906516697964e-05,
"loss": 1.5114,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 4.9930673583303865e-05,
"loss": 1.5043,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 4.992174164008515e-05,
"loss": 1.5476,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 4.991226953091877e-05,
"loss": 1.5107,
"step": 180
},
{
"epoch": 0.06,
"learning_rate": 4.9902257461107824e-05,
"loss": 1.5104,
"step": 190
},
{
"epoch": 0.06,
"learning_rate": 4.9891705647658795e-05,
"loss": 1.5298,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 4.988061431927681e-05,
"loss": 1.4907,
"step": 210
},
{
"epoch": 0.07,
"learning_rate": 4.986898371636071e-05,
"loss": 1.5127,
"step": 220
},
{
"epoch": 0.07,
"learning_rate": 4.985681409099784e-05,
"loss": 1.5037,
"step": 230
},
{
"epoch": 0.07,
"learning_rate": 4.984410570695858e-05,
"loss": 1.5029,
"step": 240
},
{
"epoch": 0.07,
"learning_rate": 4.983085883969063e-05,
"loss": 1.4725,
"step": 250
},
{
"epoch": 0.08,
"learning_rate": 4.981707377631303e-05,
"loss": 1.5148,
"step": 260
},
{
"epoch": 0.08,
"learning_rate": 4.9802750815609936e-05,
"loss": 1.4993,
"step": 270
},
{
"epoch": 0.08,
"learning_rate": 4.978789026802419e-05,
"loss": 1.5006,
"step": 280
},
{
"epoch": 0.09,
"learning_rate": 4.9772492455650494e-05,
"loss": 1.4885,
"step": 290
},
{
"epoch": 0.09,
"learning_rate": 4.975655771222855e-05,
"loss": 1.4898,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 4.9740086383135706e-05,
"loss": 1.4906,
"step": 310
},
{
"epoch": 0.09,
"learning_rate": 4.97230788253796e-05,
"loss": 1.4796,
"step": 320
},
{
"epoch": 0.1,
"learning_rate": 4.970553540759028e-05,
"loss": 1.4861,
"step": 330
},
{
"epoch": 0.1,
"learning_rate": 4.968745651001231e-05,
"loss": 1.4827,
"step": 340
},
{
"epoch": 0.1,
"learning_rate": 4.9668842524496526e-05,
"loss": 1.4884,
"step": 350
},
{
"epoch": 0.11,
"learning_rate": 4.964969385449149e-05,
"loss": 1.4873,
"step": 360
},
{
"epoch": 0.11,
"learning_rate": 4.96300109150348e-05,
"loss": 1.4848,
"step": 370
},
{
"epoch": 0.11,
"learning_rate": 4.960979413274404e-05,
"loss": 1.4881,
"step": 380
},
{
"epoch": 0.12,
"learning_rate": 4.9589043945807594e-05,
"loss": 1.4618,
"step": 390
},
{
"epoch": 0.12,
"learning_rate": 4.9567760803975105e-05,
"loss": 1.4858,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.954594516854773e-05,
"loss": 1.4777,
"step": 410
},
{
"epoch": 0.12,
"learning_rate": 4.952359751236817e-05,
"loss": 1.4828,
"step": 420
},
{
"epoch": 0.13,
"learning_rate": 4.950071831981038e-05,
"loss": 1.4571,
"step": 430
},
{
"epoch": 0.13,
"learning_rate": 4.9477308086769117e-05,
"loss": 1.4724,
"step": 440
},
{
"epoch": 0.13,
"learning_rate": 4.945336732064915e-05,
"loss": 1.4771,
"step": 450
},
{
"epoch": 0.14,
"learning_rate": 4.9428896540354294e-05,
"loss": 1.4604,
"step": 460
},
{
"epoch": 0.14,
"learning_rate": 4.940389627627613e-05,
"loss": 1.4815,
"step": 470
},
{
"epoch": 0.14,
"learning_rate": 4.937836707028255e-05,
"loss": 1.4859,
"step": 480
},
{
"epoch": 0.15,
"learning_rate": 4.935230947570597e-05,
"loss": 1.4715,
"step": 490
},
{
"epoch": 0.15,
"learning_rate": 4.932572405733137e-05,
"loss": 1.4759,
"step": 500
},
{
"epoch": 0.15,
"learning_rate": 4.929861139138404e-05,
"loss": 1.4678,
"step": 510
},
{
"epoch": 0.15,
"learning_rate": 4.9270972065517083e-05,
"loss": 1.4754,
"step": 520
},
{
"epoch": 0.16,
"learning_rate": 4.924280667879869e-05,
"loss": 1.462,
"step": 530
},
{
"epoch": 0.16,
"learning_rate": 4.921411584169915e-05,
"loss": 1.4704,
"step": 540
},
{
"epoch": 0.16,
"learning_rate": 4.918490017607761e-05,
"loss": 1.4661,
"step": 550
},
{
"epoch": 0.17,
"learning_rate": 4.915516031516863e-05,
"loss": 1.471,
"step": 560
},
{
"epoch": 0.17,
"learning_rate": 4.912489690356841e-05,
"loss": 1.451,
"step": 570
},
{
"epoch": 0.17,
"learning_rate": 4.909411059722084e-05,
"loss": 1.4411,
"step": 580
},
{
"epoch": 0.17,
"learning_rate": 4.9062802063403316e-05,
"loss": 1.456,
"step": 590
},
{
"epoch": 0.18,
"learning_rate": 4.90309719807122e-05,
"loss": 1.4678,
"step": 600
},
{
"epoch": 0.18,
"learning_rate": 4.8998621039048205e-05,
"loss": 1.479,
"step": 610
},
{
"epoch": 0.18,
"learning_rate": 4.896574993960136e-05,
"loss": 1.4471,
"step": 620
},
{
"epoch": 0.19,
"learning_rate": 4.893235939483587e-05,
"loss": 1.453,
"step": 630
},
{
"epoch": 0.19,
"learning_rate": 4.8898450128474626e-05,
"loss": 1.4696,
"step": 640
},
{
"epoch": 0.19,
"learning_rate": 4.886402287548357e-05,
"loss": 1.4526,
"step": 650
},
{
"epoch": 0.2,
"learning_rate": 4.8829078382055725e-05,
"loss": 1.4429,
"step": 660
},
{
"epoch": 0.2,
"learning_rate": 4.8793617405595025e-05,
"loss": 1.4491,
"step": 670
},
{
"epoch": 0.2,
"learning_rate": 4.8757640714699924e-05,
"loss": 1.4411,
"step": 680
},
{
"epoch": 0.2,
"learning_rate": 4.872114908914671e-05,
"loss": 1.4543,
"step": 690
},
{
"epoch": 0.21,
"learning_rate": 4.8684143319872636e-05,
"loss": 1.4556,
"step": 700
},
{
"epoch": 0.21,
"learning_rate": 4.864662420895873e-05,
"loss": 1.4506,
"step": 710
},
{
"epoch": 0.21,
"learning_rate": 4.860859256961244e-05,
"loss": 1.4671,
"step": 720
},
{
"epoch": 0.22,
"learning_rate": 4.857004922615002e-05,
"loss": 1.4469,
"step": 730
},
{
"epoch": 0.22,
"learning_rate": 4.8530995013978645e-05,
"loss": 1.4554,
"step": 740
},
{
"epoch": 0.22,
"learning_rate": 4.84914307795783e-05,
"loss": 1.4671,
"step": 750
},
{
"epoch": 0.23,
"learning_rate": 4.845135738048343e-05,
"loss": 1.445,
"step": 760
},
{
"epoch": 0.23,
"learning_rate": 4.841077568526439e-05,
"loss": 1.4469,
"step": 770
},
{
"epoch": 0.23,
"learning_rate": 4.836968657350857e-05,
"loss": 1.4677,
"step": 780
},
{
"epoch": 0.23,
"learning_rate": 4.832809093580135e-05,
"loss": 1.4653,
"step": 790
},
{
"epoch": 0.24,
"learning_rate": 4.8285989673706826e-05,
"loss": 1.4342,
"step": 800
},
{
"epoch": 0.24,
"learning_rate": 4.824338369974822e-05,
"loss": 1.458,
"step": 810
},
{
"epoch": 0.24,
"learning_rate": 4.8200273937388126e-05,
"loss": 1.4541,
"step": 820
},
{
"epoch": 0.25,
"learning_rate": 4.81566613210085e-05,
"loss": 1.4324,
"step": 830
},
{
"epoch": 0.25,
"learning_rate": 4.81125467958904e-05,
"loss": 1.4405,
"step": 840
},
{
"epoch": 0.25,
"learning_rate": 4.80679313181935e-05,
"loss": 1.4408,
"step": 850
},
{
"epoch": 0.25,
"learning_rate": 4.8022815854935356e-05,
"loss": 1.4395,
"step": 860
},
{
"epoch": 0.26,
"learning_rate": 4.797720138397045e-05,
"loss": 1.4359,
"step": 870
},
{
"epoch": 0.26,
"learning_rate": 4.793108889396902e-05,
"loss": 1.442,
"step": 880
},
{
"epoch": 0.26,
"learning_rate": 4.7884479384395594e-05,
"loss": 1.4566,
"step": 890
},
{
"epoch": 0.27,
"learning_rate": 4.7837373865487345e-05,
"loss": 1.4257,
"step": 900
},
{
"epoch": 0.27,
"learning_rate": 4.77897733582322e-05,
"loss": 1.4755,
"step": 910
},
{
"epoch": 0.27,
"learning_rate": 4.774167889434671e-05,
"loss": 1.4476,
"step": 920
},
{
"epoch": 0.28,
"learning_rate": 4.769309151625366e-05,
"loss": 1.4531,
"step": 930
},
{
"epoch": 0.28,
"learning_rate": 4.7644012277059516e-05,
"loss": 1.447,
"step": 940
},
{
"epoch": 0.28,
"learning_rate": 4.7594442240531574e-05,
"loss": 1.4201,
"step": 950
},
{
"epoch": 0.28,
"learning_rate": 4.754438248107491e-05,
"loss": 1.4323,
"step": 960
},
{
"epoch": 0.29,
"learning_rate": 4.7493834083709104e-05,
"loss": 1.4432,
"step": 970
},
{
"epoch": 0.29,
"learning_rate": 4.7442798144044695e-05,
"loss": 1.4339,
"step": 980
},
{
"epoch": 0.29,
"learning_rate": 4.739127576825945e-05,
"loss": 1.4477,
"step": 990
},
{
"epoch": 0.3,
"learning_rate": 4.733926807307441e-05,
"loss": 1.4242,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 4.728677618572965e-05,
"loss": 1.4341,
"step": 1010
},
{
"epoch": 0.3,
"learning_rate": 4.723380124395985e-05,
"loss": 1.4526,
"step": 1020
},
{
"epoch": 0.31,
"learning_rate": 4.7180344395969675e-05,
"loss": 1.4402,
"step": 1030
},
{
"epoch": 0.31,
"learning_rate": 4.712640680040884e-05,
"loss": 1.4257,
"step": 1040
},
{
"epoch": 0.31,
"learning_rate": 4.707198962634701e-05,
"loss": 1.4232,
"step": 1050
},
{
"epoch": 0.31,
"learning_rate": 4.70170940532485e-05,
"loss": 1.4485,
"step": 1060
},
{
"epoch": 0.32,
"learning_rate": 4.6961721270946635e-05,
"loss": 1.456,
"step": 1070
},
{
"epoch": 0.32,
"learning_rate": 4.690587247961804e-05,
"loss": 1.4555,
"step": 1080
},
{
"epoch": 0.32,
"learning_rate": 4.684954888975657e-05,
"loss": 1.4376,
"step": 1090
},
{
"epoch": 0.33,
"learning_rate": 4.6792751722147104e-05,
"loss": 1.4353,
"step": 1100
},
{
"epoch": 0.33,
"learning_rate": 4.6735482207839074e-05,
"loss": 1.4226,
"step": 1110
},
{
"epoch": 0.33,
"learning_rate": 4.6677741588119784e-05,
"loss": 1.4315,
"step": 1120
},
{
"epoch": 0.33,
"learning_rate": 4.66195311144875e-05,
"loss": 1.4303,
"step": 1130
},
{
"epoch": 0.34,
"learning_rate": 4.6560852048624345e-05,
"loss": 1.4288,
"step": 1140
},
{
"epoch": 0.34,
"learning_rate": 4.650170566236892e-05,
"loss": 1.4539,
"step": 1150
},
{
"epoch": 0.34,
"learning_rate": 4.6442093237688756e-05,
"loss": 1.4527,
"step": 1160
},
{
"epoch": 0.35,
"learning_rate": 4.6382016066652556e-05,
"loss": 1.4406,
"step": 1170
},
{
"epoch": 0.35,
"learning_rate": 4.632147545140212e-05,
"loss": 1.4233,
"step": 1180
},
{
"epoch": 0.35,
"learning_rate": 4.626047270412419e-05,
"loss": 1.426,
"step": 1190
},
{
"epoch": 0.36,
"learning_rate": 4.619900914702198e-05,
"loss": 1.4577,
"step": 1200
},
{
"epoch": 0.36,
"learning_rate": 4.613708611228652e-05,
"loss": 1.4313,
"step": 1210
},
{
"epoch": 0.36,
"learning_rate": 4.607470494206776e-05,
"loss": 1.4129,
"step": 1220
},
{
"epoch": 0.36,
"learning_rate": 4.601186698844554e-05,
"loss": 1.4368,
"step": 1230
},
{
"epoch": 0.37,
"learning_rate": 4.594857361340021e-05,
"loss": 1.4342,
"step": 1240
},
{
"epoch": 0.37,
"learning_rate": 4.588482618878316e-05,
"loss": 1.4438,
"step": 1250
},
{
"epoch": 0.37,
"learning_rate": 4.582062609628709e-05,
"loss": 1.4263,
"step": 1260
},
{
"epoch": 0.38,
"learning_rate": 4.575597472741601e-05,
"loss": 1.4379,
"step": 1270
},
{
"epoch": 0.38,
"learning_rate": 4.569087348345512e-05,
"loss": 1.4221,
"step": 1280
},
{
"epoch": 0.38,
"learning_rate": 4.562532377544046e-05,
"loss": 1.4414,
"step": 1290
},
{
"epoch": 0.39,
"learning_rate": 4.5559327024128265e-05,
"loss": 1.4395,
"step": 1300
},
{
"epoch": 0.39,
"learning_rate": 4.549288465996421e-05,
"loss": 1.4278,
"step": 1310
},
{
"epoch": 0.39,
"learning_rate": 4.542599812305243e-05,
"loss": 1.4344,
"step": 1320
},
{
"epoch": 0.39,
"learning_rate": 4.535866886312423e-05,
"loss": 1.4352,
"step": 1330
},
{
"epoch": 0.4,
"learning_rate": 4.529089833950675e-05,
"loss": 1.4133,
"step": 1340
},
{
"epoch": 0.4,
"learning_rate": 4.5222688021091266e-05,
"loss": 1.4506,
"step": 1350
},
{
"epoch": 0.4,
"learning_rate": 4.5154039386301385e-05,
"loss": 1.4295,
"step": 1360
},
{
"epoch": 0.41,
"learning_rate": 4.5084953923061016e-05,
"loss": 1.4389,
"step": 1370
},
{
"epoch": 0.41,
"learning_rate": 4.5015433128762065e-05,
"loss": 1.4247,
"step": 1380
},
{
"epoch": 0.41,
"learning_rate": 4.494547851023205e-05,
"loss": 1.4347,
"step": 1390
},
{
"epoch": 0.41,
"learning_rate": 4.487509158370139e-05,
"loss": 1.4133,
"step": 1400
},
{
"epoch": 0.42,
"learning_rate": 4.480427387477056e-05,
"loss": 1.4296,
"step": 1410
},
{
"epoch": 0.42,
"learning_rate": 4.473302691837702e-05,
"loss": 1.4353,
"step": 1420
},
{
"epoch": 0.42,
"learning_rate": 4.466135225876194e-05,
"loss": 1.4377,
"step": 1430
},
{
"epoch": 0.43,
"learning_rate": 4.458925144943676e-05,
"loss": 1.4168,
"step": 1440
},
{
"epoch": 0.43,
"learning_rate": 4.451672605314948e-05,
"loss": 1.4334,
"step": 1450
},
{
"epoch": 0.43,
"learning_rate": 4.444377764185082e-05,
"loss": 1.44,
"step": 1460
},
{
"epoch": 0.44,
"learning_rate": 4.43704077966601e-05,
"loss": 1.4375,
"step": 1470
},
{
"epoch": 0.44,
"learning_rate": 4.4296618107831036e-05,
"loss": 1.447,
"step": 1480
},
{
"epoch": 0.44,
"learning_rate": 4.422241017471722e-05,
"loss": 1.4151,
"step": 1490
},
{
"epoch": 0.44,
"learning_rate": 4.414778560573749e-05,
"loss": 1.4388,
"step": 1500
},
{
"epoch": 0.45,
"learning_rate": 4.4072746018341036e-05,
"loss": 1.4228,
"step": 1510
},
{
"epoch": 0.45,
"learning_rate": 4.399729303897238e-05,
"loss": 1.4104,
"step": 1520
},
{
"epoch": 0.45,
"learning_rate": 4.392142830303608e-05,
"loss": 1.4441,
"step": 1530
},
{
"epoch": 0.46,
"learning_rate": 4.384515345486131e-05,
"loss": 1.4282,
"step": 1540
},
{
"epoch": 0.46,
"learning_rate": 4.376847014766623e-05,
"loss": 1.4271,
"step": 1550
},
{
"epoch": 0.46,
"learning_rate": 4.369138004352212e-05,
"loss": 1.4223,
"step": 1560
},
{
"epoch": 0.47,
"learning_rate": 4.3613884813317406e-05,
"loss": 1.425,
"step": 1570
},
{
"epoch": 0.47,
"learning_rate": 4.3535986136721377e-05,
"loss": 1.4392,
"step": 1580
},
{
"epoch": 0.47,
"learning_rate": 4.3457685702147834e-05,
"loss": 1.4097,
"step": 1590
},
{
"epoch": 0.47,
"learning_rate": 4.3378985206718484e-05,
"loss": 1.4405,
"step": 1600
},
{
"epoch": 0.48,
"learning_rate": 4.329988635622611e-05,
"loss": 1.4311,
"step": 1610
},
{
"epoch": 0.48,
"learning_rate": 4.322039086509769e-05,
"loss": 1.4358,
"step": 1620
},
{
"epoch": 0.48,
"learning_rate": 4.3140500456357145e-05,
"loss": 1.4114,
"step": 1630
},
{
"epoch": 0.49,
"learning_rate": 4.306021686158805e-05,
"loss": 1.4165,
"step": 1640
},
{
"epoch": 0.49,
"learning_rate": 4.297954182089609e-05,
"loss": 1.4309,
"step": 1650
},
{
"epoch": 0.49,
"learning_rate": 4.289847708287129e-05,
"loss": 1.4215,
"step": 1660
},
{
"epoch": 0.49,
"learning_rate": 4.2817024404550246e-05,
"loss": 1.4124,
"step": 1670
},
{
"epoch": 0.5,
"learning_rate": 4.2735185551377895e-05,
"loss": 1.4001,
"step": 1680
},
{
"epoch": 0.5,
"learning_rate": 4.265296229716935e-05,
"loss": 1.4302,
"step": 1690
},
{
"epoch": 0.5,
"learning_rate": 4.25703564240714e-05,
"loss": 1.4211,
"step": 1700
},
{
"epoch": 0.51,
"learning_rate": 4.2487369722523906e-05,
"loss": 1.4423,
"step": 1710
},
{
"epoch": 0.51,
"learning_rate": 4.240400399122101e-05,
"loss": 1.4299,
"step": 1720
},
{
"epoch": 0.51,
"learning_rate": 4.232026103707209e-05,
"loss": 1.4214,
"step": 1730
},
{
"epoch": 0.52,
"learning_rate": 4.223614267516268e-05,
"loss": 1.4348,
"step": 1740
},
{
"epoch": 0.52,
"learning_rate": 4.215165072871505e-05,
"loss": 1.4315,
"step": 1750
},
{
"epoch": 0.52,
"learning_rate": 4.206678702904874e-05,
"loss": 1.4098,
"step": 1760
},
{
"epoch": 0.52,
"learning_rate": 4.198155341554084e-05,
"loss": 1.4242,
"step": 1770
},
{
"epoch": 0.53,
"learning_rate": 4.1895951735586145e-05,
"loss": 1.4272,
"step": 1780
},
{
"epoch": 0.53,
"learning_rate": 4.1809983844557085e-05,
"loss": 1.4452,
"step": 1790
},
{
"epoch": 0.53,
"learning_rate": 4.172365160576355e-05,
"loss": 1.431,
"step": 1800
},
{
"epoch": 0.54,
"learning_rate": 4.163695689041245e-05,
"loss": 1.4389,
"step": 1810
},
{
"epoch": 0.54,
"learning_rate": 4.154990157756722e-05,
"loss": 1.413,
"step": 1820
},
{
"epoch": 0.54,
"learning_rate": 4.1462487554107036e-05,
"loss": 1.3893,
"step": 1830
},
{
"epoch": 0.55,
"learning_rate": 4.137471671468596e-05,
"loss": 1.4052,
"step": 1840
},
{
"epoch": 0.55,
"learning_rate": 4.128659096169183e-05,
"loss": 1.4173,
"step": 1850
},
{
"epoch": 0.55,
"learning_rate": 4.1198112205205096e-05,
"loss": 1.4012,
"step": 1860
},
{
"epoch": 0.55,
"learning_rate": 4.110928236295734e-05,
"loss": 1.4119,
"step": 1870
},
{
"epoch": 0.56,
"learning_rate": 4.102010336028975e-05,
"loss": 1.4111,
"step": 1880
},
{
"epoch": 0.56,
"learning_rate": 4.0930577130111424e-05,
"loss": 1.4156,
"step": 1890
},
{
"epoch": 0.56,
"learning_rate": 4.084070561285739e-05,
"loss": 1.4419,
"step": 1900
},
{
"epoch": 0.57,
"learning_rate": 4.0750490756446624e-05,
"loss": 1.4121,
"step": 1910
},
{
"epoch": 0.57,
"learning_rate": 4.0659934516239795e-05,
"loss": 1.4204,
"step": 1920
},
{
"epoch": 0.57,
"learning_rate": 4.056903885499689e-05,
"loss": 1.4032,
"step": 1930
},
{
"epoch": 0.57,
"learning_rate": 4.047780574283466e-05,
"loss": 1.4207,
"step": 1940
},
{
"epoch": 0.58,
"learning_rate": 4.038623715718397e-05,
"loss": 1.4095,
"step": 1950
},
{
"epoch": 0.58,
"learning_rate": 4.029433508274686e-05,
"loss": 1.4228,
"step": 1960
},
{
"epoch": 0.58,
"learning_rate": 4.0202101511453586e-05,
"loss": 1.4141,
"step": 1970
},
{
"epoch": 0.59,
"learning_rate": 4.010953844241943e-05,
"loss": 1.4323,
"step": 1980
},
{
"epoch": 0.59,
"learning_rate": 4.001664788190135e-05,
"loss": 1.4087,
"step": 1990
},
{
"epoch": 0.59,
"learning_rate": 3.992343184325453e-05,
"loss": 1.4186,
"step": 2000
},
{
"epoch": 0.6,
"learning_rate": 3.982989234688873e-05,
"loss": 1.4264,
"step": 2010
},
{
"epoch": 0.6,
"learning_rate": 3.973603142022448e-05,
"loss": 1.4417,
"step": 2020
},
{
"epoch": 0.6,
"learning_rate": 3.964185109764915e-05,
"loss": 1.4075,
"step": 2030
},
{
"epoch": 0.6,
"learning_rate": 3.954735342047285e-05,
"loss": 1.4143,
"step": 2040
},
{
"epoch": 0.61,
"learning_rate": 3.945254043688419e-05,
"loss": 1.4176,
"step": 2050
},
{
"epoch": 0.61,
"learning_rate": 3.935741420190587e-05,
"loss": 1.4214,
"step": 2060
},
{
"epoch": 0.61,
"learning_rate": 3.926197677735018e-05,
"loss": 1.4256,
"step": 2070
},
{
"epoch": 0.62,
"learning_rate": 3.9166230231774276e-05,
"loss": 1.4075,
"step": 2080
},
{
"epoch": 0.62,
"learning_rate": 3.9070176640435335e-05,
"loss": 1.3887,
"step": 2090
},
{
"epoch": 0.62,
"learning_rate": 3.897381808524562e-05,
"loss": 1.4225,
"step": 2100
},
{
"epoch": 0.63,
"learning_rate": 3.887715665472729e-05,
"loss": 1.4114,
"step": 2110
},
{
"epoch": 0.63,
"learning_rate": 3.8780194443967226e-05,
"loss": 1.4316,
"step": 2120
},
{
"epoch": 0.63,
"learning_rate": 3.8682933554571524e-05,
"loss": 1.4168,
"step": 2130
},
{
"epoch": 0.63,
"learning_rate": 3.858537609461999e-05,
"loss": 1.4237,
"step": 2140
},
{
"epoch": 0.64,
"learning_rate": 3.8487524178620464e-05,
"loss": 1.4373,
"step": 2150
},
{
"epoch": 0.64,
"learning_rate": 3.838937992746295e-05,
"loss": 1.4089,
"step": 2160
},
{
"epoch": 0.64,
"learning_rate": 3.8290945468373684e-05,
"loss": 1.4319,
"step": 2170
},
{
"epoch": 0.65,
"learning_rate": 3.8192222934869e-05,
"loss": 1.4035,
"step": 2180
},
{
"epoch": 0.65,
"learning_rate": 3.809321446670909e-05,
"loss": 1.4161,
"step": 2190
},
{
"epoch": 0.65,
"learning_rate": 3.799392220985164e-05,
"loss": 1.4136,
"step": 2200
},
{
"epoch": 0.65,
"learning_rate": 3.789434831640533e-05,
"loss": 1.4188,
"step": 2210
},
{
"epoch": 0.66,
"learning_rate": 3.779449494458312e-05,
"loss": 1.4203,
"step": 2220
},
{
"epoch": 0.66,
"learning_rate": 3.769436425865557e-05,
"loss": 1.4263,
"step": 2230
},
{
"epoch": 0.66,
"learning_rate": 3.759395842890384e-05,
"loss": 1.4295,
"step": 2240
},
{
"epoch": 0.67,
"learning_rate": 3.749327963157274e-05,
"loss": 1.4144,
"step": 2250
},
{
"epoch": 0.67,
"learning_rate": 3.739233004882346e-05,
"loss": 1.4162,
"step": 2260
},
{
"epoch": 0.67,
"learning_rate": 3.729111186868635e-05,
"loss": 1.4099,
"step": 2270
},
{
"epoch": 0.68,
"learning_rate": 3.718962728501348e-05,
"loss": 1.3878,
"step": 2280
},
{
"epoch": 0.68,
"learning_rate": 3.708787849743106e-05,
"loss": 1.4399,
"step": 2290
},
{
"epoch": 0.68,
"learning_rate": 3.69858677112918e-05,
"loss": 1.4249,
"step": 2300
},
{
"epoch": 0.68,
"learning_rate": 3.688359713762707e-05,
"loss": 1.3925,
"step": 2310
},
{
"epoch": 0.69,
"learning_rate": 3.6781068993099034e-05,
"loss": 1.4036,
"step": 2320
},
{
"epoch": 0.69,
"learning_rate": 3.667828549995255e-05,
"loss": 1.3986,
"step": 2330
},
{
"epoch": 0.69,
"learning_rate": 3.657524888596703e-05,
"loss": 1.4298,
"step": 2340
},
{
"epoch": 0.7,
"learning_rate": 3.6471961384408155e-05,
"loss": 1.4016,
"step": 2350
},
{
"epoch": 0.7,
"learning_rate": 3.636842523397945e-05,
"loss": 1.3992,
"step": 2360
},
{
"epoch": 0.7,
"learning_rate": 3.626464267877381e-05,
"loss": 1.4441,
"step": 2370
},
{
"epoch": 0.71,
"learning_rate": 3.616061596822478e-05,
"loss": 1.3967,
"step": 2380
},
{
"epoch": 0.71,
"learning_rate": 3.6056347357057893e-05,
"loss": 1.4252,
"step": 2390
},
{
"epoch": 0.71,
"learning_rate": 3.595183910524173e-05,
"loss": 1.4209,
"step": 2400
},
{
"epoch": 0.71,
"learning_rate": 3.5847093477938956e-05,
"loss": 1.4133,
"step": 2410
},
{
"epoch": 0.72,
"learning_rate": 3.5742112745457235e-05,
"loss": 1.4313,
"step": 2420
},
{
"epoch": 0.72,
"learning_rate": 3.563689918320002e-05,
"loss": 1.4275,
"step": 2430
},
{
"epoch": 0.72,
"learning_rate": 3.5531455071617226e-05,
"loss": 1.421,
"step": 2440
},
{
"epoch": 0.73,
"learning_rate": 3.542578269615579e-05,
"loss": 1.4402,
"step": 2450
},
{
"epoch": 0.73,
"learning_rate": 3.5319884347210186e-05,
"loss": 1.4176,
"step": 2460
},
{
"epoch": 0.73,
"learning_rate": 3.521376232007271e-05,
"loss": 1.4117,
"step": 2470
},
{
"epoch": 0.73,
"learning_rate": 3.5107418914883794e-05,
"loss": 1.41,
"step": 2480
},
{
"epoch": 0.74,
"learning_rate": 3.500085643658211e-05,
"loss": 1.4313,
"step": 2490
},
{
"epoch": 0.74,
"learning_rate": 3.489407719485464e-05,
"loss": 1.4035,
"step": 2500
},
{
"epoch": 0.74,
"learning_rate": 3.4787083504086605e-05,
"loss": 1.4057,
"step": 2510
},
{
"epoch": 0.75,
"learning_rate": 3.467987768331127e-05,
"loss": 1.4125,
"step": 2520
},
{
"epoch": 0.75,
"learning_rate": 3.457246205615974e-05,
"loss": 1.4056,
"step": 2530
},
{
"epoch": 0.75,
"learning_rate": 3.446483895081054e-05,
"loss": 1.4082,
"step": 2540
},
{
"epoch": 0.76,
"learning_rate": 3.4357010699939215e-05,
"loss": 1.3915,
"step": 2550
},
{
"epoch": 0.76,
"learning_rate": 3.424897964066769e-05,
"loss": 1.4012,
"step": 2560
},
{
"epoch": 0.76,
"learning_rate": 3.4140748114513685e-05,
"loss": 1.4251,
"step": 2570
},
{
"epoch": 0.76,
"learning_rate": 3.403231846733994e-05,
"loss": 1.4013,
"step": 2580
},
{
"epoch": 0.77,
"learning_rate": 3.392369304930334e-05,
"loss": 1.4076,
"step": 2590
},
{
"epoch": 0.77,
"learning_rate": 3.3814874214804034e-05,
"loss": 1.3978,
"step": 2600
},
{
"epoch": 0.77,
"learning_rate": 3.3705864322434354e-05,
"loss": 1.408,
"step": 2610
},
{
"epoch": 0.78,
"learning_rate": 3.359666573492772e-05,
"loss": 1.3888,
"step": 2620
},
{
"epoch": 0.78,
"learning_rate": 3.3487280819107415e-05,
"loss": 1.4052,
"step": 2630
},
{
"epoch": 0.78,
"learning_rate": 3.33777119458353e-05,
"loss": 1.4286,
"step": 2640
},
{
"epoch": 0.79,
"learning_rate": 3.326796148996042e-05,
"loss": 1.4241,
"step": 2650
},
{
"epoch": 0.79,
"learning_rate": 3.315803183026753e-05,
"loss": 1.4049,
"step": 2660
},
{
"epoch": 0.79,
"learning_rate": 3.304792534942553e-05,
"loss": 1.3826,
"step": 2670
},
{
"epoch": 0.79,
"learning_rate": 3.293764443393582e-05,
"loss": 1.413,
"step": 2680
},
{
"epoch": 0.8,
"learning_rate": 3.2827191474080605e-05,
"loss": 1.4161,
"step": 2690
},
{
"epoch": 0.8,
"learning_rate": 3.2716568863871044e-05,
"loss": 1.382,
"step": 2700
},
{
"epoch": 0.8,
"learning_rate": 3.260577900099539e-05,
"loss": 1.381,
"step": 2710
},
{
"epoch": 0.81,
"learning_rate": 3.2494824286767e-05,
"loss": 1.396,
"step": 2720
},
{
"epoch": 0.81,
"learning_rate": 3.2383707126072315e-05,
"loss": 1.3923,
"step": 2730
},
{
"epoch": 0.81,
"learning_rate": 3.2272429927318707e-05,
"loss": 1.4044,
"step": 2740
},
{
"epoch": 0.81,
"learning_rate": 3.21609951023823e-05,
"loss": 1.4073,
"step": 2750
},
{
"epoch": 0.82,
"learning_rate": 3.204940506655568e-05,
"loss": 1.4178,
"step": 2760
},
{
"epoch": 0.82,
"learning_rate": 3.1937662238495544e-05,
"loss": 1.4179,
"step": 2770
},
{
"epoch": 0.82,
"learning_rate": 3.1825769040170285e-05,
"loss": 1.4003,
"step": 2780
},
{
"epoch": 0.83,
"learning_rate": 3.1713727896807505e-05,
"loss": 1.4176,
"step": 2790
},
{
"epoch": 0.83,
"learning_rate": 3.160154123684143e-05,
"loss": 1.4179,
"step": 2800
},
{
"epoch": 0.83,
"learning_rate": 3.1489211491860276e-05,
"loss": 1.4098,
"step": 2810
},
{
"epoch": 0.84,
"learning_rate": 3.1376741096553576e-05,
"loss": 1.4087,
"step": 2820
},
{
"epoch": 0.84,
"learning_rate": 3.126413248865935e-05,
"loss": 1.3971,
"step": 2830
},
{
"epoch": 0.84,
"learning_rate": 3.115138810891134e-05,
"loss": 1.3915,
"step": 2840
},
{
"epoch": 0.84,
"learning_rate": 3.103851040098607e-05,
"loss": 1.4041,
"step": 2850
},
{
"epoch": 0.85,
"learning_rate": 3.0925501811449855e-05,
"loss": 1.4129,
"step": 2860
},
{
"epoch": 0.85,
"learning_rate": 3.081236478970583e-05,
"loss": 1.3948,
"step": 2870
},
{
"epoch": 0.85,
"learning_rate": 3.069910178794082e-05,
"loss": 1.4116,
"step": 2880
},
{
"epoch": 0.86,
"learning_rate": 3.0585715261072206e-05,
"loss": 1.4029,
"step": 2890
},
{
"epoch": 0.86,
"learning_rate": 3.04722076666947e-05,
"loss": 1.399,
"step": 2900
},
{
"epoch": 0.86,
"learning_rate": 3.0358581465027125e-05,
"loss": 1.4061,
"step": 2910
},
{
"epoch": 0.87,
"learning_rate": 3.024483911885901e-05,
"loss": 1.4152,
"step": 2920
},
{
"epoch": 0.87,
"learning_rate": 3.013098309349729e-05,
"loss": 1.4257,
"step": 2930
},
{
"epoch": 0.87,
"learning_rate": 3.0017015856712814e-05,
"loss": 1.417,
"step": 2940
},
{
"epoch": 0.87,
"learning_rate": 2.9902939878686915e-05,
"loss": 1.3952,
"step": 2950
},
{
"epoch": 0.88,
"learning_rate": 2.978875763195779e-05,
"loss": 1.4252,
"step": 2960
},
{
"epoch": 0.88,
"learning_rate": 2.9674471591367005e-05,
"loss": 1.3982,
"step": 2970
},
{
"epoch": 0.88,
"learning_rate": 2.9560084234005765e-05,
"loss": 1.3948,
"step": 2980
},
{
"epoch": 0.89,
"learning_rate": 2.944559803916128e-05,
"loss": 1.4127,
"step": 2990
},
{
"epoch": 0.89,
"learning_rate": 2.9331015488263024e-05,
"loss": 1.4239,
"step": 3000
},
{
"epoch": 0.89,
"learning_rate": 2.9216339064828914e-05,
"loss": 1.3889,
"step": 3010
},
{
"epoch": 0.89,
"learning_rate": 2.910157125441152e-05,
"loss": 1.403,
"step": 3020
},
{
"epoch": 0.9,
"learning_rate": 2.898671454454418e-05,
"loss": 1.4106,
"step": 3030
},
{
"epoch": 0.9,
"learning_rate": 2.8871771424687078e-05,
"loss": 1.4123,
"step": 3040
},
{
"epoch": 0.9,
"learning_rate": 2.8756744386173284e-05,
"loss": 1.4137,
"step": 3050
},
{
"epoch": 0.91,
"learning_rate": 2.8641635922154774e-05,
"loss": 1.4009,
"step": 3060
},
{
"epoch": 0.91,
"learning_rate": 2.8526448527548372e-05,
"loss": 1.4159,
"step": 3070
},
{
"epoch": 0.91,
"learning_rate": 2.8411184698981684e-05,
"loss": 1.4071,
"step": 3080
},
{
"epoch": 0.92,
"learning_rate": 2.829584693473899e-05,
"loss": 1.41,
"step": 3090
},
{
"epoch": 0.92,
"learning_rate": 2.8180437734707064e-05,
"loss": 1.4038,
"step": 3100
},
{
"epoch": 0.92,
"learning_rate": 2.8064959600321043e-05,
"loss": 1.4069,
"step": 3110
},
{
"epoch": 0.92,
"learning_rate": 2.7949415034510163e-05,
"loss": 1.4096,
"step": 3120
},
{
"epoch": 0.93,
"learning_rate": 2.7833806541643544e-05,
"loss": 1.3821,
"step": 3130
},
{
"epoch": 0.93,
"learning_rate": 2.7718136627475865e-05,
"loss": 1.3886,
"step": 3140
},
{
"epoch": 0.93,
"learning_rate": 2.76024077990931e-05,
"loss": 1.3923,
"step": 3150
},
{
"epoch": 0.94,
"learning_rate": 2.748662256485816e-05,
"loss": 1.4072,
"step": 3160
},
{
"epoch": 0.94,
"learning_rate": 2.7370783434356512e-05,
"loss": 1.4126,
"step": 3170
},
{
"epoch": 0.94,
"learning_rate": 2.7254892918341802e-05,
"loss": 1.4238,
"step": 3180
},
{
"epoch": 0.95,
"learning_rate": 2.713895352868144e-05,
"loss": 1.4183,
"step": 3190
},
{
"epoch": 0.95,
"learning_rate": 2.702296777830212e-05,
"loss": 1.4056,
"step": 3200
},
{
"epoch": 0.95,
"learning_rate": 2.6906938181135423e-05,
"loss": 1.4096,
"step": 3210
},
{
"epoch": 0.95,
"learning_rate": 2.6790867252063247e-05,
"loss": 1.4018,
"step": 3220
},
{
"epoch": 0.96,
"learning_rate": 2.6674757506863357e-05,
"loss": 1.3922,
"step": 3230
},
{
"epoch": 0.96,
"learning_rate": 2.655861146215483e-05,
"loss": 1.4054,
"step": 3240
},
{
"epoch": 0.96,
"learning_rate": 2.6442431635343528e-05,
"loss": 1.3914,
"step": 3250
},
{
"epoch": 0.97,
"learning_rate": 2.6326220544567514e-05,
"loss": 1.3851,
"step": 3260
},
{
"epoch": 0.97,
"learning_rate": 2.620998070864248e-05,
"loss": 1.4102,
"step": 3270
},
{
"epoch": 0.97,
"learning_rate": 2.6093714647007156e-05,
"loss": 1.4069,
"step": 3280
},
{
"epoch": 0.97,
"learning_rate": 2.5977424879668705e-05,
"loss": 1.3919,
"step": 3290
},
{
"epoch": 0.98,
"learning_rate": 2.5861113927148096e-05,
"loss": 1.4073,
"step": 3300
},
{
"epoch": 0.98,
"learning_rate": 2.5744784310425467e-05,
"loss": 1.4025,
"step": 3310
},
{
"epoch": 0.98,
"learning_rate": 2.562843855088551e-05,
"loss": 1.3805,
"step": 3320
},
{
"epoch": 0.99,
"learning_rate": 2.5512079170262793e-05,
"loss": 1.4032,
"step": 3330
},
{
"epoch": 0.99,
"learning_rate": 2.5395708690587117e-05,
"loss": 1.4232,
"step": 3340
},
{
"epoch": 0.99,
"learning_rate": 2.527932963412885e-05,
"loss": 1.3897,
"step": 3350
},
{
"epoch": 1.0,
"learning_rate": 2.5162944523344256e-05,
"loss": 1.4008,
"step": 3360
},
{
"epoch": 1.0,
"learning_rate": 2.5046555880820826e-05,
"loss": 1.3936,
"step": 3370
},
{
"epoch": 1.0,
"learning_rate": 2.4930166229222597e-05,
"loss": 1.394,
"step": 3380
},
{
"epoch": 1.0,
"learning_rate": 2.481377809123547e-05,
"loss": 1.3903,
"step": 3390
},
{
"epoch": 1.01,
"learning_rate": 2.469739398951256e-05,
"loss": 1.3869,
"step": 3400
},
{
"epoch": 1.01,
"learning_rate": 2.458101644661947e-05,
"loss": 1.429,
"step": 3410
},
{
"epoch": 1.01,
"learning_rate": 2.4464647984979667e-05,
"loss": 1.3987,
"step": 3420
},
{
"epoch": 1.02,
"learning_rate": 2.4348291126819783e-05,
"loss": 1.38,
"step": 3430
},
{
"epoch": 1.02,
"learning_rate": 2.4231948394114936e-05,
"loss": 1.3906,
"step": 3440
},
{
"epoch": 1.02,
"learning_rate": 2.4115622308534096e-05,
"loss": 1.3931,
"step": 3450
},
{
"epoch": 1.03,
"learning_rate": 2.399931539138541e-05,
"loss": 1.4135,
"step": 3460
},
{
"epoch": 1.03,
"learning_rate": 2.388303016356156e-05,
"loss": 1.3952,
"step": 3470
},
{
"epoch": 1.03,
"learning_rate": 2.3766769145485125e-05,
"loss": 1.3972,
"step": 3480
},
{
"epoch": 1.03,
"learning_rate": 2.3650534857053943e-05,
"loss": 1.3937,
"step": 3490
},
{
"epoch": 1.04,
"learning_rate": 2.3534329817586513e-05,
"loss": 1.3936,
"step": 3500
},
{
"epoch": 1.04,
"learning_rate": 2.3418156545767365e-05,
"loss": 1.397,
"step": 3510
},
{
"epoch": 1.04,
"learning_rate": 2.3302017559592494e-05,
"loss": 1.3849,
"step": 3520
},
{
"epoch": 1.05,
"learning_rate": 2.318591537631476e-05,
"loss": 1.4118,
"step": 3530
},
{
"epoch": 1.05,
"learning_rate": 2.3069852512389335e-05,
"loss": 1.414,
"step": 3540
},
{
"epoch": 1.05,
"learning_rate": 2.2953831483419184e-05,
"loss": 1.4088,
"step": 3550
},
{
"epoch": 1.05,
"learning_rate": 2.2837854804100504e-05,
"loss": 1.3773,
"step": 3560
},
{
"epoch": 1.06,
"learning_rate": 2.272192498816825e-05,
"loss": 1.3977,
"step": 3570
},
{
"epoch": 1.06,
"learning_rate": 2.260604454834162e-05,
"loss": 1.3591,
"step": 3580
},
{
"epoch": 1.06,
"learning_rate": 2.2490215996269617e-05,
"loss": 1.4023,
"step": 3590
},
{
"epoch": 1.07,
"learning_rate": 2.237444184247661e-05,
"loss": 1.3873,
"step": 3600
},
{
"epoch": 1.07,
"learning_rate": 2.2258724596307915e-05,
"loss": 1.3826,
"step": 3610
},
{
"epoch": 1.07,
"learning_rate": 2.214306676587539e-05,
"loss": 1.3732,
"step": 3620
},
{
"epoch": 1.08,
"learning_rate": 2.2027470858003098e-05,
"loss": 1.3988,
"step": 3630
},
{
"epoch": 1.08,
"learning_rate": 2.1911939378172956e-05,
"loss": 1.4036,
"step": 3640
},
{
"epoch": 1.08,
"learning_rate": 2.1796474830470447e-05,
"loss": 1.4236,
"step": 3650
},
{
"epoch": 1.08,
"learning_rate": 2.1681079717530328e-05,
"loss": 1.4032,
"step": 3660
},
{
"epoch": 1.09,
"learning_rate": 2.156575654048239e-05,
"loss": 1.39,
"step": 3670
},
{
"epoch": 1.09,
"learning_rate": 2.145050779889725e-05,
"loss": 1.3757,
"step": 3680
},
{
"epoch": 1.09,
"learning_rate": 2.1335335990732186e-05,
"loss": 1.3934,
"step": 3690
},
{
"epoch": 1.1,
"learning_rate": 2.1220243612276964e-05,
"loss": 1.3979,
"step": 3700
},
{
"epoch": 1.1,
"learning_rate": 2.110523315809978e-05,
"loss": 1.4181,
"step": 3710
},
{
"epoch": 1.1,
"learning_rate": 2.0990307120993134e-05,
"loss": 1.406,
"step": 3720
},
{
"epoch": 1.11,
"learning_rate": 2.0875467991919854e-05,
"loss": 1.4036,
"step": 3730
},
{
"epoch": 1.11,
"learning_rate": 2.076071825995906e-05,
"loss": 1.4095,
"step": 3740
},
{
"epoch": 1.11,
"learning_rate": 2.0646060412252246e-05,
"loss": 1.4048,
"step": 3750
},
{
"epoch": 1.11,
"learning_rate": 2.0531496933949363e-05,
"loss": 1.3874,
"step": 3760
},
{
"epoch": 1.12,
"learning_rate": 2.0417030308154953e-05,
"loss": 1.3793,
"step": 3770
},
{
"epoch": 1.12,
"learning_rate": 2.0302663015874322e-05,
"loss": 1.4152,
"step": 3780
},
{
"epoch": 1.12,
"learning_rate": 2.0188397535959785e-05,
"loss": 1.3738,
"step": 3790
},
{
"epoch": 1.13,
"learning_rate": 2.007423634505692e-05,
"loss": 1.4033,
"step": 3800
},
{
"epoch": 1.13,
"learning_rate": 1.9960181917550897e-05,
"loss": 1.3753,
"step": 3810
},
{
"epoch": 1.13,
"learning_rate": 1.9846236725512835e-05,
"loss": 1.3791,
"step": 3820
},
{
"epoch": 1.13,
"learning_rate": 1.973240323864624e-05,
"loss": 1.3837,
"step": 3830
},
{
"epoch": 1.14,
"learning_rate": 1.9618683924233467e-05,
"loss": 1.3945,
"step": 3840
},
{
"epoch": 1.14,
"learning_rate": 1.9505081247082237e-05,
"loss": 1.3804,
"step": 3850
},
{
"epoch": 1.14,
"learning_rate": 1.9391597669472213e-05,
"loss": 1.3964,
"step": 3860
},
{
"epoch": 1.15,
"learning_rate": 1.927823565110165e-05,
"loss": 1.3983,
"step": 3870
},
{
"epoch": 1.15,
"learning_rate": 1.9164997649034058e-05,
"loss": 1.4169,
"step": 3880
},
{
"epoch": 1.15,
"learning_rate": 1.9051886117644963e-05,
"loss": 1.4101,
"step": 3890
},
{
"epoch": 1.16,
"learning_rate": 1.89389035085687e-05,
"loss": 1.3823,
"step": 3900
},
{
"epoch": 1.16,
"learning_rate": 1.8826052270645276e-05,
"loss": 1.3827,
"step": 3910
},
{
"epoch": 1.16,
"learning_rate": 1.8713334849867315e-05,
"loss": 1.4035,
"step": 3920
},
{
"epoch": 1.16,
"learning_rate": 1.8600753689327e-05,
"loss": 1.4081,
"step": 3930
},
{
"epoch": 1.17,
"learning_rate": 1.8488311229163152e-05,
"loss": 1.3919,
"step": 3940
},
{
"epoch": 1.17,
"learning_rate": 1.8376009906508338e-05,
"loss": 1.3854,
"step": 3950
},
{
"epoch": 1.17,
"learning_rate": 1.826385215543603e-05,
"loss": 1.3924,
"step": 3960
},
{
"epoch": 1.18,
"learning_rate": 1.8151840406907873e-05,
"loss": 1.3851,
"step": 3970
},
{
"epoch": 1.18,
"learning_rate": 1.8039977088720972e-05,
"loss": 1.3707,
"step": 3980
},
{
"epoch": 1.18,
"learning_rate": 1.7928264625455282e-05,
"loss": 1.3998,
"step": 3990
},
{
"epoch": 1.19,
"learning_rate": 1.7816705438421064e-05,
"loss": 1.3931,
"step": 4000
},
{
"epoch": 1.19,
"learning_rate": 1.7705301945606384e-05,
"loss": 1.3976,
"step": 4010
},
{
"epoch": 1.19,
"learning_rate": 1.7594056561624716e-05,
"loss": 1.3785,
"step": 4020
},
{
"epoch": 1.19,
"learning_rate": 1.748297169766262e-05,
"loss": 1.3845,
"step": 4030
},
{
"epoch": 1.2,
"learning_rate": 1.7372049761427457e-05,
"loss": 1.3926,
"step": 4040
},
{
"epoch": 1.2,
"learning_rate": 1.7261293157095204e-05,
"loss": 1.4075,
"step": 4050
},
{
"epoch": 1.2,
"learning_rate": 1.7150704285258375e-05,
"loss": 1.3938,
"step": 4060
},
{
"epoch": 1.21,
"learning_rate": 1.7040285542873945e-05,
"loss": 1.3884,
"step": 4070
},
{
"epoch": 1.21,
"learning_rate": 1.6930039323211448e-05,
"loss": 1.4066,
"step": 4080
},
{
"epoch": 1.21,
"learning_rate": 1.6819968015801048e-05,
"loss": 1.3992,
"step": 4090
},
{
"epoch": 1.21,
"learning_rate": 1.6710074006381797e-05,
"loss": 1.4066,
"step": 4100
},
{
"epoch": 1.22,
"learning_rate": 1.6600359676849892e-05,
"loss": 1.4076,
"step": 4110
},
{
"epoch": 1.22,
"learning_rate": 1.6490827405207062e-05,
"loss": 1.4078,
"step": 4120
},
{
"epoch": 1.22,
"learning_rate": 1.638147956550904e-05,
"loss": 1.4026,
"step": 4130
},
{
"epoch": 1.23,
"learning_rate": 1.627231852781407e-05,
"loss": 1.3861,
"step": 4140
},
{
"epoch": 1.23,
"learning_rate": 1.6163346658131567e-05,
"loss": 1.3915,
"step": 4150
},
{
"epoch": 1.23,
"learning_rate": 1.6054566318370832e-05,
"loss": 1.3828,
"step": 4160
},
{
"epoch": 1.24,
"learning_rate": 1.5945979866289844e-05,
"loss": 1.3952,
"step": 4170
},
{
"epoch": 1.24,
"learning_rate": 1.583758965544417e-05,
"loss": 1.3892,
"step": 4180
},
{
"epoch": 1.24,
"learning_rate": 1.5729398035135957e-05,
"loss": 1.3973,
"step": 4190
},
{
"epoch": 1.24,
"learning_rate": 1.5621407350362986e-05,
"loss": 1.4225,
"step": 4200
},
{
"epoch": 1.25,
"learning_rate": 1.5513619941767886e-05,
"loss": 1.3948,
"step": 4210
},
{
"epoch": 1.25,
"learning_rate": 1.540603814558736e-05,
"loss": 1.4074,
"step": 4220
},
{
"epoch": 1.25,
"learning_rate": 1.5298664293601574e-05,
"loss": 1.3965,
"step": 4230
},
{
"epoch": 1.26,
"learning_rate": 1.5191500713083615e-05,
"loss": 1.3743,
"step": 4240
},
{
"epoch": 1.26,
"learning_rate": 1.508454972674904e-05,
"loss": 1.384,
"step": 4250
},
{
"epoch": 1.26,
"learning_rate": 1.4977813652705535e-05,
"loss": 1.4018,
"step": 4260
},
{
"epoch": 1.27,
"learning_rate": 1.4871294804402675e-05,
"loss": 1.3904,
"step": 4270
},
{
"epoch": 1.27,
"learning_rate": 1.4764995490581779e-05,
"loss": 1.3981,
"step": 4280
},
{
"epoch": 1.27,
"learning_rate": 1.465891801522587e-05,
"loss": 1.4144,
"step": 4290
},
{
"epoch": 1.27,
"learning_rate": 1.4553064677509731e-05,
"loss": 1.4172,
"step": 4300
},
{
"epoch": 1.28,
"learning_rate": 1.4447437771750078e-05,
"loss": 1.3873,
"step": 4310
},
{
"epoch": 1.28,
"learning_rate": 1.4342039587355832e-05,
"loss": 1.3983,
"step": 4320
},
{
"epoch": 1.28,
"learning_rate": 1.423687240877849e-05,
"loss": 1.4007,
"step": 4330
},
{
"epoch": 1.29,
"learning_rate": 1.4131938515462639e-05,
"loss": 1.4088,
"step": 4340
},
{
"epoch": 1.29,
"learning_rate": 1.4027240181796508e-05,
"loss": 1.3941,
"step": 4350
},
{
"epoch": 1.29,
"learning_rate": 1.3922779677062689e-05,
"loss": 1.3975,
"step": 4360
},
{
"epoch": 1.29,
"learning_rate": 1.3818559265388964e-05,
"loss": 1.3842,
"step": 4370
},
{
"epoch": 1.3,
"learning_rate": 1.3714581205699214e-05,
"loss": 1.4011,
"step": 4380
},
{
"epoch": 1.3,
"learning_rate": 1.3610847751664473e-05,
"loss": 1.3881,
"step": 4390
},
{
"epoch": 1.3,
"learning_rate": 1.3507361151654067e-05,
"loss": 1.4028,
"step": 4400
},
{
"epoch": 1.31,
"learning_rate": 1.340412364868689e-05,
"loss": 1.3973,
"step": 4410
},
{
"epoch": 1.31,
"learning_rate": 1.3301137480382786e-05,
"loss": 1.445,
"step": 4420
},
{
"epoch": 1.31,
"learning_rate": 1.3198404878914044e-05,
"loss": 1.3957,
"step": 4430
},
{
"epoch": 1.32,
"learning_rate": 1.3095928070957037e-05,
"loss": 1.395,
"step": 4440
},
{
"epoch": 1.32,
"learning_rate": 1.2993709277643922e-05,
"loss": 1.4157,
"step": 4450
},
{
"epoch": 1.32,
"learning_rate": 1.2891750714514545e-05,
"loss": 1.4074,
"step": 4460
},
{
"epoch": 1.32,
"learning_rate": 1.2790054591468381e-05,
"loss": 1.3988,
"step": 4470
},
{
"epoch": 1.33,
"learning_rate": 1.2688623112716652e-05,
"loss": 1.3914,
"step": 4480
},
{
"epoch": 1.33,
"learning_rate": 1.2587458476734559e-05,
"loss": 1.3864,
"step": 4490
},
{
"epoch": 1.33,
"learning_rate": 1.248656287621362e-05,
"loss": 1.3934,
"step": 4500
},
{
"epoch": 1.34,
"learning_rate": 1.2385938498014138e-05,
"loss": 1.3893,
"step": 4510
},
{
"epoch": 1.34,
"learning_rate": 1.2285587523117825e-05,
"loss": 1.3991,
"step": 4520
},
{
"epoch": 1.34,
"learning_rate": 1.2185512126580512e-05,
"loss": 1.376,
"step": 4530
},
{
"epoch": 1.35,
"learning_rate": 1.2085714477484997e-05,
"loss": 1.3799,
"step": 4540
},
{
"epoch": 1.35,
"learning_rate": 1.1986196738894078e-05,
"loss": 1.3738,
"step": 4550
},
{
"epoch": 1.35,
"learning_rate": 1.188696106780361e-05,
"loss": 1.3754,
"step": 4560
},
{
"epoch": 1.35,
"learning_rate": 1.178800961509578e-05,
"loss": 1.4006,
"step": 4570
},
{
"epoch": 1.36,
"learning_rate": 1.1689344525492497e-05,
"loss": 1.4012,
"step": 4580
},
{
"epoch": 1.36,
"learning_rate": 1.1590967937508895e-05,
"loss": 1.3973,
"step": 4590
},
{
"epoch": 1.36,
"learning_rate": 1.149288198340698e-05,
"loss": 1.3737,
"step": 4600
},
{
"epoch": 1.37,
"learning_rate": 1.1395088789149419e-05,
"loss": 1.3998,
"step": 4610
},
{
"epoch": 1.37,
"learning_rate": 1.1297590474353464e-05,
"loss": 1.4053,
"step": 4620
},
{
"epoch": 1.37,
"learning_rate": 1.1200389152245003e-05,
"loss": 1.4038,
"step": 4630
},
{
"epoch": 1.37,
"learning_rate": 1.1103486929612759e-05,
"loss": 1.3968,
"step": 4640
},
{
"epoch": 1.38,
"learning_rate": 1.1006885906762626e-05,
"loss": 1.4037,
"step": 4650
},
{
"epoch": 1.38,
"learning_rate": 1.0910588177472153e-05,
"loss": 1.3901,
"step": 4660
},
{
"epoch": 1.38,
"learning_rate": 1.0814595828945154e-05,
"loss": 1.379,
"step": 4670
},
{
"epoch": 1.39,
"learning_rate": 1.0718910941766478e-05,
"loss": 1.3808,
"step": 4680
},
{
"epoch": 1.39,
"learning_rate": 1.0623535589856887e-05,
"loss": 1.4105,
"step": 4690
},
{
"epoch": 1.39,
"learning_rate": 1.0528471840428142e-05,
"loss": 1.3756,
"step": 4700
},
{
"epoch": 1.4,
"learning_rate": 1.0433721753938182e-05,
"loss": 1.3708,
"step": 4710
},
{
"epoch": 1.4,
"learning_rate": 1.0339287384046462e-05,
"loss": 1.3924,
"step": 4720
},
{
"epoch": 1.4,
"learning_rate": 1.024517077756943e-05,
"loss": 1.3854,
"step": 4730
},
{
"epoch": 1.4,
"learning_rate": 1.0151373974436184e-05,
"loss": 1.3908,
"step": 4740
},
{
"epoch": 1.41,
"learning_rate": 1.0057899007644245e-05,
"loss": 1.3953,
"step": 4750
},
{
"epoch": 1.41,
"learning_rate": 9.964747903215513e-06,
"loss": 1.3933,
"step": 4760
},
{
"epoch": 1.41,
"learning_rate": 9.871922680152318e-06,
"loss": 1.3854,
"step": 4770
},
{
"epoch": 1.42,
"learning_rate": 9.779425350393685e-06,
"loss": 1.4026,
"step": 4780
},
{
"epoch": 1.42,
"learning_rate": 9.687257918771719e-06,
"loss": 1.3958,
"step": 4790
},
{
"epoch": 1.42,
"learning_rate": 9.595422382968156e-06,
"loss": 1.3777,
"step": 4800
},
{
"epoch": 1.43,
"learning_rate": 9.503920733471052e-06,
"loss": 1.3835,
"step": 4810
},
{
"epoch": 1.43,
"learning_rate": 9.412754953531663e-06,
"loss": 1.3768,
"step": 4820
},
{
"epoch": 1.43,
"learning_rate": 9.321927019121435e-06,
"loss": 1.3846,
"step": 4830
},
{
"epoch": 1.43,
"learning_rate": 9.231438898889184e-06,
"loss": 1.3878,
"step": 4840
},
{
"epoch": 1.44,
"learning_rate": 9.141292554118435e-06,
"loss": 1.38,
"step": 4850
},
{
"epoch": 1.44,
"learning_rate": 9.051489938684903e-06,
"loss": 1.3841,
"step": 4860
},
{
"epoch": 1.44,
"learning_rate": 8.962032999014144e-06,
"loss": 1.4122,
"step": 4870
},
{
"epoch": 1.45,
"learning_rate": 8.87292367403937e-06,
"loss": 1.3839,
"step": 4880
},
{
"epoch": 1.45,
"learning_rate": 8.784163895159428e-06,
"loss": 1.3932,
"step": 4890
},
{
"epoch": 1.45,
"learning_rate": 8.695755586196924e-06,
"loss": 1.4012,
"step": 4900
},
{
"epoch": 1.45,
"learning_rate": 8.607700663356543e-06,
"loss": 1.3931,
"step": 4910
},
{
"epoch": 1.46,
"learning_rate": 8.520001035183503e-06,
"loss": 1.4003,
"step": 4920
},
{
"epoch": 1.46,
"learning_rate": 8.432658602522193e-06,
"loss": 1.4064,
"step": 4930
},
{
"epoch": 1.46,
"learning_rate": 8.345675258474969e-06,
"loss": 1.383,
"step": 4940
},
{
"epoch": 1.47,
"learning_rate": 8.259052888361132e-06,
"loss": 1.4147,
"step": 4950
},
{
"epoch": 1.47,
"learning_rate": 8.172793369676052e-06,
"loss": 1.4064,
"step": 4960
},
{
"epoch": 1.47,
"learning_rate": 8.086898572050494e-06,
"loss": 1.3894,
"step": 4970
},
{
"epoch": 1.48,
"learning_rate": 8.00137035721007e-06,
"loss": 1.3928,
"step": 4980
},
{
"epoch": 1.48,
"learning_rate": 7.916210578934896e-06,
"loss": 1.4049,
"step": 4990
},
{
"epoch": 1.48,
"learning_rate": 7.831421083019422e-06,
"loss": 1.402,
"step": 5000
},
{
"epoch": 1.48,
"learning_rate": 7.747003707232415e-06,
"loss": 1.4144,
"step": 5010
},
{
"epoch": 1.49,
"learning_rate": 7.66296028127713e-06,
"loss": 1.3884,
"step": 5020
},
{
"epoch": 1.49,
"learning_rate": 7.579292626751647e-06,
"loss": 1.4116,
"step": 5030
},
{
"epoch": 1.49,
"learning_rate": 7.4960025571094025e-06,
"loss": 1.3828,
"step": 5040
},
{
"epoch": 1.5,
"learning_rate": 7.413091877619868e-06,
"loss": 1.3821,
"step": 5050
},
{
"epoch": 1.5,
"learning_rate": 7.330562385329429e-06,
"loss": 1.4068,
"step": 5060
},
{
"epoch": 1.5,
"learning_rate": 7.248415869022434e-06,
"loss": 1.3842,
"step": 5070
},
{
"epoch": 1.51,
"learning_rate": 7.16665410918243e-06,
"loss": 1.3937,
"step": 5080
},
{
"epoch": 1.51,
"learning_rate": 7.085278877953558e-06,
"loss": 1.4077,
"step": 5090
},
{
"epoch": 1.51,
"learning_rate": 7.004291939102148e-06,
"loss": 1.3989,
"step": 5100
},
{
"epoch": 1.51,
"learning_rate": 6.923695047978502e-06,
"loss": 1.3727,
"step": 5110
},
{
"epoch": 1.52,
"learning_rate": 6.843489951478829e-06,
"loss": 1.3842,
"step": 5120
},
{
"epoch": 1.52,
"learning_rate": 6.763678388007394e-06,
"loss": 1.3662,
"step": 5130
},
{
"epoch": 1.52,
"learning_rate": 6.684262087438839e-06,
"loss": 1.4092,
"step": 5140
},
{
"epoch": 1.53,
"learning_rate": 6.605242771080686e-06,
"loss": 1.399,
"step": 5150
},
{
"epoch": 1.53,
"learning_rate": 6.526622151636011e-06,
"loss": 1.3931,
"step": 5160
},
{
"epoch": 1.53,
"learning_rate": 6.448401933166351e-06,
"loss": 1.3824,
"step": 5170
},
{
"epoch": 1.53,
"learning_rate": 6.370583811054778e-06,
"loss": 1.3764,
"step": 5180
},
{
"epoch": 1.54,
"learning_rate": 6.293169471969104e-06,
"loss": 1.3835,
"step": 5190
},
{
"epoch": 1.54,
"learning_rate": 6.216160593825363e-06,
"loss": 1.382,
"step": 5200
},
{
"epoch": 1.54,
"learning_rate": 6.1395588457514226e-06,
"loss": 1.3983,
"step": 5210
},
{
"epoch": 1.55,
"learning_rate": 6.063365888050829e-06,
"loss": 1.3709,
"step": 5220
},
{
"epoch": 1.55,
"learning_rate": 5.987583372166794e-06,
"loss": 1.4037,
"step": 5230
},
{
"epoch": 1.55,
"learning_rate": 5.912212940646422e-06,
"loss": 1.3955,
"step": 5240
},
{
"epoch": 1.56,
"learning_rate": 5.8372562271051e-06,
"loss": 1.384,
"step": 5250
},
{
"epoch": 1.56,
"learning_rate": 5.762714856191087e-06,
"loss": 1.3772,
"step": 5260
},
{
"epoch": 1.56,
"learning_rate": 5.688590443550304e-06,
"loss": 1.3818,
"step": 5270
},
{
"epoch": 1.56,
"learning_rate": 5.61488459579132e-06,
"loss": 1.4039,
"step": 5280
},
{
"epoch": 1.57,
"learning_rate": 5.541598910450518e-06,
"loss": 1.3935,
"step": 5290
},
{
"epoch": 1.57,
"learning_rate": 5.4687349759574845e-06,
"loss": 1.402,
"step": 5300
},
{
"epoch": 1.57,
"learning_rate": 5.396294371600569e-06,
"loss": 1.3774,
"step": 5310
},
{
"epoch": 1.58,
"learning_rate": 5.3242786674926545e-06,
"loss": 1.3936,
"step": 5320
},
{
"epoch": 1.58,
"learning_rate": 5.252689424537139e-06,
"loss": 1.3914,
"step": 5330
},
{
"epoch": 1.58,
"learning_rate": 5.181528194394081e-06,
"loss": 1.3931,
"step": 5340
},
{
"epoch": 1.59,
"learning_rate": 5.11079651944659e-06,
"loss": 1.3854,
"step": 5350
},
{
"epoch": 1.59,
"learning_rate": 5.040495932767386e-06,
"loss": 1.4101,
"step": 5360
},
{
"epoch": 1.59,
"learning_rate": 4.970627958085574e-06,
"loss": 1.3929,
"step": 5370
},
{
"epoch": 1.59,
"learning_rate": 4.901194109753607e-06,
"loss": 1.3826,
"step": 5380
},
{
"epoch": 1.6,
"learning_rate": 4.832195892714489e-06,
"loss": 1.3974,
"step": 5390
},
{
"epoch": 1.6,
"learning_rate": 4.763634802469124e-06,
"loss": 1.3988,
"step": 5400
},
{
"epoch": 1.6,
"learning_rate": 4.6955123250439245e-06,
"loss": 1.3875,
"step": 5410
},
{
"epoch": 1.61,
"learning_rate": 4.6278299369585916e-06,
"loss": 1.3881,
"step": 5420
},
{
"epoch": 1.61,
"learning_rate": 4.560589105194121e-06,
"loss": 1.3924,
"step": 5430
},
{
"epoch": 1.61,
"learning_rate": 4.493791287160998e-06,
"loss": 1.4058,
"step": 5440
},
{
"epoch": 1.61,
"learning_rate": 4.4274379306676164e-06,
"loss": 1.3946,
"step": 5450
},
{
"epoch": 1.62,
"learning_rate": 4.361530473888889e-06,
"loss": 1.4045,
"step": 5460
},
{
"epoch": 1.62,
"learning_rate": 4.296070345335085e-06,
"loss": 1.3817,
"step": 5470
},
{
"epoch": 1.62,
"learning_rate": 4.231058963820867e-06,
"loss": 1.3989,
"step": 5480
},
{
"epoch": 1.63,
"learning_rate": 4.166497738434527e-06,
"loss": 1.4004,
"step": 5490
},
{
"epoch": 1.63,
"learning_rate": 4.102388068507465e-06,
"loss": 1.3905,
"step": 5500
},
{
"epoch": 1.63,
"learning_rate": 4.03873134358384e-06,
"loss": 1.3731,
"step": 5510
},
{
"epoch": 1.64,
"learning_rate": 3.9755289433904694e-06,
"loss": 1.383,
"step": 5520
},
{
"epoch": 1.64,
"learning_rate": 3.912782237806903e-06,
"loss": 1.3853,
"step": 5530
},
{
"epoch": 1.64,
"learning_rate": 3.850492586835755e-06,
"loss": 1.4039,
"step": 5540
},
{
"epoch": 1.64,
"learning_rate": 3.788661340573213e-06,
"loss": 1.3703,
"step": 5550
},
{
"epoch": 1.65,
"learning_rate": 3.7272898391797734e-06,
"loss": 1.388,
"step": 5560
},
{
"epoch": 1.65,
"learning_rate": 3.6663794128512038e-06,
"loss": 1.3785,
"step": 5570
},
{
"epoch": 1.65,
"learning_rate": 3.6059313817897065e-06,
"loss": 1.3901,
"step": 5580
},
{
"epoch": 1.66,
"learning_rate": 3.5459470561753e-06,
"loss": 1.3894,
"step": 5590
},
{
"epoch": 1.66,
"learning_rate": 3.4864277361374264e-06,
"loss": 1.3799,
"step": 5600
},
{
"epoch": 1.66,
"learning_rate": 3.4273747117267774e-06,
"loss": 1.3798,
"step": 5610
},
{
"epoch": 1.67,
"learning_rate": 3.3687892628873175e-06,
"loss": 1.3948,
"step": 5620
},
{
"epoch": 1.67,
"learning_rate": 3.310672659428557e-06,
"loss": 1.4071,
"step": 5630
},
{
"epoch": 1.67,
"learning_rate": 3.2530261609980183e-06,
"loss": 1.3993,
"step": 5640
},
{
"epoch": 1.67,
"learning_rate": 3.195851017053944e-06,
"loss": 1.3885,
"step": 5650
},
{
"epoch": 1.68,
"learning_rate": 3.1391484668382073e-06,
"loss": 1.3919,
"step": 5660
},
{
"epoch": 1.68,
"learning_rate": 3.0829197393494548e-06,
"loss": 1.3965,
"step": 5670
},
{
"epoch": 1.68,
"learning_rate": 3.0271660533164714e-06,
"loss": 1.4135,
"step": 5680
},
{
"epoch": 1.69,
"learning_rate": 2.9718886171717613e-06,
"loss": 1.3923,
"step": 5690
},
{
"epoch": 1.69,
"learning_rate": 2.9170886290253552e-06,
"loss": 1.3663,
"step": 5700
},
{
"epoch": 1.69,
"learning_rate": 2.8627672766388448e-06,
"loss": 1.3772,
"step": 5710
},
{
"epoch": 1.69,
"learning_rate": 2.8089257373996424e-06,
"loss": 1.373,
"step": 5720
},
{
"epoch": 1.7,
"learning_rate": 2.755565178295447e-06,
"loss": 1.3858,
"step": 5730
},
{
"epoch": 1.7,
"learning_rate": 2.7026867558889694e-06,
"loss": 1.3996,
"step": 5740
},
{
"epoch": 1.7,
"learning_rate": 2.6502916162928463e-06,
"loss": 1.4194,
"step": 5750
},
{
"epoch": 1.71,
"learning_rate": 2.5983808951448196e-06,
"loss": 1.3895,
"step": 5760
},
{
"epoch": 1.71,
"learning_rate": 2.5469557175830993e-06,
"loss": 1.376,
"step": 5770
},
{
"epoch": 1.71,
"learning_rate": 2.496017198221995e-06,
"loss": 1.3901,
"step": 5780
},
{
"epoch": 1.72,
"learning_rate": 2.445566441127742e-06,
"loss": 1.3782,
"step": 5790
},
{
"epoch": 1.72,
"learning_rate": 2.3956045397945826e-06,
"loss": 1.3723,
"step": 5800
},
{
"epoch": 1.72,
"learning_rate": 2.3461325771210683e-06,
"loss": 1.3705,
"step": 5810
},
{
"epoch": 1.72,
"learning_rate": 2.297151625386576e-06,
"loss": 1.4018,
"step": 5820
},
{
"epoch": 1.73,
"learning_rate": 2.2486627462280724e-06,
"loss": 1.3811,
"step": 5830
},
{
"epoch": 1.73,
"learning_rate": 2.200666990617098e-06,
"loss": 1.3894,
"step": 5840
},
{
"epoch": 1.73,
"learning_rate": 2.153165398837009e-06,
"loss": 1.3931,
"step": 5850
},
{
"epoch": 1.74,
"learning_rate": 2.1061590004603978e-06,
"loss": 1.3775,
"step": 5860
},
{
"epoch": 1.74,
"learning_rate": 2.059648814326806e-06,
"loss": 1.3897,
"step": 5870
},
{
"epoch": 1.74,
"learning_rate": 2.013635848520626e-06,
"loss": 1.3919,
"step": 5880
},
{
"epoch": 1.75,
"learning_rate": 1.9681211003492543e-06,
"loss": 1.4191,
"step": 5890
},
{
"epoch": 1.75,
"learning_rate": 1.923105556321475e-06,
"loss": 1.4027,
"step": 5900
},
{
"epoch": 1.75,
"learning_rate": 1.8785901921260784e-06,
"loss": 1.3767,
"step": 5910
},
{
"epoch": 1.75,
"learning_rate": 1.8345759726107193e-06,
"loss": 1.3801,
"step": 5920
},
{
"epoch": 1.76,
"learning_rate": 1.7910638517609962e-06,
"loss": 1.3881,
"step": 5930
},
{
"epoch": 1.76,
"learning_rate": 1.748054772679772e-06,
"loss": 1.3915,
"step": 5940
},
{
"epoch": 1.76,
"learning_rate": 1.705549667566747e-06,
"loss": 1.403,
"step": 5950
},
{
"epoch": 1.77,
"learning_rate": 1.6635494576982353e-06,
"loss": 1.3882,
"step": 5960
},
{
"epoch": 1.77,
"learning_rate": 1.6220550534072094e-06,
"loss": 1.3902,
"step": 5970
},
{
"epoch": 1.77,
"learning_rate": 1.5810673540635702e-06,
"loss": 1.3928,
"step": 5980
},
{
"epoch": 1.77,
"learning_rate": 1.540587248054645e-06,
"loss": 1.3819,
"step": 5990
},
{
"epoch": 1.78,
"learning_rate": 1.5006156127659348e-06,
"loss": 1.3948,
"step": 6000
},
{
"epoch": 1.78,
"learning_rate": 1.4611533145621026e-06,
"loss": 1.3914,
"step": 6010
},
{
"epoch": 1.78,
"learning_rate": 1.422201208768187e-06,
"loss": 1.3713,
"step": 6020
},
{
"epoch": 1.79,
"learning_rate": 1.3837601396510745e-06,
"loss": 1.3922,
"step": 6030
},
{
"epoch": 1.79,
"learning_rate": 1.345830940401191e-06,
"loss": 1.3775,
"step": 6040
},
{
"epoch": 1.79,
"learning_rate": 1.3084144331144439e-06,
"loss": 1.3974,
"step": 6050
},
{
"epoch": 1.8,
"learning_rate": 1.2715114287744079e-06,
"loss": 1.3859,
"step": 6060
},
{
"epoch": 1.8,
"learning_rate": 1.2351227272347444e-06,
"loss": 1.3795,
"step": 6070
},
{
"epoch": 1.8,
"learning_rate": 1.19924911720187e-06,
"loss": 1.376,
"step": 6080
},
{
"epoch": 1.8,
"learning_rate": 1.1638913762178489e-06,
"loss": 1.3892,
"step": 6090
},
{
"epoch": 1.81,
"learning_rate": 1.1290502706435584e-06,
"loss": 1.3866,
"step": 6100
},
{
"epoch": 1.81,
"learning_rate": 1.0947265556420588e-06,
"loss": 1.3911,
"step": 6110
},
{
"epoch": 1.81,
"learning_rate": 1.060920975162244e-06,
"loss": 1.3535,
"step": 6120
},
{
"epoch": 1.82,
"learning_rate": 1.0276342619227024e-06,
"loss": 1.3815,
"step": 6130
},
{
"epoch": 1.82,
"learning_rate": 9.948671373958468e-07,
"loss": 1.3989,
"step": 6140
},
{
"epoch": 1.82,
"learning_rate": 9.626203117922672e-07,
"loss": 1.3974,
"step": 6150
},
{
"epoch": 1.83,
"learning_rate": 9.308944840453415e-07,
"loss": 1.3717,
"step": 6160
},
{
"epoch": 1.83,
"learning_rate": 8.996903417960917e-07,
"loss": 1.3797,
"step": 6170
},
{
"epoch": 1.83,
"learning_rate": 8.690085613782706e-07,
"loss": 1.3846,
"step": 6180
},
{
"epoch": 1.83,
"learning_rate": 8.388498078037044e-07,
"loss": 1.379,
"step": 6190
},
{
"epoch": 1.84,
"learning_rate": 8.092147347478873e-07,
"loss": 1.408,
"step": 6200
},
{
"epoch": 1.84,
"learning_rate": 7.801039845358044e-07,
"loss": 1.3776,
"step": 6210
},
{
"epoch": 1.84,
"learning_rate": 7.515181881280115e-07,
"loss": 1.397,
"step": 6220
},
{
"epoch": 1.85,
"learning_rate": 7.234579651069578e-07,
"loss": 1.3768,
"step": 6230
},
{
"epoch": 1.85,
"learning_rate": 6.959239236635662e-07,
"loss": 1.3807,
"step": 6240
},
{
"epoch": 1.85,
"learning_rate": 6.68916660584043e-07,
"loss": 1.3947,
"step": 6250
},
{
"epoch": 1.85,
"learning_rate": 6.424367612369364e-07,
"loss": 1.3785,
"step": 6260
},
{
"epoch": 1.86,
"learning_rate": 6.164847995604656e-07,
"loss": 1.3828,
"step": 6270
},
{
"epoch": 1.86,
"learning_rate": 5.910613380500696e-07,
"loss": 1.3765,
"step": 6280
},
{
"epoch": 1.86,
"learning_rate": 5.66166927746209e-07,
"loss": 1.3994,
"step": 6290
},
{
"epoch": 1.87,
"learning_rate": 5.418021082224472e-07,
"loss": 1.4081,
"step": 6300
},
{
"epoch": 1.87,
"learning_rate": 5.179674075737273e-07,
"loss": 1.3885,
"step": 6310
},
{
"epoch": 1.87,
"learning_rate": 4.946633424049413e-07,
"loss": 1.3748,
"step": 6320
},
{
"epoch": 1.88,
"learning_rate": 4.7189041781973144e-07,
"loss": 1.3988,
"step": 6330
},
{
"epoch": 1.88,
"learning_rate": 4.4964912740954045e-07,
"loss": 1.3815,
"step": 6340
},
{
"epoch": 1.88,
"learning_rate": 4.2793995324290903e-07,
"loss": 1.3736,
"step": 6350
},
{
"epoch": 1.88,
"learning_rate": 4.067633658550396e-07,
"loss": 1.3847,
"step": 6360
},
{
"epoch": 1.89,
"learning_rate": 3.861198242375852e-07,
"loss": 1.3907,
"step": 6370
},
{
"epoch": 1.89,
"learning_rate": 3.660097758287018e-07,
"loss": 1.3914,
"step": 6380
},
{
"epoch": 1.89,
"learning_rate": 3.464336565033588e-07,
"loss": 1.3861,
"step": 6390
},
{
"epoch": 1.9,
"learning_rate": 3.273918905638912e-07,
"loss": 1.3937,
"step": 6400
},
{
"epoch": 1.9,
"learning_rate": 3.088848907307873e-07,
"loss": 1.3783,
"step": 6410
},
{
"epoch": 1.9,
"learning_rate": 2.909130581337655e-07,
"loss": 1.4057,
"step": 6420
},
{
"epoch": 1.91,
"learning_rate": 2.7347678230306427e-07,
"loss": 1.3672,
"step": 6430
},
{
"epoch": 1.91,
"learning_rate": 2.5657644116100497e-07,
"loss": 1.3742,
"step": 6440
},
{
"epoch": 1.91,
"learning_rate": 2.4021240101379793e-07,
"loss": 1.3964,
"step": 6450
},
{
"epoch": 1.91,
"learning_rate": 2.243850165436129e-07,
"loss": 1.4025,
"step": 6460
},
{
"epoch": 1.92,
"learning_rate": 2.0909463080087123e-07,
"loss": 1.386,
"step": 6470
},
{
"epoch": 1.92,
"learning_rate": 1.9434157519683248e-07,
"loss": 1.3875,
"step": 6480
},
{
"epoch": 1.92,
"learning_rate": 1.8012616949640015e-07,
"loss": 1.3945,
"step": 6490
},
{
"epoch": 1.93,
"learning_rate": 1.6644872181118543e-07,
"loss": 1.3725,
"step": 6500
},
{
"epoch": 1.93,
"learning_rate": 1.533095285928432e-07,
"loss": 1.3732,
"step": 6510
},
{
"epoch": 1.93,
"learning_rate": 1.4070887462663552e-07,
"loss": 1.3791,
"step": 6520
},
{
"epoch": 1.93,
"learning_rate": 1.2864703302526703e-07,
"loss": 1.3922,
"step": 6530
},
{
"epoch": 1.94,
"learning_rate": 1.171242652229565e-07,
"loss": 1.3737,
"step": 6540
},
{
"epoch": 1.94,
"learning_rate": 1.0614082096977729e-07,
"loss": 1.3879,
"step": 6550
},
{
"epoch": 1.94,
"learning_rate": 9.569693832623961e-08,
"loss": 1.3962,
"step": 6560
},
{
"epoch": 1.95,
"learning_rate": 8.579284365813623e-08,
"loss": 1.4029,
"step": 6570
},
{
"epoch": 1.95,
"learning_rate": 7.642875163162977e-08,
"loss": 1.3903,
"step": 6580
},
{
"epoch": 1.95,
"learning_rate": 6.760486520860643e-08,
"loss": 1.3986,
"step": 6590
},
{
"epoch": 1.96,
"learning_rate": 5.932137564227391e-08,
"loss": 1.3624,
"step": 6600
},
{
"epoch": 1.96,
"learning_rate": 5.157846247300646e-08,
"loss": 1.3732,
"step": 6610
},
{
"epoch": 1.96,
"learning_rate": 4.43762935244757e-08,
"loss": 1.3838,
"step": 6620
},
{
"epoch": 1.96,
"learning_rate": 3.7715024899989684e-08,
"loss": 1.3711,
"step": 6630
},
{
"epoch": 1.97,
"learning_rate": 3.159480097912615e-08,
"loss": 1.3974,
"step": 6640
},
{
"epoch": 1.97,
"learning_rate": 2.6015754414593363e-08,
"loss": 1.3918,
"step": 6650
},
{
"epoch": 1.97,
"learning_rate": 2.097800612936296e-08,
"loss": 1.3933,
"step": 6660
},
{
"epoch": 1.98,
"learning_rate": 1.6481665314035944e-08,
"loss": 1.3815,
"step": 6670
},
{
"epoch": 1.98,
"learning_rate": 1.2526829424494569e-08,
"loss": 1.3671,
"step": 6680
},
{
"epoch": 1.98,
"learning_rate": 9.113584179770707e-09,
"loss": 1.4008,
"step": 6690
},
{
"epoch": 1.99,
"learning_rate": 6.242003560205656e-09,
"loss": 1.3876,
"step": 6700
},
{
"epoch": 1.99,
"learning_rate": 3.9121498058292124e-09,
"loss": 1.3722,
"step": 6710
},
{
"epoch": 1.99,
"learning_rate": 2.124073415030181e-09,
"loss": 1.3823,
"step": 6720
},
{
"epoch": 1.99,
"learning_rate": 8.778131434433734e-10,
"loss": 1.3815,
"step": 6730
},
{
"epoch": 2.0,
"learning_rate": 1.7339600311971903e-10,
"loss": 1.3889,
"step": 6740
},
{
"epoch": 2.0,
"step": 6748,
"total_flos": 1.1679618660044898e+19,
"train_loss": 1.4147593358881598,
"train_runtime": 86303.1376,
"train_samples_per_second": 7.507,
"train_steps_per_second": 0.078
}
],
"max_steps": 6748,
"num_train_epochs": 2,
"total_flos": 1.1679618660044898e+19,
"trial_name": null,
"trial_params": null
}