cline / trainer_state.json
AnonymousSub's picture
first commit
1f6283e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.15831903653248758,
"global_step": 214000,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000739808581927512,
"learning_rate": 1e-05,
"loss": 11.06675,
"step": 1000
},
{
"epoch": 0.001479617163855024,
"learning_rate": 2e-05,
"loss": 9.012630859375,
"step": 2000
},
{
"epoch": 0.002219425745782536,
"learning_rate": 3e-05,
"loss": 7.853123046875,
"step": 3000
},
{
"epoch": 0.002959234327710048,
"learning_rate": 4e-05,
"loss": 7.2371328125,
"step": 4000
},
{
"epoch": 0.00369904290963756,
"learning_rate": 5e-05,
"loss": 6.75319921875,
"step": 5000
},
{
"epoch": 0.004438851491565072,
"learning_rate": 6e-05,
"loss": 6.3771640625,
"step": 6000
},
{
"epoch": 0.005178660073492584,
"learning_rate": 7e-05,
"loss": 6.00278515625,
"step": 7000
},
{
"epoch": 0.005918468655420096,
"learning_rate": 8e-05,
"loss": 5.66916015625,
"step": 8000
},
{
"epoch": 0.006658277237347608,
"learning_rate": 9e-05,
"loss": 5.3051171875,
"step": 9000
},
{
"epoch": 0.00739808581927512,
"learning_rate": 0.0001,
"loss": 4.99890625,
"step": 10000
},
{
"epoch": 0.008137894401202633,
"learning_rate": 9.98989898989899e-05,
"loss": 4.6847890625,
"step": 11000
},
{
"epoch": 0.008877702983130144,
"learning_rate": 9.97979797979798e-05,
"loss": 4.393546875,
"step": 12000
},
{
"epoch": 0.009617511565057657,
"learning_rate": 9.96969696969697e-05,
"loss": 4.1845703125,
"step": 13000
},
{
"epoch": 0.010357320146985169,
"learning_rate": 9.95959595959596e-05,
"loss": 3.9956796875,
"step": 14000
},
{
"epoch": 0.011097128728912681,
"learning_rate": 9.94949494949495e-05,
"loss": 3.8322421875,
"step": 15000
},
{
"epoch": 0.011836937310840193,
"learning_rate": 9.939393939393939e-05,
"loss": 3.6949375,
"step": 16000
},
{
"epoch": 0.012576745892767706,
"learning_rate": 9.92929292929293e-05,
"loss": 3.584078125,
"step": 17000
},
{
"epoch": 0.013316554474695217,
"learning_rate": 9.919191919191919e-05,
"loss": 3.4821875,
"step": 18000
},
{
"epoch": 0.01405636305662273,
"learning_rate": 9.909090909090911e-05,
"loss": 3.4063359375,
"step": 19000
},
{
"epoch": 0.01479617163855024,
"learning_rate": 9.8989898989899e-05,
"loss": 3.3391875,
"step": 20000
},
{
"epoch": 0.015535980220477754,
"learning_rate": 9.888888888888889e-05,
"loss": 3.294140625,
"step": 21000
},
{
"epoch": 0.016275788802405267,
"learning_rate": 9.87878787878788e-05,
"loss": 3.2583359375,
"step": 22000
},
{
"epoch": 0.017015597384332778,
"learning_rate": 9.868686868686869e-05,
"loss": 3.2327890625,
"step": 23000
},
{
"epoch": 0.01775540596626029,
"learning_rate": 9.85858585858586e-05,
"loss": 3.213390625,
"step": 24000
},
{
"epoch": 0.018495214548187804,
"learning_rate": 9.848484848484849e-05,
"loss": 3.2020703125,
"step": 25000
},
{
"epoch": 0.019235023130115315,
"learning_rate": 9.838383838383838e-05,
"loss": 3.1884453125,
"step": 26000
},
{
"epoch": 0.019974831712042826,
"learning_rate": 9.828282828282829e-05,
"loss": 3.1846484375,
"step": 27000
},
{
"epoch": 0.020714640293970337,
"learning_rate": 9.818181818181818e-05,
"loss": 3.188234375,
"step": 28000
},
{
"epoch": 0.021454448875897852,
"learning_rate": 9.808080808080809e-05,
"loss": 3.1805625,
"step": 29000
},
{
"epoch": 0.022194257457825363,
"learning_rate": 9.797979797979798e-05,
"loss": 3.168953125,
"step": 30000
},
{
"epoch": 0.022934066039752874,
"learning_rate": 9.787878787878789e-05,
"loss": 3.162546875,
"step": 31000
},
{
"epoch": 0.023673874621680385,
"learning_rate": 9.777777777777778e-05,
"loss": 3.17115625,
"step": 32000
},
{
"epoch": 0.0244136832036079,
"learning_rate": 9.767676767676767e-05,
"loss": 3.168890625,
"step": 33000
},
{
"epoch": 0.02515349178553541,
"learning_rate": 9.757575757575758e-05,
"loss": 3.1635625,
"step": 34000
},
{
"epoch": 0.025893300367462922,
"learning_rate": 9.747474747474747e-05,
"loss": 3.165890625,
"step": 35000
},
{
"epoch": 0.026633108949390433,
"learning_rate": 9.737373737373738e-05,
"loss": 3.1600625,
"step": 36000
},
{
"epoch": 0.027372917531317948,
"learning_rate": 9.727272727272728e-05,
"loss": 3.15909375,
"step": 37000
},
{
"epoch": 0.02811272611324546,
"learning_rate": 9.717171717171718e-05,
"loss": 3.150296875,
"step": 38000
},
{
"epoch": 0.02885253469517297,
"learning_rate": 9.707070707070708e-05,
"loss": 3.15221875,
"step": 39000
},
{
"epoch": 0.02959234327710048,
"learning_rate": 9.696969696969698e-05,
"loss": 3.15134375,
"step": 40000
},
{
"epoch": 0.030332151859027996,
"learning_rate": 9.686868686868688e-05,
"loss": 3.141328125,
"step": 41000
},
{
"epoch": 0.031071960440955507,
"learning_rate": 9.676767676767677e-05,
"loss": 3.149328125,
"step": 42000
},
{
"epoch": 0.03181176902288302,
"learning_rate": 9.666666666666667e-05,
"loss": 3.139328125,
"step": 43000
},
{
"epoch": 0.03255157760481053,
"learning_rate": 9.656565656565657e-05,
"loss": 3.144234375,
"step": 44000
},
{
"epoch": 0.03329138618673804,
"learning_rate": 9.646464646464647e-05,
"loss": 3.1361875,
"step": 45000
},
{
"epoch": 0.034031194768665556,
"learning_rate": 9.636363636363637e-05,
"loss": 3.139328125,
"step": 46000
},
{
"epoch": 0.03477100335059307,
"learning_rate": 9.626262626262627e-05,
"loss": 3.12934375,
"step": 47000
},
{
"epoch": 0.03551081193252058,
"learning_rate": 9.616161616161616e-05,
"loss": 3.126953125,
"step": 48000
},
{
"epoch": 0.03625062051444809,
"learning_rate": 9.606060606060606e-05,
"loss": 3.123953125,
"step": 49000
},
{
"epoch": 0.03699042909637561,
"learning_rate": 9.595959595959596e-05,
"loss": 3.118390625,
"step": 50000
},
{
"epoch": 0.037730237678303115,
"learning_rate": 9.585858585858586e-05,
"loss": 3.12034375,
"step": 51000
},
{
"epoch": 0.03847004626023063,
"learning_rate": 9.575757575757576e-05,
"loss": 3.1155625,
"step": 52000
},
{
"epoch": 0.03920985484215814,
"learning_rate": 9.565656565656566e-05,
"loss": 3.112453125,
"step": 53000
},
{
"epoch": 0.03994966342408565,
"learning_rate": 9.555555555555557e-05,
"loss": 3.10696875,
"step": 54000
},
{
"epoch": 0.040689472006013166,
"learning_rate": 9.545454545454546e-05,
"loss": 3.1111875,
"step": 55000
},
{
"epoch": 0.041429280587940674,
"learning_rate": 9.535353535353537e-05,
"loss": 3.10640625,
"step": 56000
},
{
"epoch": 0.04216908916986819,
"learning_rate": 9.525252525252526e-05,
"loss": 3.105046875,
"step": 57000
},
{
"epoch": 0.042908897751795703,
"learning_rate": 9.515151515151515e-05,
"loss": 3.104453125,
"step": 58000
},
{
"epoch": 0.04364870633372321,
"learning_rate": 9.505050505050506e-05,
"loss": 3.102703125,
"step": 59000
},
{
"epoch": 0.044388514915650726,
"learning_rate": 9.494949494949495e-05,
"loss": 3.096671875,
"step": 60000
},
{
"epoch": 0.045128323497578234,
"learning_rate": 9.484848484848486e-05,
"loss": 3.09621875,
"step": 61000
},
{
"epoch": 0.04586813207950575,
"learning_rate": 9.474747474747475e-05,
"loss": 3.090984375,
"step": 62000
},
{
"epoch": 0.04660794066143326,
"learning_rate": 9.464646464646464e-05,
"loss": 3.087421875,
"step": 63000
},
{
"epoch": 0.04734774924336077,
"learning_rate": 9.454545454545455e-05,
"loss": 3.091140625,
"step": 64000
},
{
"epoch": 0.048087557825288285,
"learning_rate": 9.444444444444444e-05,
"loss": 3.084,
"step": 65000
},
{
"epoch": 0.0488273664072158,
"learning_rate": 9.434343434343435e-05,
"loss": 3.0825625,
"step": 66000
},
{
"epoch": 0.04956717498914331,
"learning_rate": 9.424242424242424e-05,
"loss": 3.074046875,
"step": 67000
},
{
"epoch": 0.05030698357107082,
"learning_rate": 9.414141414141415e-05,
"loss": 2.994109375,
"step": 68000
},
{
"epoch": 0.05104679215299833,
"learning_rate": 9.404040404040404e-05,
"loss": 2.941375,
"step": 69000
},
{
"epoch": 0.051786600734925844,
"learning_rate": 9.393939393939395e-05,
"loss": 2.9075,
"step": 70000
},
{
"epoch": 0.05252640931685336,
"learning_rate": 9.383838383838385e-05,
"loss": 2.87453125,
"step": 71000
},
{
"epoch": 0.05326621789878087,
"learning_rate": 9.373737373737375e-05,
"loss": 2.839375,
"step": 72000
},
{
"epoch": 0.05400602648070838,
"learning_rate": 9.363636363636364e-05,
"loss": 2.821875,
"step": 73000
},
{
"epoch": 0.054745835062635896,
"learning_rate": 9.353535353535354e-05,
"loss": 2.79540625,
"step": 74000
},
{
"epoch": 0.055485643644563404,
"learning_rate": 9.343434343434344e-05,
"loss": 2.7788125,
"step": 75000
},
{
"epoch": 0.05622545222649092,
"learning_rate": 9.333333333333334e-05,
"loss": 2.76378125,
"step": 76000
},
{
"epoch": 0.05696526080841843,
"learning_rate": 9.323232323232324e-05,
"loss": 2.75003125,
"step": 77000
},
{
"epoch": 0.05770506939034594,
"learning_rate": 9.313131313131314e-05,
"loss": 2.73059375,
"step": 78000
},
{
"epoch": 0.058444877972273455,
"learning_rate": 9.303030303030303e-05,
"loss": 2.7220625,
"step": 79000
},
{
"epoch": 0.05918468655420096,
"learning_rate": 9.292929292929293e-05,
"loss": 2.70821875,
"step": 80000
},
{
"epoch": 0.05992449513612848,
"learning_rate": 9.282828282828283e-05,
"loss": 2.70378125,
"step": 81000
},
{
"epoch": 0.06066430371805599,
"learning_rate": 9.272727272727273e-05,
"loss": 2.68196875,
"step": 82000
},
{
"epoch": 0.0614041122999835,
"learning_rate": 9.262626262626263e-05,
"loss": 2.68290625,
"step": 83000
},
{
"epoch": 0.062143920881911015,
"learning_rate": 9.252525252525253e-05,
"loss": 2.66859375,
"step": 84000
},
{
"epoch": 0.06288372946383852,
"learning_rate": 9.242424242424242e-05,
"loss": 2.643625,
"step": 85000
},
{
"epoch": 0.06362353804576604,
"learning_rate": 9.232323232323232e-05,
"loss": 2.64675,
"step": 86000
},
{
"epoch": 0.06436334662769355,
"learning_rate": 9.222222222222223e-05,
"loss": 2.6360625,
"step": 87000
},
{
"epoch": 0.06510315520962107,
"learning_rate": 9.212121212121214e-05,
"loss": 2.6265625,
"step": 88000
},
{
"epoch": 0.06584296379154858,
"learning_rate": 9.202020202020203e-05,
"loss": 2.61628125,
"step": 89000
},
{
"epoch": 0.06658277237347608,
"learning_rate": 9.191919191919192e-05,
"loss": 2.61065625,
"step": 90000
},
{
"epoch": 0.0673225809554036,
"learning_rate": 9.181818181818183e-05,
"loss": 2.6013125,
"step": 91000
},
{
"epoch": 0.06806238953733111,
"learning_rate": 9.171717171717172e-05,
"loss": 2.59125,
"step": 92000
},
{
"epoch": 0.06880219811925863,
"learning_rate": 9.161616161616163e-05,
"loss": 2.58496875,
"step": 93000
},
{
"epoch": 0.06954200670118614,
"learning_rate": 9.151515151515152e-05,
"loss": 2.58175,
"step": 94000
},
{
"epoch": 0.07028181528311364,
"learning_rate": 9.141414141414141e-05,
"loss": 2.5795625,
"step": 95000
},
{
"epoch": 0.07102162386504116,
"learning_rate": 9.131313131313132e-05,
"loss": 2.56846875,
"step": 96000
},
{
"epoch": 0.07176143244696867,
"learning_rate": 9.121212121212121e-05,
"loss": 2.56171875,
"step": 97000
},
{
"epoch": 0.07250124102889619,
"learning_rate": 9.111111111111112e-05,
"loss": 2.55975,
"step": 98000
},
{
"epoch": 0.0732410496108237,
"learning_rate": 9.101010101010101e-05,
"loss": 2.548375,
"step": 99000
},
{
"epoch": 0.07398085819275121,
"learning_rate": 9.090909090909092e-05,
"loss": 2.5480625,
"step": 100000
},
{
"epoch": 0.07472066677467872,
"learning_rate": 9.080808080808081e-05,
"loss": 2.5428125,
"step": 101000
},
{
"epoch": 0.07546047535660623,
"learning_rate": 9.07070707070707e-05,
"loss": 2.537125,
"step": 102000
},
{
"epoch": 0.07620028393853374,
"learning_rate": 9.060606060606061e-05,
"loss": 2.5354375,
"step": 103000
},
{
"epoch": 0.07694009252046126,
"learning_rate": 9.050505050505052e-05,
"loss": 2.5318125,
"step": 104000
},
{
"epoch": 0.07767990110238877,
"learning_rate": 9.040404040404041e-05,
"loss": 2.513,
"step": 105000
},
{
"epoch": 0.07841970968431627,
"learning_rate": 9.030303030303031e-05,
"loss": 2.51375,
"step": 106000
},
{
"epoch": 0.07915951826624379,
"learning_rate": 9.02020202020202e-05,
"loss": 2.50621875,
"step": 107000
},
{
"epoch": 0.0798993268481713,
"learning_rate": 9.010101010101011e-05,
"loss": 2.50103125,
"step": 108000
},
{
"epoch": 0.08063913543009882,
"learning_rate": 9e-05,
"loss": 2.5036875,
"step": 109000
},
{
"epoch": 0.08137894401202633,
"learning_rate": 8.98989898989899e-05,
"loss": 2.49821875,
"step": 110000
},
{
"epoch": 0.08211875259395383,
"learning_rate": 8.97979797979798e-05,
"loss": 2.49084375,
"step": 111000
},
{
"epoch": 0.08285856117588135,
"learning_rate": 8.96969696969697e-05,
"loss": 2.48453125,
"step": 112000
},
{
"epoch": 0.08359836975780886,
"learning_rate": 8.95959595959596e-05,
"loss": 2.47575,
"step": 113000
},
{
"epoch": 0.08433817833973638,
"learning_rate": 8.94949494949495e-05,
"loss": 2.48334375,
"step": 114000
},
{
"epoch": 0.08507798692166389,
"learning_rate": 8.93939393939394e-05,
"loss": 2.48015625,
"step": 115000
},
{
"epoch": 0.08581779550359141,
"learning_rate": 8.92929292929293e-05,
"loss": 2.467625,
"step": 116000
},
{
"epoch": 0.08655760408551891,
"learning_rate": 8.919191919191919e-05,
"loss": 2.464625,
"step": 117000
},
{
"epoch": 0.08729741266744642,
"learning_rate": 8.90909090909091e-05,
"loss": 2.45959375,
"step": 118000
},
{
"epoch": 0.08803722124937394,
"learning_rate": 8.898989898989899e-05,
"loss": 2.46559375,
"step": 119000
},
{
"epoch": 0.08877702983130145,
"learning_rate": 8.888888888888889e-05,
"loss": 2.453125,
"step": 120000
},
{
"epoch": 0.08951683841322897,
"learning_rate": 8.87878787878788e-05,
"loss": 2.45296875,
"step": 121000
},
{
"epoch": 0.09025664699515647,
"learning_rate": 8.868686868686869e-05,
"loss": 2.4496875,
"step": 122000
},
{
"epoch": 0.09099645557708398,
"learning_rate": 8.85858585858586e-05,
"loss": 2.4446875,
"step": 123000
},
{
"epoch": 0.0917362641590115,
"learning_rate": 8.848484848484849e-05,
"loss": 2.44803125,
"step": 124000
},
{
"epoch": 0.09247607274093901,
"learning_rate": 8.83838383838384e-05,
"loss": 2.43840625,
"step": 125000
},
{
"epoch": 0.09321588132286653,
"learning_rate": 8.828282828282829e-05,
"loss": 2.43853125,
"step": 126000
},
{
"epoch": 0.09395568990479404,
"learning_rate": 8.818181818181818e-05,
"loss": 2.44396875,
"step": 127000
},
{
"epoch": 0.09469549848672154,
"learning_rate": 8.808080808080809e-05,
"loss": 2.43378125,
"step": 128000
},
{
"epoch": 0.09543530706864906,
"learning_rate": 8.797979797979798e-05,
"loss": 2.4296875,
"step": 129000
},
{
"epoch": 0.09617511565057657,
"learning_rate": 8.787878787878789e-05,
"loss": 2.41928125,
"step": 130000
},
{
"epoch": 0.09691492423250408,
"learning_rate": 8.777777777777778e-05,
"loss": 2.41896875,
"step": 131000
},
{
"epoch": 0.0976547328144316,
"learning_rate": 8.767676767676767e-05,
"loss": 2.41890625,
"step": 132000
},
{
"epoch": 0.0983945413963591,
"learning_rate": 8.757575757575758e-05,
"loss": 2.423875,
"step": 133000
},
{
"epoch": 0.09913434997828661,
"learning_rate": 8.747474747474747e-05,
"loss": 2.41721875,
"step": 134000
},
{
"epoch": 0.09987415856021413,
"learning_rate": 8.737373737373738e-05,
"loss": 2.41221875,
"step": 135000
},
{
"epoch": 0.10061396714214164,
"learning_rate": 8.727272727272727e-05,
"loss": 2.41015625,
"step": 136000
},
{
"epoch": 0.10135377572406916,
"learning_rate": 8.717171717171718e-05,
"loss": 2.40334375,
"step": 137000
},
{
"epoch": 0.10209358430599666,
"learning_rate": 8.707070707070707e-05,
"loss": 2.4039375,
"step": 138000
},
{
"epoch": 0.10283339288792417,
"learning_rate": 8.696969696969698e-05,
"loss": 2.392,
"step": 139000
},
{
"epoch": 0.10357320146985169,
"learning_rate": 8.686868686868688e-05,
"loss": 2.3910625,
"step": 140000
},
{
"epoch": 0.1043130100517792,
"learning_rate": 8.676767676767678e-05,
"loss": 2.38865625,
"step": 141000
},
{
"epoch": 0.10505281863370672,
"learning_rate": 8.666666666666667e-05,
"loss": 2.3965,
"step": 142000
},
{
"epoch": 0.10579262721563423,
"learning_rate": 8.656565656565657e-05,
"loss": 2.37953125,
"step": 143000
},
{
"epoch": 0.10653243579756173,
"learning_rate": 8.646464646464647e-05,
"loss": 2.3775625,
"step": 144000
},
{
"epoch": 0.10727224437948925,
"learning_rate": 8.636363636363637e-05,
"loss": 2.3774375,
"step": 145000
},
{
"epoch": 0.10801205296141676,
"learning_rate": 8.626262626262627e-05,
"loss": 2.3824375,
"step": 146000
},
{
"epoch": 0.10875186154334428,
"learning_rate": 8.616161616161616e-05,
"loss": 2.3749375,
"step": 147000
},
{
"epoch": 0.10949167012527179,
"learning_rate": 8.606060606060606e-05,
"loss": 2.37059375,
"step": 148000
},
{
"epoch": 0.11023147870719929,
"learning_rate": 8.595959595959596e-05,
"loss": 2.36821875,
"step": 149000
},
{
"epoch": 0.11097128728912681,
"learning_rate": 8.585858585858586e-05,
"loss": 2.38290625,
"step": 150000
},
{
"epoch": 0.11171109587105432,
"learning_rate": 8.575757575757576e-05,
"loss": 2.36784375,
"step": 151000
},
{
"epoch": 0.11245090445298184,
"learning_rate": 8.565656565656566e-05,
"loss": 2.37403125,
"step": 152000
},
{
"epoch": 0.11319071303490935,
"learning_rate": 8.555555555555556e-05,
"loss": 2.3681875,
"step": 153000
},
{
"epoch": 0.11393052161683687,
"learning_rate": 8.545454545454545e-05,
"loss": 2.3626875,
"step": 154000
},
{
"epoch": 0.11467033019876437,
"learning_rate": 8.535353535353535e-05,
"loss": 2.365875,
"step": 155000
},
{
"epoch": 0.11541013878069188,
"learning_rate": 8.525252525252526e-05,
"loss": 2.3594375,
"step": 156000
},
{
"epoch": 0.1161499473626194,
"learning_rate": 8.515151515151515e-05,
"loss": 2.35259375,
"step": 157000
},
{
"epoch": 0.11688975594454691,
"learning_rate": 8.505050505050506e-05,
"loss": 2.35125,
"step": 158000
},
{
"epoch": 0.11762956452647443,
"learning_rate": 8.494949494949495e-05,
"loss": 2.35478125,
"step": 159000
},
{
"epoch": 0.11836937310840193,
"learning_rate": 8.484848484848486e-05,
"loss": 2.34390625,
"step": 160000
},
{
"epoch": 0.11910918169032944,
"learning_rate": 8.474747474747475e-05,
"loss": 2.3526875,
"step": 161000
},
{
"epoch": 0.11984899027225696,
"learning_rate": 8.464646464646466e-05,
"loss": 2.350125,
"step": 162000
},
{
"epoch": 0.12058879885418447,
"learning_rate": 8.454545454545455e-05,
"loss": 2.3663125,
"step": 163000
},
{
"epoch": 0.12132860743611198,
"learning_rate": 8.444444444444444e-05,
"loss": 2.3484375,
"step": 164000
},
{
"epoch": 0.12206841601803949,
"learning_rate": 8.434343434343435e-05,
"loss": 2.34578125,
"step": 165000
},
{
"epoch": 0.122808224599967,
"learning_rate": 8.424242424242424e-05,
"loss": 2.34928125,
"step": 166000
},
{
"epoch": 0.12354803318189451,
"learning_rate": 8.414141414141415e-05,
"loss": 2.336375,
"step": 167000
},
{
"epoch": 0.12428784176382203,
"learning_rate": 8.404040404040404e-05,
"loss": 2.34021875,
"step": 168000
},
{
"epoch": 0.12502765034574953,
"learning_rate": 8.393939393939393e-05,
"loss": 2.33871875,
"step": 169000
},
{
"epoch": 0.12576745892767704,
"learning_rate": 8.383838383838384e-05,
"loss": 2.3376875,
"step": 170000
},
{
"epoch": 0.12650726750960456,
"learning_rate": 8.373737373737373e-05,
"loss": 2.33028125,
"step": 171000
},
{
"epoch": 0.12724707609153207,
"learning_rate": 8.363636363636364e-05,
"loss": 2.3309375,
"step": 172000
},
{
"epoch": 0.1279868846734596,
"learning_rate": 8.353535353535355e-05,
"loss": 2.33528125,
"step": 173000
},
{
"epoch": 0.1287266932553871,
"learning_rate": 8.343434343434344e-05,
"loss": 2.3350625,
"step": 174000
},
{
"epoch": 0.12946650183731462,
"learning_rate": 8.333333333333334e-05,
"loss": 2.32559375,
"step": 175000
},
{
"epoch": 0.13020631041924213,
"learning_rate": 8.323232323232324e-05,
"loss": 2.321,
"step": 176000
},
{
"epoch": 0.13094611900116965,
"learning_rate": 8.313131313131314e-05,
"loss": 2.33025,
"step": 177000
},
{
"epoch": 0.13168592758309716,
"learning_rate": 8.303030303030304e-05,
"loss": 2.3170625,
"step": 178000
},
{
"epoch": 0.13242573616502465,
"learning_rate": 8.292929292929293e-05,
"loss": 2.3196875,
"step": 179000
},
{
"epoch": 0.13316554474695216,
"learning_rate": 8.282828282828283e-05,
"loss": 2.313375,
"step": 180000
},
{
"epoch": 0.13390535332887968,
"learning_rate": 8.272727272727273e-05,
"loss": 2.305875,
"step": 181000
},
{
"epoch": 0.1346451619108072,
"learning_rate": 8.262626262626263e-05,
"loss": 2.316,
"step": 182000
},
{
"epoch": 0.1353849704927347,
"learning_rate": 8.252525252525253e-05,
"loss": 2.30725,
"step": 183000
},
{
"epoch": 0.13612477907466222,
"learning_rate": 8.242424242424243e-05,
"loss": 2.3010625,
"step": 184000
},
{
"epoch": 0.13686458765658974,
"learning_rate": 8.232323232323233e-05,
"loss": 2.3125625,
"step": 185000
},
{
"epoch": 0.13760439623851725,
"learning_rate": 8.222222222222222e-05,
"loss": 2.31725,
"step": 186000
},
{
"epoch": 0.13834420482044477,
"learning_rate": 8.212121212121212e-05,
"loss": 2.2964375,
"step": 187000
},
{
"epoch": 0.13908401340237228,
"learning_rate": 8.202020202020202e-05,
"loss": 2.302,
"step": 188000
},
{
"epoch": 0.1398238219842998,
"learning_rate": 8.191919191919192e-05,
"loss": 2.3019375,
"step": 189000
},
{
"epoch": 0.14056363056622728,
"learning_rate": 8.181818181818183e-05,
"loss": 2.3020625,
"step": 190000
},
{
"epoch": 0.1413034391481548,
"learning_rate": 8.171717171717172e-05,
"loss": 2.3026875,
"step": 191000
},
{
"epoch": 0.1420432477300823,
"learning_rate": 8.161616161616163e-05,
"loss": 2.298875,
"step": 192000
},
{
"epoch": 0.14278305631200983,
"learning_rate": 8.151515151515152e-05,
"loss": 2.2983125,
"step": 193000
},
{
"epoch": 0.14352286489393734,
"learning_rate": 8.141414141414141e-05,
"loss": 2.2835625,
"step": 194000
},
{
"epoch": 0.14426267347586486,
"learning_rate": 8.131313131313132e-05,
"loss": 2.2885625,
"step": 195000
},
{
"epoch": 0.14500248205779237,
"learning_rate": 8.121212121212121e-05,
"loss": 2.295125,
"step": 196000
},
{
"epoch": 0.14574229063971988,
"learning_rate": 8.111111111111112e-05,
"loss": 2.2823125,
"step": 197000
},
{
"epoch": 0.1464820992216474,
"learning_rate": 8.101010101010101e-05,
"loss": 2.280375,
"step": 198000
},
{
"epoch": 0.14722190780357491,
"learning_rate": 8.090909090909092e-05,
"loss": 2.290125,
"step": 199000
},
{
"epoch": 0.14796171638550243,
"learning_rate": 8.080808080808081e-05,
"loss": 2.2868125,
"step": 200000
},
{
"epoch": 0.14870152496742992,
"learning_rate": 8.07070707070707e-05,
"loss": 2.2840625,
"step": 201000
},
{
"epoch": 0.14944133354935743,
"learning_rate": 8.060606060606061e-05,
"loss": 2.2799375,
"step": 202000
},
{
"epoch": 0.15018114213128494,
"learning_rate": 8.05050505050505e-05,
"loss": 2.285875,
"step": 203000
},
{
"epoch": 0.15092095071321246,
"learning_rate": 8.040404040404041e-05,
"loss": 2.344375,
"step": 204000
},
{
"epoch": 0.15166075929513997,
"learning_rate": 8.03030303030303e-05,
"loss": 2.2733125,
"step": 205000
},
{
"epoch": 0.1524005678770675,
"learning_rate": 8.02020202020202e-05,
"loss": 2.272125,
"step": 206000
},
{
"epoch": 0.153140376458995,
"learning_rate": 8.010101010101011e-05,
"loss": 2.271625,
"step": 207000
},
{
"epoch": 0.15388018504092252,
"learning_rate": 8e-05,
"loss": 2.278125,
"step": 208000
},
{
"epoch": 0.15461999362285003,
"learning_rate": 7.989898989898991e-05,
"loss": 2.275625,
"step": 209000
},
{
"epoch": 0.15535980220477755,
"learning_rate": 7.97979797979798e-05,
"loss": 2.2675,
"step": 210000
},
{
"epoch": 0.15609961078670503,
"learning_rate": 7.96969696969697e-05,
"loss": 2.2676875,
"step": 211000
},
{
"epoch": 0.15683941936863255,
"learning_rate": 7.95959595959596e-05,
"loss": 2.265875,
"step": 212000
},
{
"epoch": 0.15757922795056006,
"learning_rate": 7.94949494949495e-05,
"loss": 2.260125,
"step": 213000
},
{
"epoch": 0.15831903653248758,
"learning_rate": 7.93939393939394e-05,
"loss": 2.2750625,
"step": 214000
}
],
"max_steps": 1000000,
"num_train_epochs": 1,
"total_flos": 387919397980800000
}