{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15831903653248758, "global_step": 214000, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000739808581927512, "learning_rate": 1e-05, "loss": 11.06675, "step": 1000 }, { "epoch": 0.001479617163855024, "learning_rate": 2e-05, "loss": 9.012630859375, "step": 2000 }, { "epoch": 0.002219425745782536, "learning_rate": 3e-05, "loss": 7.853123046875, "step": 3000 }, { "epoch": 0.002959234327710048, "learning_rate": 4e-05, "loss": 7.2371328125, "step": 4000 }, { "epoch": 0.00369904290963756, "learning_rate": 5e-05, "loss": 6.75319921875, "step": 5000 }, { "epoch": 0.004438851491565072, "learning_rate": 6e-05, "loss": 6.3771640625, "step": 6000 }, { "epoch": 0.005178660073492584, "learning_rate": 7e-05, "loss": 6.00278515625, "step": 7000 }, { "epoch": 0.005918468655420096, "learning_rate": 8e-05, "loss": 5.66916015625, "step": 8000 }, { "epoch": 0.006658277237347608, "learning_rate": 9e-05, "loss": 5.3051171875, "step": 9000 }, { "epoch": 0.00739808581927512, "learning_rate": 0.0001, "loss": 4.99890625, "step": 10000 }, { "epoch": 0.008137894401202633, "learning_rate": 9.98989898989899e-05, "loss": 4.6847890625, "step": 11000 }, { "epoch": 0.008877702983130144, "learning_rate": 9.97979797979798e-05, "loss": 4.393546875, "step": 12000 }, { "epoch": 0.009617511565057657, "learning_rate": 9.96969696969697e-05, "loss": 4.1845703125, "step": 13000 }, { "epoch": 0.010357320146985169, "learning_rate": 9.95959595959596e-05, "loss": 3.9956796875, "step": 14000 }, { "epoch": 0.011097128728912681, "learning_rate": 9.94949494949495e-05, "loss": 3.8322421875, "step": 15000 }, { "epoch": 0.011836937310840193, "learning_rate": 9.939393939393939e-05, "loss": 3.6949375, "step": 16000 }, { "epoch": 0.012576745892767706, "learning_rate": 9.92929292929293e-05, "loss": 3.584078125, "step": 17000 }, { "epoch": 0.013316554474695217, "learning_rate": 9.919191919191919e-05, "loss": 3.4821875, "step": 18000 }, { "epoch": 0.01405636305662273, "learning_rate": 9.909090909090911e-05, "loss": 3.4063359375, "step": 19000 }, { "epoch": 0.01479617163855024, "learning_rate": 9.8989898989899e-05, "loss": 3.3391875, "step": 20000 }, { "epoch": 0.015535980220477754, "learning_rate": 9.888888888888889e-05, "loss": 3.294140625, "step": 21000 }, { "epoch": 0.016275788802405267, "learning_rate": 9.87878787878788e-05, "loss": 3.2583359375, "step": 22000 }, { "epoch": 0.017015597384332778, "learning_rate": 9.868686868686869e-05, "loss": 3.2327890625, "step": 23000 }, { "epoch": 0.01775540596626029, "learning_rate": 9.85858585858586e-05, "loss": 3.213390625, "step": 24000 }, { "epoch": 0.018495214548187804, "learning_rate": 9.848484848484849e-05, "loss": 3.2020703125, "step": 25000 }, { "epoch": 0.019235023130115315, "learning_rate": 9.838383838383838e-05, "loss": 3.1884453125, "step": 26000 }, { "epoch": 0.019974831712042826, "learning_rate": 9.828282828282829e-05, "loss": 3.1846484375, "step": 27000 }, { "epoch": 0.020714640293970337, "learning_rate": 9.818181818181818e-05, "loss": 3.188234375, "step": 28000 }, { "epoch": 0.021454448875897852, "learning_rate": 9.808080808080809e-05, "loss": 3.1805625, "step": 29000 }, { "epoch": 0.022194257457825363, "learning_rate": 9.797979797979798e-05, "loss": 3.168953125, "step": 30000 }, { "epoch": 0.022934066039752874, "learning_rate": 9.787878787878789e-05, "loss": 3.162546875, "step": 31000 }, { "epoch": 0.023673874621680385, "learning_rate": 9.777777777777778e-05, "loss": 3.17115625, "step": 32000 }, { "epoch": 0.0244136832036079, "learning_rate": 9.767676767676767e-05, "loss": 3.168890625, "step": 33000 }, { "epoch": 0.02515349178553541, "learning_rate": 9.757575757575758e-05, "loss": 3.1635625, "step": 34000 }, { "epoch": 0.025893300367462922, "learning_rate": 9.747474747474747e-05, "loss": 3.165890625, "step": 35000 }, { "epoch": 0.026633108949390433, "learning_rate": 9.737373737373738e-05, "loss": 3.1600625, "step": 36000 }, { "epoch": 0.027372917531317948, "learning_rate": 9.727272727272728e-05, "loss": 3.15909375, "step": 37000 }, { "epoch": 0.02811272611324546, "learning_rate": 9.717171717171718e-05, "loss": 3.150296875, "step": 38000 }, { "epoch": 0.02885253469517297, "learning_rate": 9.707070707070708e-05, "loss": 3.15221875, "step": 39000 }, { "epoch": 0.02959234327710048, "learning_rate": 9.696969696969698e-05, "loss": 3.15134375, "step": 40000 }, { "epoch": 0.030332151859027996, "learning_rate": 9.686868686868688e-05, "loss": 3.141328125, "step": 41000 }, { "epoch": 0.031071960440955507, "learning_rate": 9.676767676767677e-05, "loss": 3.149328125, "step": 42000 }, { "epoch": 0.03181176902288302, "learning_rate": 9.666666666666667e-05, "loss": 3.139328125, "step": 43000 }, { "epoch": 0.03255157760481053, "learning_rate": 9.656565656565657e-05, "loss": 3.144234375, "step": 44000 }, { "epoch": 0.03329138618673804, "learning_rate": 9.646464646464647e-05, "loss": 3.1361875, "step": 45000 }, { "epoch": 0.034031194768665556, "learning_rate": 9.636363636363637e-05, "loss": 3.139328125, "step": 46000 }, { "epoch": 0.03477100335059307, "learning_rate": 9.626262626262627e-05, "loss": 3.12934375, "step": 47000 }, { "epoch": 0.03551081193252058, "learning_rate": 9.616161616161616e-05, "loss": 3.126953125, "step": 48000 }, { "epoch": 0.03625062051444809, "learning_rate": 9.606060606060606e-05, "loss": 3.123953125, "step": 49000 }, { "epoch": 0.03699042909637561, "learning_rate": 9.595959595959596e-05, "loss": 3.118390625, "step": 50000 }, { "epoch": 0.037730237678303115, "learning_rate": 9.585858585858586e-05, "loss": 3.12034375, "step": 51000 }, { "epoch": 0.03847004626023063, "learning_rate": 9.575757575757576e-05, "loss": 3.1155625, "step": 52000 }, { "epoch": 0.03920985484215814, "learning_rate": 9.565656565656566e-05, "loss": 3.112453125, "step": 53000 }, { "epoch": 0.03994966342408565, "learning_rate": 9.555555555555557e-05, "loss": 3.10696875, "step": 54000 }, { "epoch": 0.040689472006013166, "learning_rate": 9.545454545454546e-05, "loss": 3.1111875, "step": 55000 }, { "epoch": 0.041429280587940674, "learning_rate": 9.535353535353537e-05, "loss": 3.10640625, "step": 56000 }, { "epoch": 0.04216908916986819, "learning_rate": 9.525252525252526e-05, "loss": 3.105046875, "step": 57000 }, { "epoch": 0.042908897751795703, "learning_rate": 9.515151515151515e-05, "loss": 3.104453125, "step": 58000 }, { "epoch": 0.04364870633372321, "learning_rate": 9.505050505050506e-05, "loss": 3.102703125, "step": 59000 }, { "epoch": 0.044388514915650726, "learning_rate": 9.494949494949495e-05, "loss": 3.096671875, "step": 60000 }, { "epoch": 0.045128323497578234, "learning_rate": 9.484848484848486e-05, "loss": 3.09621875, "step": 61000 }, { "epoch": 0.04586813207950575, "learning_rate": 9.474747474747475e-05, "loss": 3.090984375, "step": 62000 }, { "epoch": 0.04660794066143326, "learning_rate": 9.464646464646464e-05, "loss": 3.087421875, "step": 63000 }, { "epoch": 0.04734774924336077, "learning_rate": 9.454545454545455e-05, "loss": 3.091140625, "step": 64000 }, { "epoch": 0.048087557825288285, "learning_rate": 9.444444444444444e-05, "loss": 3.084, "step": 65000 }, { "epoch": 0.0488273664072158, "learning_rate": 9.434343434343435e-05, "loss": 3.0825625, "step": 66000 }, { "epoch": 0.04956717498914331, "learning_rate": 9.424242424242424e-05, "loss": 3.074046875, "step": 67000 }, { "epoch": 0.05030698357107082, "learning_rate": 9.414141414141415e-05, "loss": 2.994109375, "step": 68000 }, { "epoch": 0.05104679215299833, "learning_rate": 9.404040404040404e-05, "loss": 2.941375, "step": 69000 }, { "epoch": 0.051786600734925844, "learning_rate": 9.393939393939395e-05, "loss": 2.9075, "step": 70000 }, { "epoch": 0.05252640931685336, "learning_rate": 9.383838383838385e-05, "loss": 2.87453125, "step": 71000 }, { "epoch": 0.05326621789878087, "learning_rate": 9.373737373737375e-05, "loss": 2.839375, "step": 72000 }, { "epoch": 0.05400602648070838, "learning_rate": 9.363636363636364e-05, "loss": 2.821875, "step": 73000 }, { "epoch": 0.054745835062635896, "learning_rate": 9.353535353535354e-05, "loss": 2.79540625, "step": 74000 }, { "epoch": 0.055485643644563404, "learning_rate": 9.343434343434344e-05, "loss": 2.7788125, "step": 75000 }, { "epoch": 0.05622545222649092, "learning_rate": 9.333333333333334e-05, "loss": 2.76378125, "step": 76000 }, { "epoch": 0.05696526080841843, "learning_rate": 9.323232323232324e-05, "loss": 2.75003125, "step": 77000 }, { "epoch": 0.05770506939034594, "learning_rate": 9.313131313131314e-05, "loss": 2.73059375, "step": 78000 }, { "epoch": 0.058444877972273455, "learning_rate": 9.303030303030303e-05, "loss": 2.7220625, "step": 79000 }, { "epoch": 0.05918468655420096, "learning_rate": 9.292929292929293e-05, "loss": 2.70821875, "step": 80000 }, { "epoch": 0.05992449513612848, "learning_rate": 9.282828282828283e-05, "loss": 2.70378125, "step": 81000 }, { "epoch": 0.06066430371805599, "learning_rate": 9.272727272727273e-05, "loss": 2.68196875, "step": 82000 }, { "epoch": 0.0614041122999835, "learning_rate": 9.262626262626263e-05, "loss": 2.68290625, "step": 83000 }, { "epoch": 0.062143920881911015, "learning_rate": 9.252525252525253e-05, "loss": 2.66859375, "step": 84000 }, { "epoch": 0.06288372946383852, "learning_rate": 9.242424242424242e-05, "loss": 2.643625, "step": 85000 }, { "epoch": 0.06362353804576604, "learning_rate": 9.232323232323232e-05, "loss": 2.64675, "step": 86000 }, { "epoch": 0.06436334662769355, "learning_rate": 9.222222222222223e-05, "loss": 2.6360625, "step": 87000 }, { "epoch": 0.06510315520962107, "learning_rate": 9.212121212121214e-05, "loss": 2.6265625, "step": 88000 }, { "epoch": 0.06584296379154858, "learning_rate": 9.202020202020203e-05, "loss": 2.61628125, "step": 89000 }, { "epoch": 0.06658277237347608, "learning_rate": 9.191919191919192e-05, "loss": 2.61065625, "step": 90000 }, { "epoch": 0.0673225809554036, "learning_rate": 9.181818181818183e-05, "loss": 2.6013125, "step": 91000 }, { "epoch": 0.06806238953733111, "learning_rate": 9.171717171717172e-05, "loss": 2.59125, "step": 92000 }, { "epoch": 0.06880219811925863, "learning_rate": 9.161616161616163e-05, "loss": 2.58496875, "step": 93000 }, { "epoch": 0.06954200670118614, "learning_rate": 9.151515151515152e-05, "loss": 2.58175, "step": 94000 }, { "epoch": 0.07028181528311364, "learning_rate": 9.141414141414141e-05, "loss": 2.5795625, "step": 95000 }, { "epoch": 0.07102162386504116, "learning_rate": 9.131313131313132e-05, "loss": 2.56846875, "step": 96000 }, { "epoch": 0.07176143244696867, "learning_rate": 9.121212121212121e-05, "loss": 2.56171875, "step": 97000 }, { "epoch": 0.07250124102889619, "learning_rate": 9.111111111111112e-05, "loss": 2.55975, "step": 98000 }, { "epoch": 0.0732410496108237, "learning_rate": 9.101010101010101e-05, "loss": 2.548375, "step": 99000 }, { "epoch": 0.07398085819275121, "learning_rate": 9.090909090909092e-05, "loss": 2.5480625, "step": 100000 }, { "epoch": 0.07472066677467872, "learning_rate": 9.080808080808081e-05, "loss": 2.5428125, "step": 101000 }, { "epoch": 0.07546047535660623, "learning_rate": 9.07070707070707e-05, "loss": 2.537125, "step": 102000 }, { "epoch": 0.07620028393853374, "learning_rate": 9.060606060606061e-05, "loss": 2.5354375, "step": 103000 }, { "epoch": 0.07694009252046126, "learning_rate": 9.050505050505052e-05, "loss": 2.5318125, "step": 104000 }, { "epoch": 0.07767990110238877, "learning_rate": 9.040404040404041e-05, "loss": 2.513, "step": 105000 }, { "epoch": 0.07841970968431627, "learning_rate": 9.030303030303031e-05, "loss": 2.51375, "step": 106000 }, { "epoch": 0.07915951826624379, "learning_rate": 9.02020202020202e-05, "loss": 2.50621875, "step": 107000 }, { "epoch": 0.0798993268481713, "learning_rate": 9.010101010101011e-05, "loss": 2.50103125, "step": 108000 }, { "epoch": 0.08063913543009882, "learning_rate": 9e-05, "loss": 2.5036875, "step": 109000 }, { "epoch": 0.08137894401202633, "learning_rate": 8.98989898989899e-05, "loss": 2.49821875, "step": 110000 }, { "epoch": 0.08211875259395383, "learning_rate": 8.97979797979798e-05, "loss": 2.49084375, "step": 111000 }, { "epoch": 0.08285856117588135, "learning_rate": 8.96969696969697e-05, "loss": 2.48453125, "step": 112000 }, { "epoch": 0.08359836975780886, "learning_rate": 8.95959595959596e-05, "loss": 2.47575, "step": 113000 }, { "epoch": 0.08433817833973638, "learning_rate": 8.94949494949495e-05, "loss": 2.48334375, "step": 114000 }, { "epoch": 0.08507798692166389, "learning_rate": 8.93939393939394e-05, "loss": 2.48015625, "step": 115000 }, { "epoch": 0.08581779550359141, "learning_rate": 8.92929292929293e-05, "loss": 2.467625, "step": 116000 }, { "epoch": 0.08655760408551891, "learning_rate": 8.919191919191919e-05, "loss": 2.464625, "step": 117000 }, { "epoch": 0.08729741266744642, "learning_rate": 8.90909090909091e-05, "loss": 2.45959375, "step": 118000 }, { "epoch": 0.08803722124937394, "learning_rate": 8.898989898989899e-05, "loss": 2.46559375, "step": 119000 }, { "epoch": 0.08877702983130145, "learning_rate": 8.888888888888889e-05, "loss": 2.453125, "step": 120000 }, { "epoch": 0.08951683841322897, "learning_rate": 8.87878787878788e-05, "loss": 2.45296875, "step": 121000 }, { "epoch": 0.09025664699515647, "learning_rate": 8.868686868686869e-05, "loss": 2.4496875, "step": 122000 }, { "epoch": 0.09099645557708398, "learning_rate": 8.85858585858586e-05, "loss": 2.4446875, "step": 123000 }, { "epoch": 0.0917362641590115, "learning_rate": 8.848484848484849e-05, "loss": 2.44803125, "step": 124000 }, { "epoch": 0.09247607274093901, "learning_rate": 8.83838383838384e-05, "loss": 2.43840625, "step": 125000 }, { "epoch": 0.09321588132286653, "learning_rate": 8.828282828282829e-05, "loss": 2.43853125, "step": 126000 }, { "epoch": 0.09395568990479404, "learning_rate": 8.818181818181818e-05, "loss": 2.44396875, "step": 127000 }, { "epoch": 0.09469549848672154, "learning_rate": 8.808080808080809e-05, "loss": 2.43378125, "step": 128000 }, { "epoch": 0.09543530706864906, "learning_rate": 8.797979797979798e-05, "loss": 2.4296875, "step": 129000 }, { "epoch": 0.09617511565057657, "learning_rate": 8.787878787878789e-05, "loss": 2.41928125, "step": 130000 }, { "epoch": 0.09691492423250408, "learning_rate": 8.777777777777778e-05, "loss": 2.41896875, "step": 131000 }, { "epoch": 0.0976547328144316, "learning_rate": 8.767676767676767e-05, "loss": 2.41890625, "step": 132000 }, { "epoch": 0.0983945413963591, "learning_rate": 8.757575757575758e-05, "loss": 2.423875, "step": 133000 }, { "epoch": 0.09913434997828661, "learning_rate": 8.747474747474747e-05, "loss": 2.41721875, "step": 134000 }, { "epoch": 0.09987415856021413, "learning_rate": 8.737373737373738e-05, "loss": 2.41221875, "step": 135000 }, { "epoch": 0.10061396714214164, "learning_rate": 8.727272727272727e-05, "loss": 2.41015625, "step": 136000 }, { "epoch": 0.10135377572406916, "learning_rate": 8.717171717171718e-05, "loss": 2.40334375, "step": 137000 }, { "epoch": 0.10209358430599666, "learning_rate": 8.707070707070707e-05, "loss": 2.4039375, "step": 138000 }, { "epoch": 0.10283339288792417, "learning_rate": 8.696969696969698e-05, "loss": 2.392, "step": 139000 }, { "epoch": 0.10357320146985169, "learning_rate": 8.686868686868688e-05, "loss": 2.3910625, "step": 140000 }, { "epoch": 0.1043130100517792, "learning_rate": 8.676767676767678e-05, "loss": 2.38865625, "step": 141000 }, { "epoch": 0.10505281863370672, "learning_rate": 8.666666666666667e-05, "loss": 2.3965, "step": 142000 }, { "epoch": 0.10579262721563423, "learning_rate": 8.656565656565657e-05, "loss": 2.37953125, "step": 143000 }, { "epoch": 0.10653243579756173, "learning_rate": 8.646464646464647e-05, "loss": 2.3775625, "step": 144000 }, { "epoch": 0.10727224437948925, "learning_rate": 8.636363636363637e-05, "loss": 2.3774375, "step": 145000 }, { "epoch": 0.10801205296141676, "learning_rate": 8.626262626262627e-05, "loss": 2.3824375, "step": 146000 }, { "epoch": 0.10875186154334428, "learning_rate": 8.616161616161616e-05, "loss": 2.3749375, "step": 147000 }, { "epoch": 0.10949167012527179, "learning_rate": 8.606060606060606e-05, "loss": 2.37059375, "step": 148000 }, { "epoch": 0.11023147870719929, "learning_rate": 8.595959595959596e-05, "loss": 2.36821875, "step": 149000 }, { "epoch": 0.11097128728912681, "learning_rate": 8.585858585858586e-05, "loss": 2.38290625, "step": 150000 }, { "epoch": 0.11171109587105432, "learning_rate": 8.575757575757576e-05, "loss": 2.36784375, "step": 151000 }, { "epoch": 0.11245090445298184, "learning_rate": 8.565656565656566e-05, "loss": 2.37403125, "step": 152000 }, { "epoch": 0.11319071303490935, "learning_rate": 8.555555555555556e-05, "loss": 2.3681875, "step": 153000 }, { "epoch": 0.11393052161683687, "learning_rate": 8.545454545454545e-05, "loss": 2.3626875, "step": 154000 }, { "epoch": 0.11467033019876437, "learning_rate": 8.535353535353535e-05, "loss": 2.365875, "step": 155000 }, { "epoch": 0.11541013878069188, "learning_rate": 8.525252525252526e-05, "loss": 2.3594375, "step": 156000 }, { "epoch": 0.1161499473626194, "learning_rate": 8.515151515151515e-05, "loss": 2.35259375, "step": 157000 }, { "epoch": 0.11688975594454691, "learning_rate": 8.505050505050506e-05, "loss": 2.35125, "step": 158000 }, { "epoch": 0.11762956452647443, "learning_rate": 8.494949494949495e-05, "loss": 2.35478125, "step": 159000 }, { "epoch": 0.11836937310840193, "learning_rate": 8.484848484848486e-05, "loss": 2.34390625, "step": 160000 }, { "epoch": 0.11910918169032944, "learning_rate": 8.474747474747475e-05, "loss": 2.3526875, "step": 161000 }, { "epoch": 0.11984899027225696, "learning_rate": 8.464646464646466e-05, "loss": 2.350125, "step": 162000 }, { "epoch": 0.12058879885418447, "learning_rate": 8.454545454545455e-05, "loss": 2.3663125, "step": 163000 }, { "epoch": 0.12132860743611198, "learning_rate": 8.444444444444444e-05, "loss": 2.3484375, "step": 164000 }, { "epoch": 0.12206841601803949, "learning_rate": 8.434343434343435e-05, "loss": 2.34578125, "step": 165000 }, { "epoch": 0.122808224599967, "learning_rate": 8.424242424242424e-05, "loss": 2.34928125, "step": 166000 }, { "epoch": 0.12354803318189451, "learning_rate": 8.414141414141415e-05, "loss": 2.336375, "step": 167000 }, { "epoch": 0.12428784176382203, "learning_rate": 8.404040404040404e-05, "loss": 2.34021875, "step": 168000 }, { "epoch": 0.12502765034574953, "learning_rate": 8.393939393939393e-05, "loss": 2.33871875, "step": 169000 }, { "epoch": 0.12576745892767704, "learning_rate": 8.383838383838384e-05, "loss": 2.3376875, "step": 170000 }, { "epoch": 0.12650726750960456, "learning_rate": 8.373737373737373e-05, "loss": 2.33028125, "step": 171000 }, { "epoch": 0.12724707609153207, "learning_rate": 8.363636363636364e-05, "loss": 2.3309375, "step": 172000 }, { "epoch": 0.1279868846734596, "learning_rate": 8.353535353535355e-05, "loss": 2.33528125, "step": 173000 }, { "epoch": 0.1287266932553871, "learning_rate": 8.343434343434344e-05, "loss": 2.3350625, "step": 174000 }, { "epoch": 0.12946650183731462, "learning_rate": 8.333333333333334e-05, "loss": 2.32559375, "step": 175000 }, { "epoch": 0.13020631041924213, "learning_rate": 8.323232323232324e-05, "loss": 2.321, "step": 176000 }, { "epoch": 0.13094611900116965, "learning_rate": 8.313131313131314e-05, "loss": 2.33025, "step": 177000 }, { "epoch": 0.13168592758309716, "learning_rate": 8.303030303030304e-05, "loss": 2.3170625, "step": 178000 }, { "epoch": 0.13242573616502465, "learning_rate": 8.292929292929293e-05, "loss": 2.3196875, "step": 179000 }, { "epoch": 0.13316554474695216, "learning_rate": 8.282828282828283e-05, "loss": 2.313375, "step": 180000 }, { "epoch": 0.13390535332887968, "learning_rate": 8.272727272727273e-05, "loss": 2.305875, "step": 181000 }, { "epoch": 0.1346451619108072, "learning_rate": 8.262626262626263e-05, "loss": 2.316, "step": 182000 }, { "epoch": 0.1353849704927347, "learning_rate": 8.252525252525253e-05, "loss": 2.30725, "step": 183000 }, { "epoch": 0.13612477907466222, "learning_rate": 8.242424242424243e-05, "loss": 2.3010625, "step": 184000 }, { "epoch": 0.13686458765658974, "learning_rate": 8.232323232323233e-05, "loss": 2.3125625, "step": 185000 }, { "epoch": 0.13760439623851725, "learning_rate": 8.222222222222222e-05, "loss": 2.31725, "step": 186000 }, { "epoch": 0.13834420482044477, "learning_rate": 8.212121212121212e-05, "loss": 2.2964375, "step": 187000 }, { "epoch": 0.13908401340237228, "learning_rate": 8.202020202020202e-05, "loss": 2.302, "step": 188000 }, { "epoch": 0.1398238219842998, "learning_rate": 8.191919191919192e-05, "loss": 2.3019375, "step": 189000 }, { "epoch": 0.14056363056622728, "learning_rate": 8.181818181818183e-05, "loss": 2.3020625, "step": 190000 }, { "epoch": 0.1413034391481548, "learning_rate": 8.171717171717172e-05, "loss": 2.3026875, "step": 191000 }, { "epoch": 0.1420432477300823, "learning_rate": 8.161616161616163e-05, "loss": 2.298875, "step": 192000 }, { "epoch": 0.14278305631200983, "learning_rate": 8.151515151515152e-05, "loss": 2.2983125, "step": 193000 }, { "epoch": 0.14352286489393734, "learning_rate": 8.141414141414141e-05, "loss": 2.2835625, "step": 194000 }, { "epoch": 0.14426267347586486, "learning_rate": 8.131313131313132e-05, "loss": 2.2885625, "step": 195000 }, { "epoch": 0.14500248205779237, "learning_rate": 8.121212121212121e-05, "loss": 2.295125, "step": 196000 }, { "epoch": 0.14574229063971988, "learning_rate": 8.111111111111112e-05, "loss": 2.2823125, "step": 197000 }, { "epoch": 0.1464820992216474, "learning_rate": 8.101010101010101e-05, "loss": 2.280375, "step": 198000 }, { "epoch": 0.14722190780357491, "learning_rate": 8.090909090909092e-05, "loss": 2.290125, "step": 199000 }, { "epoch": 0.14796171638550243, "learning_rate": 8.080808080808081e-05, "loss": 2.2868125, "step": 200000 }, { "epoch": 0.14870152496742992, "learning_rate": 8.07070707070707e-05, "loss": 2.2840625, "step": 201000 }, { "epoch": 0.14944133354935743, "learning_rate": 8.060606060606061e-05, "loss": 2.2799375, "step": 202000 }, { "epoch": 0.15018114213128494, "learning_rate": 8.05050505050505e-05, "loss": 2.285875, "step": 203000 }, { "epoch": 0.15092095071321246, "learning_rate": 8.040404040404041e-05, "loss": 2.344375, "step": 204000 }, { "epoch": 0.15166075929513997, "learning_rate": 8.03030303030303e-05, "loss": 2.2733125, "step": 205000 }, { "epoch": 0.1524005678770675, "learning_rate": 8.02020202020202e-05, "loss": 2.272125, "step": 206000 }, { "epoch": 0.153140376458995, "learning_rate": 8.010101010101011e-05, "loss": 2.271625, "step": 207000 }, { "epoch": 0.15388018504092252, "learning_rate": 8e-05, "loss": 2.278125, "step": 208000 }, { "epoch": 0.15461999362285003, "learning_rate": 7.989898989898991e-05, "loss": 2.275625, "step": 209000 }, { "epoch": 0.15535980220477755, "learning_rate": 7.97979797979798e-05, "loss": 2.2675, "step": 210000 }, { "epoch": 0.15609961078670503, "learning_rate": 7.96969696969697e-05, "loss": 2.2676875, "step": 211000 }, { "epoch": 0.15683941936863255, "learning_rate": 7.95959595959596e-05, "loss": 2.265875, "step": 212000 }, { "epoch": 0.15757922795056006, "learning_rate": 7.94949494949495e-05, "loss": 2.260125, "step": 213000 }, { "epoch": 0.15831903653248758, "learning_rate": 7.93939393939394e-05, "loss": 2.2750625, "step": 214000 } ], "max_steps": 1000000, "num_train_epochs": 1, "total_flos": 387919397980800000 }