{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9995095635115253,
"eval_steps": 500,
"global_step": 1019,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.000980872976949485, |
|
"grad_norm": 23.71615728078218, |
|
"learning_rate": 9.803921568627452e-08, |
|
"loss": 1.3172, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004904364884747425, |
|
"grad_norm": 21.50955102466688, |
|
"learning_rate": 4.901960784313725e-07, |
|
"loss": 1.3156, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00980872976949485, |
|
"grad_norm": 8.347835856988212, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 1.2118, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014713094654242276, |
|
"grad_norm": 9.70215349989816, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 1.0495, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0196174595389897, |
|
"grad_norm": 2.935709350105428, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 0.9169, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024521824423737126, |
|
"grad_norm": 2.3710459957915373, |
|
"learning_rate": 2.450980392156863e-06, |
|
"loss": 0.8718, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.029426189308484552, |
|
"grad_norm": 2.217748118460408, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.8404, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03433055419323198, |
|
"grad_norm": 2.2456090605656223, |
|
"learning_rate": 3.431372549019608e-06, |
|
"loss": 0.8207, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0392349190779794, |
|
"grad_norm": 2.1968777765698135, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.8027, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04413928396272683, |
|
"grad_norm": 2.3343295421758956, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.7888, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04904364884747425, |
|
"grad_norm": 2.3845961870372956, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 0.7703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.053948013732221675, |
|
"grad_norm": 2.3216256302247933, |
|
"learning_rate": 5.392156862745098e-06, |
|
"loss": 0.7541, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.058852378616969105, |
|
"grad_norm": 2.378678579603692, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.749, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06375674350171653, |
|
"grad_norm": 2.355836921671654, |
|
"learning_rate": 6.372549019607843e-06, |
|
"loss": 0.7258, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06866110838646396, |
|
"grad_norm": 2.480621419103395, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 0.7132, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07356547327121138, |
|
"grad_norm": 2.196577695253739, |
|
"learning_rate": 7.352941176470589e-06, |
|
"loss": 0.7168, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0784698381559588, |
|
"grad_norm": 2.418840940827789, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.7051, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08337420304070622, |
|
"grad_norm": 2.4161115457147577, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.6993, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08827856792545366, |
|
"grad_norm": 2.3049037332530804, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.6948, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09318293281020108, |
|
"grad_norm": 2.345395326875072, |
|
"learning_rate": 9.31372549019608e-06, |
|
"loss": 0.6859, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0980872976949485, |
|
"grad_norm": 2.299729975358926, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.6836, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10299166257969593, |
|
"grad_norm": 2.2610515025654117, |
|
"learning_rate": 9.999735917410952e-06, |
|
"loss": 0.6794, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10789602746444335, |
|
"grad_norm": 2.450010148482251, |
|
"learning_rate": 9.998122180387662e-06, |
|
"loss": 0.6765, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11280039234919079, |
|
"grad_norm": 2.221120129726642, |
|
"learning_rate": 9.995041891820093e-06, |
|
"loss": 0.6746, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11770475723393821, |
|
"grad_norm": 2.1057039809456377, |
|
"learning_rate": 9.990495955528073e-06, |
|
"loss": 0.6644, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.12260912211868563, |
|
"grad_norm": 2.090893956936151, |
|
"learning_rate": 9.984485705382538e-06, |
|
"loss": 0.6695, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12751348700343307, |
|
"grad_norm": 2.0602352081070543, |
|
"learning_rate": 9.977012904914133e-06, |
|
"loss": 0.6519, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.13241785188818048, |
|
"grad_norm": 2.0932998186794927, |
|
"learning_rate": 9.968079746795759e-06, |
|
"loss": 0.6657, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1373222167729279, |
|
"grad_norm": 2.136782363642342, |
|
"learning_rate": 9.957688852199201e-06, |
|
"loss": 0.6557, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.14222658165767532, |
|
"grad_norm": 2.1261422382559623, |
|
"learning_rate": 9.945843270026021e-06, |
|
"loss": 0.6495, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.14713094654242276, |
|
"grad_norm": 2.067127907721412, |
|
"learning_rate": 9.932546476012942e-06, |
|
"loss": 0.6411, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1520353114271702, |
|
"grad_norm": 2.040847654490592, |
|
"learning_rate": 9.91780237171201e-06, |
|
"loss": 0.6416, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1569396763119176, |
|
"grad_norm": 2.0912635989059787, |
|
"learning_rate": 9.901615283345782e-06, |
|
"loss": 0.6503, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.16184404119666504, |
|
"grad_norm": 2.8372493265750873, |
|
"learning_rate": 9.883989960537934e-06, |
|
"loss": 0.6424, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.16674840608141245, |
|
"grad_norm": 2.0493928993359307, |
|
"learning_rate": 9.86493157491962e-06, |
|
"loss": 0.6387, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.17165277096615988, |
|
"grad_norm": 2.083065623023665, |
|
"learning_rate": 9.84444571861201e-06, |
|
"loss": 0.6362, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.17655713585090732, |
|
"grad_norm": 2.066269679181421, |
|
"learning_rate": 9.822538402585451e-06, |
|
"loss": 0.6277, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.18146150073565473, |
|
"grad_norm": 2.1282623287152167, |
|
"learning_rate": 9.799216054895715e-06, |
|
"loss": 0.6274, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.18636586562040217, |
|
"grad_norm": 2.0335813176241095, |
|
"learning_rate": 9.774485518797892e-06, |
|
"loss": 0.6155, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.19127023050514957, |
|
"grad_norm": 1.9384285409252677, |
|
"learning_rate": 9.748354050738416e-06, |
|
"loss": 0.638, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.196174595389897, |
|
"grad_norm": 2.006376224222685, |
|
"learning_rate": 9.720829318225897e-06, |
|
"loss": 0.613, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20107896027464445, |
|
"grad_norm": 2.088420694162267, |
|
"learning_rate": 9.691919397581304e-06, |
|
"loss": 0.6139, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.20598332515939186, |
|
"grad_norm": 2.0409644262977515, |
|
"learning_rate": 9.66163277156821e-06, |
|
"loss": 0.6068, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2108876900441393, |
|
"grad_norm": 2.0023467594132973, |
|
"learning_rate": 9.629978326903778e-06, |
|
"loss": 0.6084, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2157920549288867, |
|
"grad_norm": 1.904954310523434, |
|
"learning_rate": 9.596965351651204e-06, |
|
"loss": 0.6045, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.22069641981363414, |
|
"grad_norm": 1.964486390414263, |
|
"learning_rate": 9.562603532494432e-06, |
|
"loss": 0.6197, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.22560078469838157, |
|
"grad_norm": 2.0628213777647577, |
|
"learning_rate": 9.526902951895857e-06, |
|
"loss": 0.5853, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.23050514958312898, |
|
"grad_norm": 2.067943464789154, |
|
"learning_rate": 9.48987408513794e-06, |
|
"loss": 0.5892, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.23540951446787642, |
|
"grad_norm": 2.1163968024177757, |
|
"learning_rate": 9.451527797249538e-06, |
|
"loss": 0.5866, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.24031387935262383, |
|
"grad_norm": 2.218142662563601, |
|
"learning_rate": 9.411875339817886e-06, |
|
"loss": 0.5923, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.24521824423737126, |
|
"grad_norm": 2.0789211498714057, |
|
"learning_rate": 9.370928347687149e-06, |
|
"loss": 0.6067, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2501226091221187, |
|
"grad_norm": 2.00243401967041, |
|
"learning_rate": 9.328698835544516e-06, |
|
"loss": 0.5733, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.25502697400686614, |
|
"grad_norm": 1.9232684596963454, |
|
"learning_rate": 9.285199194394854e-06, |
|
"loss": 0.6039, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2599313388916135, |
|
"grad_norm": 2.4559722910125523, |
|
"learning_rate": 9.240442187924922e-06, |
|
"loss": 0.5837, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.26483570377636095, |
|
"grad_norm": 2.0642822474573235, |
|
"learning_rate": 9.19444094875825e-06, |
|
"loss": 0.5816, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2697400686611084, |
|
"grad_norm": 1.8431714309717544, |
|
"learning_rate": 9.147208974601762e-06, |
|
"loss": 0.5891, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2746444335458558, |
|
"grad_norm": 2.455382996504095, |
|
"learning_rate": 9.098760124285255e-06, |
|
"loss": 0.5739, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.27954879843060326, |
|
"grad_norm": 2.105877839231797, |
|
"learning_rate": 9.049108613694958e-06, |
|
"loss": 0.5664, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.28445316331535064, |
|
"grad_norm": 2.1572999228459637, |
|
"learning_rate": 8.998269011602283e-06, |
|
"loss": 0.5654, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2893575282000981, |
|
"grad_norm": 2.2354758276064257, |
|
"learning_rate": 8.94625623538905e-06, |
|
"loss": 0.5718, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2942618930848455, |
|
"grad_norm": 1.9951110551471705, |
|
"learning_rate": 8.893085546670426e-06, |
|
"loss": 0.5647, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29916625796959295, |
|
"grad_norm": 1.940858410217326, |
|
"learning_rate": 8.838772546816857e-06, |
|
"loss": 0.5503, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.3040706228543404, |
|
"grad_norm": 2.1124206964514567, |
|
"learning_rate": 8.783333172376292e-06, |
|
"loss": 0.5625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.30897498773908777, |
|
"grad_norm": 1.9926807872955052, |
|
"learning_rate": 8.726783690398091e-06, |
|
"loss": 0.5406, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3138793526238352, |
|
"grad_norm": 2.3776434685854664, |
|
"learning_rate": 8.669140693659928e-06, |
|
"loss": 0.5412, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.31878371750858264, |
|
"grad_norm": 2.0505624375679194, |
|
"learning_rate": 8.610421095799129e-06, |
|
"loss": 0.5465, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3236880823933301, |
|
"grad_norm": 1.9938642512875002, |
|
"learning_rate": 8.550642126349873e-06, |
|
"loss": 0.5448, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3285924472780775, |
|
"grad_norm": 1.8794344216432206, |
|
"learning_rate": 8.489821325687682e-06, |
|
"loss": 0.5309, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3334968121628249, |
|
"grad_norm": 1.9586224504819914, |
|
"learning_rate": 8.427976539882725e-06, |
|
"loss": 0.5256, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.33840117704757233, |
|
"grad_norm": 1.9633684416354464, |
|
"learning_rate": 8.365125915463406e-06, |
|
"loss": 0.528, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.34330554193231977, |
|
"grad_norm": 1.9574848872158568, |
|
"learning_rate": 8.301287894091812e-06, |
|
"loss": 0.5345, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3482099068170672, |
|
"grad_norm": 2.046001716842364, |
|
"learning_rate": 8.236481207152539e-06, |
|
"loss": 0.5392, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.35311427170181464, |
|
"grad_norm": 2.0110023874224257, |
|
"learning_rate": 8.170724870256526e-06, |
|
"loss": 0.5171, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.358018636586562, |
|
"grad_norm": 1.8982030843350457, |
|
"learning_rate": 8.104038177661484e-06, |
|
"loss": 0.5245, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.36292300147130946, |
|
"grad_norm": 1.9231079403397293, |
|
"learning_rate": 8.036440696610566e-06, |
|
"loss": 0.52, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3678273663560569, |
|
"grad_norm": 1.9562337288746108, |
|
"learning_rate": 7.967952261590936e-06, |
|
"loss": 0.5087, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.37273173124080433, |
|
"grad_norm": 1.9474638682438907, |
|
"learning_rate": 7.898592968513919e-06, |
|
"loss": 0.5085, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.37763609612555177, |
|
"grad_norm": 1.9123482797519735, |
|
"learning_rate": 7.828383168818457e-06, |
|
"loss": 0.5131, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.38254046101029915, |
|
"grad_norm": 2.057943038506519, |
|
"learning_rate": 7.757343463499577e-06, |
|
"loss": 0.4981, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3874448258950466, |
|
"grad_norm": 1.9838558136826598, |
|
"learning_rate": 7.685494697063627e-06, |
|
"loss": 0.5158, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.392349190779794, |
|
"grad_norm": 1.9089161018582137, |
|
"learning_rate": 7.612857951412085e-06, |
|
"loss": 0.5115, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.39725355566454146, |
|
"grad_norm": 2.053508845540271, |
|
"learning_rate": 7.5394545396556864e-06, |
|
"loss": 0.4983, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.4021579205492889, |
|
"grad_norm": 2.0381945487707225, |
|
"learning_rate": 7.465305999860728e-06, |
|
"loss": 0.4864, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4070622854340363, |
|
"grad_norm": 2.060283387944364, |
|
"learning_rate": 7.390434088729348e-06, |
|
"loss": 0.4858, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4119666503187837, |
|
"grad_norm": 2.108098690183231, |
|
"learning_rate": 7.314860775215674e-06, |
|
"loss": 0.4894, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.41687101520353115, |
|
"grad_norm": 1.9523420077195515, |
|
"learning_rate": 7.2386082340796715e-06, |
|
"loss": 0.5032, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4217753800882786, |
|
"grad_norm": 1.9725770065321593, |
|
"learning_rate": 7.1616988393806245e-06, |
|
"loss": 0.4917, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.426679744973026, |
|
"grad_norm": 1.989450857443718, |
|
"learning_rate": 7.0841551579121144e-06, |
|
"loss": 0.488, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4315841098577734, |
|
"grad_norm": 1.874422665252578, |
|
"learning_rate": 7.005999942580478e-06, |
|
"loss": 0.4871, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.43648847474252084, |
|
"grad_norm": 1.9628105831400913, |
|
"learning_rate": 6.927256125728624e-06, |
|
"loss": 0.4774, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4413928396272683, |
|
"grad_norm": 2.052494969259568, |
|
"learning_rate": 6.8479468124072146e-06, |
|
"loss": 0.4846, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4462972045120157, |
|
"grad_norm": 2.196992014411905, |
|
"learning_rate": 6.768095273595176e-06, |
|
"loss": 0.4761, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.45120156939676315, |
|
"grad_norm": 2.140187037948035, |
|
"learning_rate": 6.6877249393715115e-06, |
|
"loss": 0.4716, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4561059342815105, |
|
"grad_norm": 2.165318448275331, |
|
"learning_rate": 6.60685939204044e-06, |
|
"loss": 0.462, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.46101029916625796, |
|
"grad_norm": 2.0062687355632485, |
|
"learning_rate": 6.525522359211858e-06, |
|
"loss": 0.4592, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4659146640510054, |
|
"grad_norm": 1.9449243429974221, |
|
"learning_rate": 6.443737706839175e-06, |
|
"loss": 0.4662, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.47081902893575284, |
|
"grad_norm": 2.0062916535890816, |
|
"learning_rate": 6.36152943221656e-06, |
|
"loss": 0.4618, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4757233938205002, |
|
"grad_norm": 1.9536933195743733, |
|
"learning_rate": 6.278921656937631e-06, |
|
"loss": 0.4586, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.48062775870524765, |
|
"grad_norm": 2.1253405020175706, |
|
"learning_rate": 6.195938619817694e-06, |
|
"loss": 0.4643, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4855321235899951, |
|
"grad_norm": 2.0546536638691695, |
|
"learning_rate": 6.112604669781572e-06, |
|
"loss": 0.4553, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4904364884747425, |
|
"grad_norm": 1.9563960276975647, |
|
"learning_rate": 6.0289442587191405e-06, |
|
"loss": 0.4537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.49534085335948996, |
|
"grad_norm": 1.9189930888442277, |
|
"learning_rate": 5.944981934310627e-06, |
|
"loss": 0.4555, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5002452182442374, |
|
"grad_norm": 1.8664708836063784, |
|
"learning_rate": 5.860742332823831e-06, |
|
"loss": 0.4515, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5051495831289848, |
|
"grad_norm": 1.9544083422711673, |
|
"learning_rate": 5.776250171885329e-06, |
|
"loss": 0.447, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5100539480137323, |
|
"grad_norm": 2.0179051671028385, |
|
"learning_rate": 5.691530243227824e-06, |
|
"loss": 0.4386, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5149583128984796, |
|
"grad_norm": 2.0846338532033752, |
|
"learning_rate": 5.6066074054157385e-06, |
|
"loss": 0.4355, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.519862677783227, |
|
"grad_norm": 2.0263314582631153, |
|
"learning_rate": 5.521506576551196e-06, |
|
"loss": 0.4401, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5247670426679745, |
|
"grad_norm": 1.8778135562708458, |
|
"learning_rate": 5.436252726962553e-06, |
|
"loss": 0.4341, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5296714075527219, |
|
"grad_norm": 1.9757916591258975, |
|
"learning_rate": 5.350870871877577e-06, |
|
"loss": 0.4364, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5345757724374693, |
|
"grad_norm": 1.9531961318347626, |
|
"learning_rate": 5.265386064083481e-06, |
|
"loss": 0.4323, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5394801373222168, |
|
"grad_norm": 2.0725678291543628, |
|
"learning_rate": 5.179823386575908e-06, |
|
"loss": 0.4364, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5443845022069642, |
|
"grad_norm": 1.860156556204824, |
|
"learning_rate": 5.09420794519907e-06, |
|
"loss": 0.4329, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5492888670917117, |
|
"grad_norm": 1.8698424821946518, |
|
"learning_rate": 5.008564861279188e-06, |
|
"loss": 0.4143, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5541932319764591, |
|
"grad_norm": 1.9655955550507962, |
|
"learning_rate": 4.922919264253368e-06, |
|
"loss": 0.4248, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5590975968612065, |
|
"grad_norm": 1.9066406300773608, |
|
"learning_rate": 4.837296284296113e-06, |
|
"loss": 0.4186, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5640019617459539, |
|
"grad_norm": 1.80977031179867, |
|
"learning_rate": 4.75172104494561e-06, |
|
"loss": 0.4156, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5689063266307013, |
|
"grad_norm": 1.991595721949942, |
|
"learning_rate": 4.666218655731981e-06, |
|
"loss": 0.4156, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5738106915154487, |
|
"grad_norm": 1.910239431707766, |
|
"learning_rate": 4.580814204809618e-06, |
|
"loss": 0.3942, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5787150564001962, |
|
"grad_norm": 1.9231788230679294, |
|
"learning_rate": 4.495532751595813e-06, |
|
"loss": 0.4131, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5836194212849436, |
|
"grad_norm": 1.903193102248486, |
|
"learning_rate": 4.410399319417806e-06, |
|
"loss": 0.4128, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.588523786169691, |
|
"grad_norm": 1.8468443833129051, |
|
"learning_rate": 4.325438888170429e-06, |
|
"loss": 0.4007, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5934281510544385, |
|
"grad_norm": 1.8391801455451426, |
|
"learning_rate": 4.2406763869864965e-06, |
|
"loss": 0.4127, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5983325159391859, |
|
"grad_norm": 1.9158169069206314, |
|
"learning_rate": 4.156136686922083e-06, |
|
"loss": 0.4102, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6032368808239333, |
|
"grad_norm": 1.8336931901990852, |
|
"learning_rate": 4.071844593658841e-06, |
|
"loss": 0.3978, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6081412457086808, |
|
"grad_norm": 1.9026371408899738, |
|
"learning_rate": 3.987824840225512e-06, |
|
"loss": 0.4009, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6130456105934281, |
|
"grad_norm": 1.9012934657121805, |
|
"learning_rate": 3.904102079740753e-06, |
|
"loss": 0.3923, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6179499754781755, |
|
"grad_norm": 1.8972920333300498, |
|
"learning_rate": 3.820700878179389e-06, |
|
"loss": 0.3894, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.622854340362923, |
|
"grad_norm": 1.8312865024256686, |
|
"learning_rate": 3.73764570716427e-06, |
|
"loss": 0.3822, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6277587052476704, |
|
"grad_norm": 1.9183680244376244, |
|
"learning_rate": 3.654960936785783e-06, |
|
"loss": 0.3926, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6326630701324178, |
|
"grad_norm": 1.828330870696398, |
|
"learning_rate": 3.572670828451177e-06, |
|
"loss": 0.3924, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6375674350171653, |
|
"grad_norm": 1.8312336733506578, |
|
"learning_rate": 3.4907995277657624e-06, |
|
"loss": 0.3984, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6424717999019127, |
|
"grad_norm": 2.0339474627749436, |
|
"learning_rate": 3.4093710574480926e-06, |
|
"loss": 0.3737, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6473761647866602, |
|
"grad_norm": 1.8512202300281744, |
|
"learning_rate": 3.3284093102812144e-06, |
|
"loss": 0.3896, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6522805296714076, |
|
"grad_norm": 1.935099436122936, |
|
"learning_rate": 3.2479380421020336e-06, |
|
"loss": 0.3744, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.657184894556155, |
|
"grad_norm": 1.877516062960988, |
|
"learning_rate": 3.167980864830855e-06, |
|
"loss": 0.3872, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6620892594409024, |
|
"grad_norm": 1.8575937601231947, |
|
"learning_rate": 3.0885612395431765e-06, |
|
"loss": 0.3811, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6669936243256498, |
|
"grad_norm": 1.799905567165699, |
|
"learning_rate": 3.009702469585713e-06, |
|
"loss": 0.3793, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6718979892103972, |
|
"grad_norm": 1.9383699331479365, |
|
"learning_rate": 2.93142769373873e-06, |
|
"loss": 0.3712, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6768023540951447, |
|
"grad_norm": 1.9008755304822846, |
|
"learning_rate": 2.853759879426644e-06, |
|
"loss": 0.3738, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6817067189798921, |
|
"grad_norm": 1.8531666768609951, |
|
"learning_rate": 2.7767218159789067e-06, |
|
"loss": 0.3619, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6866110838646395, |
|
"grad_norm": 1.9787710274083234, |
|
"learning_rate": 2.7003361079431547e-06, |
|
"loss": 0.3733, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.691515448749387, |
|
"grad_norm": 1.797813113073451, |
|
"learning_rate": 2.624625168452568e-06, |
|
"loss": 0.3762, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6964198136341344, |
|
"grad_norm": 1.9305643695542356, |
|
"learning_rate": 2.5496112126493995e-06, |
|
"loss": 0.3712, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7013241785188818, |
|
"grad_norm": 1.7398004529962572, |
|
"learning_rate": 2.4753162511665936e-06, |
|
"loss": 0.366, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7062285434036293, |
|
"grad_norm": 2.0252060555010902, |
|
"learning_rate": 2.401762083669419e-06, |
|
"loss": 0.3626, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7111329082883766, |
|
"grad_norm": 1.7500094335967311, |
|
"learning_rate": 2.3289702924589914e-06, |
|
"loss": 0.3624, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.716037273173124, |
|
"grad_norm": 1.752557189300548, |
|
"learning_rate": 2.256962236139598e-06, |
|
"loss": 0.3677, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7209416380578715, |
|
"grad_norm": 1.784599164157859, |
|
"learning_rate": 2.18575904335163e-06, |
|
"loss": 0.3647, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7258460029426189, |
|
"grad_norm": 1.9166229348625452, |
|
"learning_rate": 2.115381606572018e-06, |
|
"loss": 0.3614, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7307503678273664, |
|
"grad_norm": 1.7664227370032382, |
|
"learning_rate": 2.0458505759839433e-06, |
|
"loss": 0.3539, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7356547327121138, |
|
"grad_norm": 1.7823664294805341, |
|
"learning_rate": 1.9771863534176544e-06, |
|
"loss": 0.3649, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7405590975968612, |
|
"grad_norm": 1.798651877731334, |
|
"learning_rate": 1.90940908636415e-06, |
|
"loss": 0.3584, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7454634624816087, |
|
"grad_norm": 1.7897055029417046, |
|
"learning_rate": 1.8425386620634961e-06, |
|
"loss": 0.3575, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7503678273663561, |
|
"grad_norm": 1.8567965533511697, |
|
"learning_rate": 1.7765947016694902e-06, |
|
"loss": 0.3597, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7552721922511035, |
|
"grad_norm": 1.7069946121225148, |
|
"learning_rate": 1.711596554492428e-06, |
|
"loss": 0.3569, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7601765571358509, |
|
"grad_norm": 1.7171226071674441, |
|
"learning_rate": 1.64756329232161e-06, |
|
"loss": 0.3508, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7650809220205983, |
|
"grad_norm": 1.851171218498648, |
|
"learning_rate": 1.5845137038292851e-06, |
|
"loss": 0.3505, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7699852869053457, |
|
"grad_norm": 1.7875922878269628, |
|
"learning_rate": 1.5224662890576781e-06, |
|
"loss": 0.3404, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7748896517900932, |
|
"grad_norm": 1.6638468790620988, |
|
"learning_rate": 1.4614392539906892e-06, |
|
"loss": 0.3522, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7797940166748406, |
|
"grad_norm": 1.7409632860061264, |
|
"learning_rate": 1.4014505052118893e-06, |
|
"loss": 0.353, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.784698381559588, |
|
"grad_norm": 1.815807330350682, |
|
"learning_rate": 1.3425176446503618e-06, |
|
"loss": 0.3414, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7896027464443355, |
|
"grad_norm": 1.8126274287911948, |
|
"learning_rate": 1.2846579644159291e-06, |
|
"loss": 0.3425, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7945071113290829, |
|
"grad_norm": 1.7603201856240744, |
|
"learning_rate": 1.2278884417253033e-06, |
|
"loss": 0.3453, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7994114762138304, |
|
"grad_norm": 1.7250692746456593, |
|
"learning_rate": 1.172225733920616e-06, |
|
"loss": 0.3456, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.8043158410985778, |
|
"grad_norm": 1.6574970310295125, |
|
"learning_rate": 1.1176861735818107e-06, |
|
"loss": 0.3357, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8092202059833251, |
|
"grad_norm": 1.7019799673778844, |
|
"learning_rate": 1.0642857637343346e-06, |
|
"loss": 0.3406, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8141245708680726, |
|
"grad_norm": 1.6768204828704758, |
|
"learning_rate": 1.0120401731535213e-06, |
|
"loss": 0.353, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.81902893575282, |
|
"grad_norm": 1.6058567769661107, |
|
"learning_rate": 9.609647317670468e-07, |
|
"loss": 0.3413, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8239333006375674, |
|
"grad_norm": 1.722906282315617, |
|
"learning_rate": 9.110744261568206e-07, |
|
"loss": 0.3329, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8288376655223149, |
|
"grad_norm": 1.722830977466264, |
|
"learning_rate": 8.623838951616076e-07, |
|
"loss": 0.3339, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8337420304070623, |
|
"grad_norm": 1.7043628706248817, |
|
"learning_rate": 8.149074255816996e-07, |
|
"loss": 0.3327, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8386463952918097, |
|
"grad_norm": 1.6620351875279176, |
|
"learning_rate": 7.68658947986874e-07, |
|
"loss": 0.3409, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8435507601765572, |
|
"grad_norm": 1.6249652725084447, |
|
"learning_rate": 7.236520326288721e-07, |
|
"loss": 0.3345, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8484551250613046, |
|
"grad_norm": 1.6797312028896152, |
|
"learning_rate": 6.79899885459619e-07, |
|
"loss": 0.3371, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.853359489946052, |
|
"grad_norm": 1.6133846444360203, |
|
"learning_rate": 6.374153442563192e-07, |
|
"loss": 0.3291, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8582638548307994, |
|
"grad_norm": 1.639961696466558, |
|
"learning_rate": 5.962108748545942e-07, |
|
"loss": 0.3405, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8631682197155468, |
|
"grad_norm": 1.7763694537319363, |
|
"learning_rate": 5.562985674907467e-07, |
|
"loss": 0.3377, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8680725846002942, |
|
"grad_norm": 1.7034248324336427, |
|
"learning_rate": 5.176901332542378e-07, |
|
"loss": 0.3406, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8729769494850417, |
|
"grad_norm": 1.6095220015390743, |
|
"learning_rate": 4.803969006514175e-07, |
|
"loss": 0.33, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8778813143697891, |
|
"grad_norm": 1.7111560907547738, |
|
"learning_rate": 4.444298122815055e-07, |
|
"loss": 0.335, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8827856792545365, |
|
"grad_norm": 1.6246588106178113, |
|
"learning_rate": 4.0979942162580387e-07, |
|
"loss": 0.3289, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.887690044139284, |
|
"grad_norm": 1.6674759393718703, |
|
"learning_rate": 3.76515889951099e-07, |
|
"loss": 0.3287, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8925944090240314, |
|
"grad_norm": 1.6046692843902606, |
|
"learning_rate": 3.445889833281296e-07, |
|
"loss": 0.3324, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8974987739087789, |
|
"grad_norm": 1.7079729479541892, |
|
"learning_rate": 3.140280697660247e-07, |
|
"loss": 0.3258, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.9024031387935263, |
|
"grad_norm": 1.6511127606613865, |
|
"learning_rate": 2.8484211646353677e-07, |
|
"loss": 0.3266, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9073075036782736, |
|
"grad_norm": 1.6108380967394342, |
|
"learning_rate": 2.570396871778796e-07, |
|
"loss": 0.3285, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.912211868563021, |
|
"grad_norm": 1.7501453734514094, |
|
"learning_rate": 2.3062893971195211e-07, |
|
"loss": 0.3299, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9171162334477685, |
|
"grad_norm": 1.7120576016328979, |
|
"learning_rate": 2.0561762352066638e-07, |
|
"loss": 0.3261, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9220205983325159, |
|
"grad_norm": 1.6683684954483, |
|
"learning_rate": 1.8201307743709927e-07, |
|
"loss": 0.328, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9269249632172634, |
|
"grad_norm": 1.6981337204644897, |
|
"learning_rate": 1.5982222751913079e-07, |
|
"loss": 0.331, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9318293281020108, |
|
"grad_norm": 1.6609453351002048, |
|
"learning_rate": 1.390515850171953e-07, |
|
"loss": 0.3234, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9367336929867582, |
|
"grad_norm": 1.6636906278300567, |
|
"learning_rate": 1.1970724446374592e-07, |
|
"loss": 0.3336, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.9416380578715057, |
|
"grad_norm": 1.5711270056253417, |
|
"learning_rate": 1.0179488188499675e-07, |
|
"loss": 0.3299, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9465424227562531, |
|
"grad_norm": 1.6157567675118045, |
|
"learning_rate": 8.531975313545715e-08, |
|
"loss": 0.3398, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.9514467876410004, |
|
"grad_norm": 1.6523867015186364, |
|
"learning_rate": 7.028669235575714e-08, |
|
"loss": 0.3294, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9563511525257479, |
|
"grad_norm": 1.740288361114131, |
|
"learning_rate": 5.670011055421365e-08, |
|
"loss": 0.3335, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9612555174104953, |
|
"grad_norm": 1.621637272095815, |
|
"learning_rate": 4.4563994312546435e-08, |
|
"loss": 0.3295, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9661598822952427, |
|
"grad_norm": 1.657618038769045, |
|
"learning_rate": 3.3881904616137054e-08, |
|
"loss": 0.3266, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9710642471799902, |
|
"grad_norm": 1.6751008606908966, |
|
"learning_rate": 2.4656975809160267e-08, |
|
"loss": 0.3315, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9759686120647376, |
|
"grad_norm": 1.6937145693196844, |
|
"learning_rate": 1.689191467490303e-08, |
|
"loss": 0.3313, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.980872976949485, |
|
"grad_norm": 1.6407710130512811, |
|
"learning_rate": 1.058899964154092e-08, |
|
"loss": 0.3278, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9857773418342325, |
|
"grad_norm": 1.6749685115327173, |
|
"learning_rate": 5.750080113598455e-09, |
|
"loss": 0.3246, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9906817067189799, |
|
"grad_norm": 1.5703580183454553, |
|
"learning_rate": 2.376575929297076e-09, |
|
"loss": 0.3257, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9955860716037274, |
|
"grad_norm": 1.560579334168069, |
|
"learning_rate": 4.694769439445024e-10, |
|
"loss": 0.3258, |
|
"step": 1015 |
|
}, |
|
{
"epoch": 0.9995095635115253,
"eval_loss": 0.33406102657318115,
"eval_runtime": 96.9695,
"eval_samples_per_second": 3.114,
"eval_steps_per_second": 0.784,
"step": 1019
},
{
"epoch": 0.9995095635115253,
"step": 1019,
"total_flos": 213305524224000.0,
"train_loss": 0.4876700218573169,
"train_runtime": 22910.3839,
"train_samples_per_second": 1.424,
"train_steps_per_second": 0.044
}
],
"logging_steps": 5,
"max_steps": 1019,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 213305524224000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}