| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1389, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0021598272138228943, |
| "grad_norm": 2.878943681716919, |
| "learning_rate": 7.194244604316547e-08, |
| "loss": 0.7835, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004319654427645789, |
| "grad_norm": 2.902249813079834, |
| "learning_rate": 1.4388489208633095e-07, |
| "loss": 0.7896, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0064794816414686825, |
| "grad_norm": 2.8640873432159424, |
| "learning_rate": 2.1582733812949643e-07, |
| "loss": 0.7588, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008639308855291577, |
| "grad_norm": 2.825040102005005, |
| "learning_rate": 2.877697841726619e-07, |
| "loss": 0.779, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01079913606911447, |
| "grad_norm": 2.9926884174346924, |
| "learning_rate": 3.5971223021582736e-07, |
| "loss": 0.7816, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012958963282937365, |
| "grad_norm": 2.8692467212677, |
| "learning_rate": 4.3165467625899287e-07, |
| "loss": 0.7695, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01511879049676026, |
| "grad_norm": 2.79731822013855, |
| "learning_rate": 5.035971223021583e-07, |
| "loss": 0.7683, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017278617710583154, |
| "grad_norm": 2.832988739013672, |
| "learning_rate": 5.755395683453238e-07, |
| "loss": 0.7865, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.019438444924406047, |
| "grad_norm": 2.787931442260742, |
| "learning_rate": 6.474820143884893e-07, |
| "loss": 0.7716, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02159827213822894, |
| "grad_norm": 2.6542158126831055, |
| "learning_rate": 7.194244604316547e-07, |
| "loss": 0.7708, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.023758099352051837, |
| "grad_norm": 2.5756170749664307, |
| "learning_rate": 7.913669064748202e-07, |
| "loss": 0.7548, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02591792656587473, |
| "grad_norm": 2.2221007347106934, |
| "learning_rate": 8.633093525179857e-07, |
| "loss": 0.7544, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.028077753779697623, |
| "grad_norm": 2.165950298309326, |
| "learning_rate": 9.352517985611512e-07, |
| "loss": 0.7345, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03023758099352052, |
| "grad_norm": 2.1415212154388428, |
| "learning_rate": 1.0071942446043167e-06, |
| "loss": 0.7375, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.032397408207343416, |
| "grad_norm": 2.045217275619507, |
| "learning_rate": 1.079136690647482e-06, |
| "loss": 0.7251, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03455723542116631, |
| "grad_norm": 1.8833245038986206, |
| "learning_rate": 1.1510791366906476e-06, |
| "loss": 0.734, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0367170626349892, |
| "grad_norm": 1.4383106231689453, |
| "learning_rate": 1.2230215827338131e-06, |
| "loss": 0.7126, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.038876889848812095, |
| "grad_norm": 1.3764389753341675, |
| "learning_rate": 1.2949640287769785e-06, |
| "loss": 0.69, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04103671706263499, |
| "grad_norm": 1.3699392080307007, |
| "learning_rate": 1.366906474820144e-06, |
| "loss": 0.7071, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04319654427645788, |
| "grad_norm": 1.2943273782730103, |
| "learning_rate": 1.4388489208633094e-06, |
| "loss": 0.686, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04535637149028078, |
| "grad_norm": 1.2634108066558838, |
| "learning_rate": 1.510791366906475e-06, |
| "loss": 0.6902, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.047516198704103674, |
| "grad_norm": 1.066751480102539, |
| "learning_rate": 1.5827338129496403e-06, |
| "loss": 0.6644, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04967602591792657, |
| "grad_norm": 1.004930019378662, |
| "learning_rate": 1.654676258992806e-06, |
| "loss": 0.6602, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05183585313174946, |
| "grad_norm": 0.9834485054016113, |
| "learning_rate": 1.7266187050359715e-06, |
| "loss": 0.6525, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05399568034557235, |
| "grad_norm": 0.9758538007736206, |
| "learning_rate": 1.7985611510791368e-06, |
| "loss": 0.6452, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.056155507559395246, |
| "grad_norm": 0.9222759008407593, |
| "learning_rate": 1.8705035971223024e-06, |
| "loss": 0.6485, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.058315334773218146, |
| "grad_norm": 0.8775356411933899, |
| "learning_rate": 1.942446043165468e-06, |
| "loss": 0.6388, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06047516198704104, |
| "grad_norm": 0.8008519411087036, |
| "learning_rate": 2.0143884892086333e-06, |
| "loss": 0.6328, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06263498920086392, |
| "grad_norm": 0.7609057426452637, |
| "learning_rate": 2.0863309352517987e-06, |
| "loss": 0.6253, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06479481641468683, |
| "grad_norm": 0.6197890043258667, |
| "learning_rate": 2.158273381294964e-06, |
| "loss": 0.6253, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06695464362850972, |
| "grad_norm": 0.6675652265548706, |
| "learning_rate": 2.23021582733813e-06, |
| "loss": 0.605, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06911447084233262, |
| "grad_norm": 0.6976248621940613, |
| "learning_rate": 2.302158273381295e-06, |
| "loss": 0.6077, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07127429805615551, |
| "grad_norm": 0.6653661131858826, |
| "learning_rate": 2.3741007194244605e-06, |
| "loss": 0.6021, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0734341252699784, |
| "grad_norm": 0.6243202090263367, |
| "learning_rate": 2.4460431654676263e-06, |
| "loss": 0.6147, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0755939524838013, |
| "grad_norm": 0.5303459167480469, |
| "learning_rate": 2.5179856115107916e-06, |
| "loss": 0.6, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07775377969762419, |
| "grad_norm": 0.48958107829093933, |
| "learning_rate": 2.589928057553957e-06, |
| "loss": 0.5829, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07991360691144708, |
| "grad_norm": 0.4979974031448364, |
| "learning_rate": 2.6618705035971228e-06, |
| "loss": 0.5852, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08207343412526998, |
| "grad_norm": 0.508642852306366, |
| "learning_rate": 2.733812949640288e-06, |
| "loss": 0.5827, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08423326133909287, |
| "grad_norm": 0.5054506063461304, |
| "learning_rate": 2.805755395683453e-06, |
| "loss": 0.5627, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08639308855291576, |
| "grad_norm": 0.42791351675987244, |
| "learning_rate": 2.877697841726619e-06, |
| "loss": 0.557, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08855291576673865, |
| "grad_norm": 0.3770763874053955, |
| "learning_rate": 2.949640287769784e-06, |
| "loss": 0.5452, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09071274298056156, |
| "grad_norm": 0.38157957792282104, |
| "learning_rate": 3.02158273381295e-06, |
| "loss": 0.5552, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09287257019438445, |
| "grad_norm": 0.4018012583255768, |
| "learning_rate": 3.0935251798561158e-06, |
| "loss": 0.5559, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09503239740820735, |
| "grad_norm": 0.3959904611110687, |
| "learning_rate": 3.1654676258992807e-06, |
| "loss": 0.5493, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09719222462203024, |
| "grad_norm": 0.38622933626174927, |
| "learning_rate": 3.237410071942446e-06, |
| "loss": 0.5512, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09935205183585313, |
| "grad_norm": 0.3973333239555359, |
| "learning_rate": 3.309352517985612e-06, |
| "loss": 0.5413, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10151187904967603, |
| "grad_norm": 0.3897247910499573, |
| "learning_rate": 3.381294964028777e-06, |
| "loss": 0.5223, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10367170626349892, |
| "grad_norm": 0.37678107619285583, |
| "learning_rate": 3.453237410071943e-06, |
| "loss": 0.5296, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10583153347732181, |
| "grad_norm": 0.33324435353279114, |
| "learning_rate": 3.525179856115108e-06, |
| "loss": 0.5184, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1079913606911447, |
| "grad_norm": 0.303320974111557, |
| "learning_rate": 3.5971223021582737e-06, |
| "loss": 0.5331, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1101511879049676, |
| "grad_norm": 0.30076754093170166, |
| "learning_rate": 3.669064748201439e-06, |
| "loss": 0.5331, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11231101511879049, |
| "grad_norm": 0.2589012086391449, |
| "learning_rate": 3.741007194244605e-06, |
| "loss": 0.5109, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11447084233261338, |
| "grad_norm": 0.2596394121646881, |
| "learning_rate": 3.81294964028777e-06, |
| "loss": 0.5227, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11663066954643629, |
| "grad_norm": 0.255307137966156, |
| "learning_rate": 3.884892086330936e-06, |
| "loss": 0.5169, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11879049676025918, |
| "grad_norm": 0.2433944046497345, |
| "learning_rate": 3.956834532374101e-06, |
| "loss": 0.5161, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12095032397408208, |
| "grad_norm": 0.2333260476589203, |
| "learning_rate": 4.028776978417267e-06, |
| "loss": 0.5096, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12311015118790497, |
| "grad_norm": 0.22751125693321228, |
| "learning_rate": 4.100719424460432e-06, |
| "loss": 0.5115, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12526997840172785, |
| "grad_norm": 0.2149927169084549, |
| "learning_rate": 4.172661870503597e-06, |
| "loss": 0.5132, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12742980561555076, |
| "grad_norm": 0.22358939051628113, |
| "learning_rate": 4.244604316546763e-06, |
| "loss": 0.5057, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12958963282937366, |
| "grad_norm": 0.19954045116901398, |
| "learning_rate": 4.316546762589928e-06, |
| "loss": 0.4994, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13174946004319654, |
| "grad_norm": 0.1936485469341278, |
| "learning_rate": 4.388489208633094e-06, |
| "loss": 0.4954, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13390928725701945, |
| "grad_norm": 0.1977352499961853, |
| "learning_rate": 4.46043165467626e-06, |
| "loss": 0.4919, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.13606911447084233, |
| "grad_norm": 0.19697633385658264, |
| "learning_rate": 4.5323741007194245e-06, |
| "loss": 0.4895, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13822894168466524, |
| "grad_norm": 0.2068362534046173, |
| "learning_rate": 4.60431654676259e-06, |
| "loss": 0.4848, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14038876889848811, |
| "grad_norm": 0.2056417018175125, |
| "learning_rate": 4.676258992805755e-06, |
| "loss": 0.4901, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14254859611231102, |
| "grad_norm": 0.20445196330547333, |
| "learning_rate": 4.748201438848921e-06, |
| "loss": 0.4986, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1447084233261339, |
| "grad_norm": 0.17678698897361755, |
| "learning_rate": 4.820143884892087e-06, |
| "loss": 0.4784, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1468682505399568, |
| "grad_norm": 0.17606988549232483, |
| "learning_rate": 4.892086330935253e-06, |
| "loss": 0.4818, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1490280777537797, |
| "grad_norm": 0.1764959990978241, |
| "learning_rate": 4.9640287769784175e-06, |
| "loss": 0.4832, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1511879049676026, |
| "grad_norm": 0.18899278342723846, |
| "learning_rate": 5.035971223021583e-06, |
| "loss": 0.4832, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15334773218142547, |
| "grad_norm": 0.18127930164337158, |
| "learning_rate": 5.107913669064749e-06, |
| "loss": 0.4781, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.15550755939524838, |
| "grad_norm": 0.15677423775196075, |
| "learning_rate": 5.179856115107914e-06, |
| "loss": 0.4795, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.15766738660907129, |
| "grad_norm": 0.17852047085762024, |
| "learning_rate": 5.251798561151079e-06, |
| "loss": 0.4802, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.15982721382289417, |
| "grad_norm": 0.16051283478736877, |
| "learning_rate": 5.3237410071942456e-06, |
| "loss": 0.4758, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16198704103671707, |
| "grad_norm": 0.15272092819213867, |
| "learning_rate": 5.3956834532374105e-06, |
| "loss": 0.4742, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16414686825053995, |
| "grad_norm": 0.18069250881671906, |
| "learning_rate": 5.467625899280576e-06, |
| "loss": 0.4788, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.16630669546436286, |
| "grad_norm": 0.18495260179042816, |
| "learning_rate": 5.539568345323741e-06, |
| "loss": 0.477, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.16846652267818574, |
| "grad_norm": 0.15244323015213013, |
| "learning_rate": 5.611510791366906e-06, |
| "loss": 0.4738, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.17062634989200864, |
| "grad_norm": 0.15029869973659515, |
| "learning_rate": 5.683453237410073e-06, |
| "loss": 0.4809, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17278617710583152, |
| "grad_norm": 0.15908615291118622, |
| "learning_rate": 5.755395683453238e-06, |
| "loss": 0.4682, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17494600431965443, |
| "grad_norm": 0.16395969688892365, |
| "learning_rate": 5.8273381294964035e-06, |
| "loss": 0.4786, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1771058315334773, |
| "grad_norm": 0.15997102856636047, |
| "learning_rate": 5.899280575539568e-06, |
| "loss": 0.4728, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17926565874730022, |
| "grad_norm": 0.15442821383476257, |
| "learning_rate": 5.971223021582734e-06, |
| "loss": 0.4693, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.18142548596112312, |
| "grad_norm": 0.17457455396652222, |
| "learning_rate": 6.0431654676259e-06, |
| "loss": 0.4535, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.183585313174946, |
| "grad_norm": 0.17761239409446716, |
| "learning_rate": 6.115107913669065e-06, |
| "loss": 0.4615, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1857451403887689, |
| "grad_norm": 0.15749000012874603, |
| "learning_rate": 6.1870503597122315e-06, |
| "loss": 0.4757, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1879049676025918, |
| "grad_norm": 0.1500880867242813, |
| "learning_rate": 6.2589928057553964e-06, |
| "loss": 0.468, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1900647948164147, |
| "grad_norm": 0.16475360095500946, |
| "learning_rate": 6.330935251798561e-06, |
| "loss": 0.453, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19222462203023757, |
| "grad_norm": 0.15528172254562378, |
| "learning_rate": 6.402877697841727e-06, |
| "loss": 0.4606, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.19438444924406048, |
| "grad_norm": 0.18330231308937073, |
| "learning_rate": 6.474820143884892e-06, |
| "loss": 0.4645, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.19654427645788336, |
| "grad_norm": 0.15349973738193512, |
| "learning_rate": 6.546762589928059e-06, |
| "loss": 0.4589, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.19870410367170627, |
| "grad_norm": 0.17889103293418884, |
| "learning_rate": 6.618705035971224e-06, |
| "loss": 0.4698, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.20086393088552915, |
| "grad_norm": 0.16917382180690765, |
| "learning_rate": 6.6906474820143886e-06, |
| "loss": 0.45, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.20302375809935205, |
| "grad_norm": 0.15472815930843353, |
| "learning_rate": 6.762589928057554e-06, |
| "loss": 0.4554, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.20518358531317496, |
| "grad_norm": 0.15166456997394562, |
| "learning_rate": 6.834532374100719e-06, |
| "loss": 0.4603, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.20734341252699784, |
| "grad_norm": 0.15480853617191315, |
| "learning_rate": 6.906474820143886e-06, |
| "loss": 0.4527, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.20950323974082075, |
| "grad_norm": 0.18076568841934204, |
| "learning_rate": 6.978417266187051e-06, |
| "loss": 0.4543, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.21166306695464362, |
| "grad_norm": 0.14898645877838135, |
| "learning_rate": 7.050359712230216e-06, |
| "loss": 0.4645, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21382289416846653, |
| "grad_norm": 0.16191677749156952, |
| "learning_rate": 7.122302158273382e-06, |
| "loss": 0.4556, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2159827213822894, |
| "grad_norm": 0.15693144500255585, |
| "learning_rate": 7.194244604316547e-06, |
| "loss": 0.4636, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21814254859611232, |
| "grad_norm": 0.1577419489622116, |
| "learning_rate": 7.266187050359713e-06, |
| "loss": 0.445, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2203023758099352, |
| "grad_norm": 0.1567850261926651, |
| "learning_rate": 7.338129496402878e-06, |
| "loss": 0.4579, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2224622030237581, |
| "grad_norm": 0.15102896094322205, |
| "learning_rate": 7.410071942446043e-06, |
| "loss": 0.4381, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.22462203023758098, |
| "grad_norm": 0.18107765913009644, |
| "learning_rate": 7.48201438848921e-06, |
| "loss": 0.4479, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2267818574514039, |
| "grad_norm": 0.15492849051952362, |
| "learning_rate": 7.5539568345323745e-06, |
| "loss": 0.4466, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.22894168466522677, |
| "grad_norm": 0.16862063109874725, |
| "learning_rate": 7.62589928057554e-06, |
| "loss": 0.4556, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.23110151187904968, |
| "grad_norm": 0.1701633483171463, |
| "learning_rate": 7.697841726618706e-06, |
| "loss": 0.4483, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.23326133909287258, |
| "grad_norm": 0.18902191519737244, |
| "learning_rate": 7.769784172661872e-06, |
| "loss": 0.4383, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.23542116630669546, |
| "grad_norm": 0.16331182420253754, |
| "learning_rate": 7.841726618705036e-06, |
| "loss": 0.4438, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.23758099352051837, |
| "grad_norm": 0.18327923119068146, |
| "learning_rate": 7.913669064748202e-06, |
| "loss": 0.4535, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.23974082073434125, |
| "grad_norm": 0.16586214303970337, |
| "learning_rate": 7.985611510791367e-06, |
| "loss": 0.452, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.24190064794816415, |
| "grad_norm": 0.1756211370229721, |
| "learning_rate": 8.057553956834533e-06, |
| "loss": 0.4461, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.24406047516198703, |
| "grad_norm": 0.17397738993167877, |
| "learning_rate": 8.129496402877699e-06, |
| "loss": 0.4444, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.24622030237580994, |
| "grad_norm": 0.1517469584941864, |
| "learning_rate": 8.201438848920865e-06, |
| "loss": 0.4423, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.24838012958963282, |
| "grad_norm": 0.15296703577041626, |
| "learning_rate": 8.273381294964029e-06, |
| "loss": 0.4434, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2505399568034557, |
| "grad_norm": 0.17677851021289825, |
| "learning_rate": 8.345323741007195e-06, |
| "loss": 0.4352, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2526997840172786, |
| "grad_norm": 0.1546233892440796, |
| "learning_rate": 8.41726618705036e-06, |
| "loss": 0.4416, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2548596112311015, |
| "grad_norm": 0.17565761506557465, |
| "learning_rate": 8.489208633093526e-06, |
| "loss": 0.4484, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2570194384449244, |
| "grad_norm": 0.1443185657262802, |
| "learning_rate": 8.561151079136692e-06, |
| "loss": 0.4291, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2591792656587473, |
| "grad_norm": 0.17720922827720642, |
| "learning_rate": 8.633093525179856e-06, |
| "loss": 0.4356, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2613390928725702, |
| "grad_norm": 0.17487414181232452, |
| "learning_rate": 8.705035971223022e-06, |
| "loss": 0.4465, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2634989200863931, |
| "grad_norm": 0.16723576188087463, |
| "learning_rate": 8.776978417266188e-06, |
| "loss": 0.4463, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.265658747300216, |
| "grad_norm": 0.19939404726028442, |
| "learning_rate": 8.848920863309353e-06, |
| "loss": 0.4387, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2678185745140389, |
| "grad_norm": 0.1569490283727646, |
| "learning_rate": 8.92086330935252e-06, |
| "loss": 0.4332, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.26997840172786175, |
| "grad_norm": 0.17922881245613098, |
| "learning_rate": 8.992805755395683e-06, |
| "loss": 0.4404, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.27213822894168466, |
| "grad_norm": 0.17273768782615662, |
| "learning_rate": 9.064748201438849e-06, |
| "loss": 0.4445, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.27429805615550756, |
| "grad_norm": 0.16782942414283752, |
| "learning_rate": 9.136690647482015e-06, |
| "loss": 0.4316, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.27645788336933047, |
| "grad_norm": 0.17636790871620178, |
| "learning_rate": 9.20863309352518e-06, |
| "loss": 0.4361, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2786177105831533, |
| "grad_norm": 0.18042488396167755, |
| "learning_rate": 9.280575539568346e-06, |
| "loss": 0.4316, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.28077753779697623, |
| "grad_norm": 0.21798282861709595, |
| "learning_rate": 9.35251798561151e-06, |
| "loss": 0.4384, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.28293736501079914, |
| "grad_norm": 0.18524324893951416, |
| "learning_rate": 9.424460431654678e-06, |
| "loss": 0.4434, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.28509719222462204, |
| "grad_norm": 0.19849282503128052, |
| "learning_rate": 9.496402877697842e-06, |
| "loss": 0.4454, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.28725701943844495, |
| "grad_norm": 0.17093205451965332, |
| "learning_rate": 9.568345323741008e-06, |
| "loss": 0.4449, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2894168466522678, |
| "grad_norm": 0.19003981351852417, |
| "learning_rate": 9.640287769784174e-06, |
| "loss": 0.4244, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2915766738660907, |
| "grad_norm": 0.2193020135164261, |
| "learning_rate": 9.712230215827338e-06, |
| "loss": 0.434, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2937365010799136, |
| "grad_norm": 0.19183115661144257, |
| "learning_rate": 9.784172661870505e-06, |
| "loss": 0.4259, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2958963282937365, |
| "grad_norm": 0.17214708030223846, |
| "learning_rate": 9.85611510791367e-06, |
| "loss": 0.4433, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2980561555075594, |
| "grad_norm": 0.16226549446582794, |
| "learning_rate": 9.928057553956835e-06, |
| "loss": 0.4389, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3002159827213823, |
| "grad_norm": 0.17609405517578125, |
| "learning_rate": 1e-05, |
| "loss": 0.4387, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3023758099352052, |
| "grad_norm": 0.15736715495586395, |
| "learning_rate": 9.999984208641271e-06, |
| "loss": 0.4324, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3045356371490281, |
| "grad_norm": 0.2223547101020813, |
| "learning_rate": 9.99993683466483e-06, |
| "loss": 0.4245, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.30669546436285094, |
| "grad_norm": 0.17344172298908234, |
| "learning_rate": 9.999857878369917e-06, |
| "loss": 0.4302, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.30885529157667385, |
| "grad_norm": 0.16877353191375732, |
| "learning_rate": 9.99974734025526e-06, |
| "loss": 0.4497, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.31101511879049676, |
| "grad_norm": 0.1692124605178833, |
| "learning_rate": 9.999605221019082e-06, |
| "loss": 0.4414, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.31317494600431967, |
| "grad_norm": 0.18339934945106506, |
| "learning_rate": 9.999431521559081e-06, |
| "loss": 0.4392, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.31533477321814257, |
| "grad_norm": 0.19719652831554413, |
| "learning_rate": 9.999226242972445e-06, |
| "loss": 0.4331, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3174946004319654, |
| "grad_norm": 0.14894719421863556, |
| "learning_rate": 9.998989386555815e-06, |
| "loss": 0.4344, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.31965442764578833, |
| "grad_norm": 0.20450158417224884, |
| "learning_rate": 9.998720953805312e-06, |
| "loss": 0.4397, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.32181425485961124, |
| "grad_norm": 0.1889685094356537, |
| "learning_rate": 9.9984209464165e-06, |
| "loss": 0.4297, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.32397408207343414, |
| "grad_norm": 0.16375744342803955, |
| "learning_rate": 9.998089366284392e-06, |
| "loss": 0.4228, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.326133909287257, |
| "grad_norm": 0.15281440317630768, |
| "learning_rate": 9.997726215503422e-06, |
| "loss": 0.4264, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3282937365010799, |
| "grad_norm": 0.16808317601680756, |
| "learning_rate": 9.997331496367455e-06, |
| "loss": 0.4247, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3304535637149028, |
| "grad_norm": 0.168230339884758, |
| "learning_rate": 9.996905211369748e-06, |
| "loss": 0.4245, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3326133909287257, |
| "grad_norm": 0.1692979782819748, |
| "learning_rate": 9.996447363202947e-06, |
| "loss": 0.4309, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3347732181425486, |
| "grad_norm": 0.190389946103096, |
| "learning_rate": 9.995957954759073e-06, |
| "loss": 0.4239, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3369330453563715, |
| "grad_norm": 0.18425118923187256, |
| "learning_rate": 9.995436989129495e-06, |
| "loss": 0.4316, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3390928725701944, |
| "grad_norm": 0.1809394657611847, |
| "learning_rate": 9.994884469604913e-06, |
| "loss": 0.4276, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3412526997840173, |
| "grad_norm": 0.20476803183555603, |
| "learning_rate": 9.994300399675342e-06, |
| "loss": 0.4375, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3434125269978402, |
| "grad_norm": 0.16786271333694458, |
| "learning_rate": 9.99368478303009e-06, |
| "loss": 0.4352, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.34557235421166305, |
| "grad_norm": 0.16701987385749817, |
| "learning_rate": 9.993037623557716e-06, |
| "loss": 0.4193, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.34773218142548595, |
| "grad_norm": 0.19547881186008453, |
| "learning_rate": 9.99235892534604e-06, |
| "loss": 0.4264, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.34989200863930886, |
| "grad_norm": 0.16596215963363647, |
| "learning_rate": 9.991648692682083e-06, |
| "loss": 0.4347, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.35205183585313177, |
| "grad_norm": 0.1804916262626648, |
| "learning_rate": 9.990906930052065e-06, |
| "loss": 0.4168, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3542116630669546, |
| "grad_norm": 0.16082727909088135, |
| "learning_rate": 9.990133642141359e-06, |
| "loss": 0.4281, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3563714902807775, |
| "grad_norm": 0.180884450674057, |
| "learning_rate": 9.989328833834472e-06, |
| "loss": 0.4318, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.35853131749460043, |
| "grad_norm": 0.16864454746246338, |
| "learning_rate": 9.988492510215011e-06, |
| "loss": 0.4306, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.36069114470842334, |
| "grad_norm": 0.17244219779968262, |
| "learning_rate": 9.987624676565652e-06, |
| "loss": 0.4282, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.36285097192224625, |
| "grad_norm": 0.1679103672504425, |
| "learning_rate": 9.986725338368103e-06, |
| "loss": 0.4195, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3650107991360691, |
| "grad_norm": 0.16607263684272766, |
| "learning_rate": 9.98579450130307e-06, |
| "loss": 0.4288, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.367170626349892, |
| "grad_norm": 0.16679252684116364, |
| "learning_rate": 9.98483217125023e-06, |
| "loss": 0.418, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3693304535637149, |
| "grad_norm": 0.15752755105495453, |
| "learning_rate": 9.983838354288181e-06, |
| "loss": 0.438, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3714902807775378, |
| "grad_norm": 0.1747094839811325, |
| "learning_rate": 9.982813056694411e-06, |
| "loss": 0.4316, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.37365010799136067, |
| "grad_norm": 0.1854209452867508, |
| "learning_rate": 9.981756284945256e-06, |
| "loss": 0.424, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3758099352051836, |
| "grad_norm": 0.1754128485918045, |
| "learning_rate": 9.980668045715864e-06, |
| "loss": 0.4115, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3779697624190065, |
| "grad_norm": 0.17791932821273804, |
| "learning_rate": 9.979548345880142e-06, |
| "loss": 0.4272, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3801295896328294, |
| "grad_norm": 0.15502074360847473, |
| "learning_rate": 9.978397192510722e-06, |
| "loss": 0.4161, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.38228941684665224, |
| "grad_norm": 0.1928102672100067, |
| "learning_rate": 9.977214592878917e-06, |
| "loss": 0.4202, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.38444924406047515, |
| "grad_norm": 0.1752733737230301, |
| "learning_rate": 9.976000554454668e-06, |
| "loss": 0.4251, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.38660907127429806, |
| "grad_norm": 0.15899012982845306, |
| "learning_rate": 9.974755084906503e-06, |
| "loss": 0.4212, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.38876889848812096, |
| "grad_norm": 0.20840278267860413, |
| "learning_rate": 9.97347819210148e-06, |
| "loss": 0.4193, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.39092872570194387, |
| "grad_norm": 0.16049212217330933, |
| "learning_rate": 9.972169884105155e-06, |
| "loss": 0.4222, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3930885529157667, |
| "grad_norm": 0.21681340038776398, |
| "learning_rate": 9.970830169181504e-06, |
| "loss": 0.4221, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3952483801295896, |
| "grad_norm": 0.20257696509361267, |
| "learning_rate": 9.969459055792903e-06, |
| "loss": 0.412, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.39740820734341253, |
| "grad_norm": 0.1784621775150299, |
| "learning_rate": 9.968056552600043e-06, |
| "loss": 0.4308, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.39956803455723544, |
| "grad_norm": 0.21011000871658325, |
| "learning_rate": 9.966622668461899e-06, |
| "loss": 0.4196, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4017278617710583, |
| "grad_norm": 0.17967236042022705, |
| "learning_rate": 9.965157412435663e-06, |
| "loss": 0.4171, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4038876889848812, |
| "grad_norm": 0.21680930256843567, |
| "learning_rate": 9.963660793776689e-06, |
| "loss": 0.4188, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.4060475161987041, |
| "grad_norm": 0.1738550364971161, |
| "learning_rate": 9.96213282193843e-06, |
| "loss": 0.4207, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.408207343412527, |
| "grad_norm": 0.1727888137102127, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.4244, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4103671706263499, |
| "grad_norm": 0.19244728982448578, |
| "learning_rate": 9.958982857528053e-06, |
| "loss": 0.4162, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.41252699784017277, |
| "grad_norm": 0.1902923285961151, |
| "learning_rate": 9.957360884852819e-06, |
| "loss": 0.4272, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4146868250539957, |
| "grad_norm": 0.15449483692646027, |
| "learning_rate": 9.955707598791952e-06, |
| "loss": 0.4103, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4168466522678186, |
| "grad_norm": 0.16577541828155518, |
| "learning_rate": 9.954023009788505e-06, |
| "loss": 0.4262, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4190064794816415, |
| "grad_norm": 0.17047494649887085, |
| "learning_rate": 9.952307128483257e-06, |
| "loss": 0.416, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.42116630669546434, |
| "grad_norm": 0.16605447232723236, |
| "learning_rate": 9.950559965714647e-06, |
| "loss": 0.4118, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.42332613390928725, |
| "grad_norm": 0.17296399176120758, |
| "learning_rate": 9.948781532518706e-06, |
| "loss": 0.415, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.42548596112311016, |
| "grad_norm": 0.16557732224464417, |
| "learning_rate": 9.946971840128982e-06, |
| "loss": 0.399, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.42764578833693306, |
| "grad_norm": 0.1601681262254715, |
| "learning_rate": 9.945130899976477e-06, |
| "loss": 0.4091, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4298056155507559, |
| "grad_norm": 0.17228373885154724, |
| "learning_rate": 9.94325872368957e-06, |
| "loss": 0.4169, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4319654427645788, |
| "grad_norm": 0.1871252954006195, |
| "learning_rate": 9.941355323093944e-06, |
| "loss": 0.4064, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.43412526997840173, |
| "grad_norm": 0.16557608544826508, |
| "learning_rate": 9.939420710212511e-06, |
| "loss": 0.4022, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.43628509719222464, |
| "grad_norm": 0.19201870262622833, |
| "learning_rate": 9.937454897265338e-06, |
| "loss": 0.4106, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.43844492440604754, |
| "grad_norm": 0.20729224383831024, |
| "learning_rate": 9.935457896669568e-06, |
| "loss": 0.4231, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4406047516198704, |
| "grad_norm": 0.1870211958885193, |
| "learning_rate": 9.93342972103934e-06, |
| "loss": 0.4048, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4427645788336933, |
| "grad_norm": 0.17580384016036987, |
| "learning_rate": 9.931370383185717e-06, |
| "loss": 0.4088, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4449244060475162, |
| "grad_norm": 0.20925194025039673, |
| "learning_rate": 9.929279896116595e-06, |
| "loss": 0.4148, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4470842332613391, |
| "grad_norm": 0.2229665368795395, |
| "learning_rate": 9.927158273036624e-06, |
| "loss": 0.4185, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.44924406047516197, |
| "grad_norm": 0.1665569692850113, |
| "learning_rate": 9.925005527347132e-06, |
| "loss": 0.4137, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4514038876889849, |
| "grad_norm": 0.18300025165081024, |
| "learning_rate": 9.922821672646028e-06, |
| "loss": 0.4098, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4535637149028078, |
| "grad_norm": 0.21622765064239502, |
| "learning_rate": 9.920606722727726e-06, |
| "loss": 0.413, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4557235421166307, |
| "grad_norm": 0.1885174661874771, |
| "learning_rate": 9.918360691583056e-06, |
| "loss": 0.4156, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.45788336933045354, |
| "grad_norm": 0.20590178668498993, |
| "learning_rate": 9.916083593399167e-06, |
| "loss": 0.4192, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.46004319654427644, |
| "grad_norm": 0.19168834388256073, |
| "learning_rate": 9.913775442559451e-06, |
| "loss": 0.42, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.46220302375809935, |
| "grad_norm": 0.2033228576183319, |
| "learning_rate": 9.911436253643445e-06, |
| "loss": 0.4294, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.46436285097192226, |
| "grad_norm": 0.1603459119796753, |
| "learning_rate": 9.909066041426733e-06, |
| "loss": 0.4257, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.46652267818574517, |
| "grad_norm": 0.17825163900852203, |
| "learning_rate": 9.906664820880869e-06, |
| "loss": 0.4196, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.468682505399568, |
| "grad_norm": 0.19199080765247345, |
| "learning_rate": 9.904232607173262e-06, |
| "loss": 0.4213, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4708423326133909, |
| "grad_norm": 0.16068002581596375, |
| "learning_rate": 9.9017694156671e-06, |
| "loss": 0.4178, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.47300215982721383, |
| "grad_norm": 0.18598708510398865, |
| "learning_rate": 9.899275261921236e-06, |
| "loss": 0.4119, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.47516198704103674, |
| "grad_norm": 0.17735686898231506, |
| "learning_rate": 9.8967501616901e-06, |
| "loss": 0.4159, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4773218142548596, |
| "grad_norm": 0.20054501295089722, |
| "learning_rate": 9.894194130923602e-06, |
| "loss": 0.4228, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4794816414686825, |
| "grad_norm": 0.1531924605369568, |
| "learning_rate": 9.891607185767018e-06, |
| "loss": 0.4182, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4816414686825054, |
| "grad_norm": 0.20048613846302032, |
| "learning_rate": 9.8889893425609e-06, |
| "loss": 0.4184, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4838012958963283, |
| "grad_norm": 0.16016018390655518, |
| "learning_rate": 9.886340617840968e-06, |
| "loss": 0.4162, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.48596112311015116, |
| "grad_norm": 0.17939400672912598, |
| "learning_rate": 9.883661028338009e-06, |
| "loss": 0.4216, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.48812095032397407, |
| "grad_norm": 0.17419300973415375, |
| "learning_rate": 9.880950590977764e-06, |
| "loss": 0.4165, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.490280777537797, |
| "grad_norm": 0.18040290474891663, |
| "learning_rate": 9.87820932288083e-06, |
| "loss": 0.4148, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.4924406047516199, |
| "grad_norm": 0.2009221911430359, |
| "learning_rate": 9.875437241362546e-06, |
| "loss": 0.4088, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4946004319654428, |
| "grad_norm": 0.16184359788894653, |
| "learning_rate": 9.872634363932887e-06, |
| "loss": 0.4246, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.49676025917926564, |
| "grad_norm": 0.19945880770683289, |
| "learning_rate": 9.869800708296347e-06, |
| "loss": 0.415, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.49892008639308855, |
| "grad_norm": 0.1834121197462082, |
| "learning_rate": 9.866936292351837e-06, |
| "loss": 0.413, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5010799136069114, |
| "grad_norm": 0.19155217707157135, |
| "learning_rate": 9.864041134192563e-06, |
| "loss": 0.4145, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5032397408207343, |
| "grad_norm": 0.16527444124221802, |
| "learning_rate": 9.861115252105922e-06, |
| "loss": 0.411, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5053995680345572, |
| "grad_norm": 0.2018229365348816, |
| "learning_rate": 9.85815866457337e-06, |
| "loss": 0.4144, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5075593952483801, |
| "grad_norm": 0.18045048415660858, |
| "learning_rate": 9.855171390270325e-06, |
| "loss": 0.4173, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.509719222462203, |
| "grad_norm": 0.16335050761699677, |
| "learning_rate": 9.852153448066031e-06, |
| "loss": 0.4184, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5118790496760259, |
| "grad_norm": 0.1876971423625946, |
| "learning_rate": 9.849104857023455e-06, |
| "loss": 0.4149, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5140388768898488, |
| "grad_norm": 0.1632338911294937, |
| "learning_rate": 9.846025636399152e-06, |
| "loss": 0.4281, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5161987041036717, |
| "grad_norm": 0.1685461848974228, |
| "learning_rate": 9.842915805643156e-06, |
| "loss": 0.4168, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5183585313174947, |
| "grad_norm": 0.1753380447626114, |
| "learning_rate": 9.839775384398846e-06, |
| "loss": 0.4163, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5205183585313174, |
| "grad_norm": 0.17196574807167053, |
| "learning_rate": 9.836604392502829e-06, |
| "loss": 0.4264, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5226781857451404, |
| "grad_norm": 0.22572582960128784, |
| "learning_rate": 9.833402849984815e-06, |
| "loss": 0.4116, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5248380129589633, |
| "grad_norm": 0.16782043874263763, |
| "learning_rate": 9.830170777067486e-06, |
| "loss": 0.416, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5269978401727862, |
| "grad_norm": 0.1964120864868164, |
| "learning_rate": 9.82690819416637e-06, |
| "loss": 0.4189, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5291576673866091, |
| "grad_norm": 0.16382494568824768, |
| "learning_rate": 9.823615121889716e-06, |
| "loss": 0.4216, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.531317494600432, |
| "grad_norm": 0.19145262241363525, |
| "learning_rate": 9.820291581038354e-06, |
| "loss": 0.4069, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5334773218142549, |
| "grad_norm": 0.1793445199728012, |
| "learning_rate": 9.81693759260558e-06, |
| "loss": 0.4073, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5356371490280778, |
| "grad_norm": 0.20790617167949677, |
| "learning_rate": 9.813553177777005e-06, |
| "loss": 0.4098, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5377969762419006, |
| "grad_norm": 0.17739000916481018, |
| "learning_rate": 9.81013835793043e-06, |
| "loss": 0.416, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5399568034557235, |
| "grad_norm": 0.1868736743927002, |
| "learning_rate": 9.806693154635719e-06, |
| "loss": 0.4192, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5421166306695464, |
| "grad_norm": 0.2077651023864746, |
| "learning_rate": 9.803217589654642e-06, |
| "loss": 0.4001, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5442764578833693, |
| "grad_norm": 0.17842766642570496, |
| "learning_rate": 9.79971168494076e-06, |
| "loss": 0.4122, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5464362850971922, |
| "grad_norm": 0.20299148559570312, |
| "learning_rate": 9.796175462639273e-06, |
| "loss": 0.4164, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5485961123110151, |
| "grad_norm": 0.20451399683952332, |
| "learning_rate": 9.79260894508688e-06, |
| "loss": 0.4194, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.550755939524838, |
| "grad_norm": 0.16164958477020264, |
| "learning_rate": 9.789012154811648e-06, |
| "loss": 0.4037, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5529157667386609, |
| "grad_norm": 0.19269876182079315, |
| "learning_rate": 9.785385114532858e-06, |
| "loss": 0.4086, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5550755939524838, |
| "grad_norm": 0.22143694758415222, |
| "learning_rate": 9.781727847160865e-06, |
| "loss": 0.4205, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5572354211663066, |
| "grad_norm": 0.20241893827915192, |
| "learning_rate": 9.77804037579696e-06, |
| "loss": 0.4135, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5593952483801296, |
| "grad_norm": 0.19745588302612305, |
| "learning_rate": 9.774322723733216e-06, |
| "loss": 0.4129, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5615550755939525, |
| "grad_norm": 0.1914190798997879, |
| "learning_rate": 9.770574914452343e-06, |
| "loss": 0.4153, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5637149028077754, |
| "grad_norm": 0.18239063024520874, |
| "learning_rate": 9.766796971627543e-06, |
| "loss": 0.4183, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5658747300215983, |
| "grad_norm": 0.18472230434417725, |
| "learning_rate": 9.762988919122354e-06, |
| "loss": 0.4129, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5680345572354212, |
| "grad_norm": 0.18945036828517914, |
| "learning_rate": 9.759150780990508e-06, |
| "loss": 0.4145, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5701943844492441, |
| "grad_norm": 0.18478325009346008, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.4057, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.572354211663067, |
| "grad_norm": 0.21743497252464294, |
| "learning_rate": 9.751384345011787e-06, |
| "loss": 0.4161, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5745140388768899, |
| "grad_norm": 0.17114116251468658, |
| "learning_rate": 9.747456096221946e-06, |
| "loss": 0.4007, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5766738660907127, |
| "grad_norm": 0.187269389629364, |
| "learning_rate": 9.743497859919196e-06, |
| "loss": 0.4048, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5788336933045356, |
| "grad_norm": 0.16859321296215057, |
| "learning_rate": 9.739509661105912e-06, |
| "loss": 0.4109, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5809935205183585, |
| "grad_norm": 0.1999719887971878, |
| "learning_rate": 9.735491524973723e-06, |
| "loss": 0.3952, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5831533477321814, |
| "grad_norm": 0.176213800907135, |
| "learning_rate": 9.73144347690336e-06, |
| "loss": 0.4177, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5853131749460043, |
| "grad_norm": 0.1951010376214981, |
| "learning_rate": 9.727365542464498e-06, |
| "loss": 0.4164, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5874730021598272, |
| "grad_norm": 0.17570029199123383, |
| "learning_rate": 9.723257747415584e-06, |
| "loss": 0.4094, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5896328293736501, |
| "grad_norm": 0.179957315325737, |
| "learning_rate": 9.719120117703688e-06, |
| "loss": 0.406, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.591792656587473, |
| "grad_norm": 0.17086850106716156, |
| "learning_rate": 9.714952679464324e-06, |
| "loss": 0.403, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.593952483801296, |
| "grad_norm": 0.17228035628795624, |
| "learning_rate": 9.710755459021297e-06, |
| "loss": 0.4109, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5961123110151187, |
| "grad_norm": 0.19265015423297882, |
| "learning_rate": 9.706528482886535e-06, |
| "loss": 0.4209, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5982721382289417, |
| "grad_norm": 0.16357901692390442, |
| "learning_rate": 9.702271777759915e-06, |
| "loss": 0.4061, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6004319654427646, |
| "grad_norm": 0.17083071172237396, |
| "learning_rate": 9.697985370529101e-06, |
| "loss": 0.3996, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6025917926565875, |
| "grad_norm": 0.1811821013689041, |
| "learning_rate": 9.693669288269371e-06, |
| "loss": 0.4182, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6047516198704104, |
| "grad_norm": 0.1488206833600998, |
| "learning_rate": 9.689323558243446e-06, |
| "loss": 0.3981, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6069114470842333, |
| "grad_norm": 0.185231015086174, |
| "learning_rate": 9.684948207901315e-06, |
| "loss": 0.4132, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6090712742980562, |
| "grad_norm": 0.14964009821414948, |
| "learning_rate": 9.680543264880075e-06, |
| "loss": 0.4098, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6112311015118791, |
| "grad_norm": 0.16769437491893768, |
| "learning_rate": 9.676108757003735e-06, |
| "loss": 0.418, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6133909287257019, |
| "grad_norm": 0.1763710230588913, |
| "learning_rate": 9.671644712283061e-06, |
| "loss": 0.4111, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6155507559395248, |
| "grad_norm": 0.17033055424690247, |
| "learning_rate": 9.667151158915382e-06, |
| "loss": 0.4138, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6177105831533477, |
| "grad_norm": 0.21479171514511108, |
| "learning_rate": 9.662628125284426e-06, |
| "loss": 0.4164, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6198704103671706, |
| "grad_norm": 0.18288952112197876, |
| "learning_rate": 9.65807563996013e-06, |
| "loss": 0.416, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6220302375809935, |
| "grad_norm": 0.20399482548236847, |
| "learning_rate": 9.653493731698467e-06, |
| "loss": 0.4145, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6241900647948164, |
| "grad_norm": 0.19287261366844177, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.4131, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6263498920086393, |
| "grad_norm": 0.17563579976558685, |
| "learning_rate": 9.644241762315995e-06, |
| "loss": 0.4097, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6285097192224622, |
| "grad_norm": 0.18624839186668396, |
| "learning_rate": 9.639571759635655e-06, |
| "loss": 0.4176, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6306695464362851, |
| "grad_norm": 0.18379148840904236, |
| "learning_rate": 9.634872450898511e-06, |
| "loss": 0.4035, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6328293736501079, |
| "grad_norm": 0.1886526644229889, |
| "learning_rate": 9.630143865787951e-06, |
| "loss": 0.4068, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6349892008639308, |
| "grad_norm": 0.16463734209537506, |
| "learning_rate": 9.62538603417229e-06, |
| "loss": 0.4163, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6371490280777538, |
| "grad_norm": 0.1974654197692871, |
| "learning_rate": 9.620598986104578e-06, |
| "loss": 0.4039, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6393088552915767, |
| "grad_norm": 0.1882481724023819, |
| "learning_rate": 9.615782751822413e-06, |
| "loss": 0.4115, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6414686825053996, |
| "grad_norm": 0.15222138166427612, |
| "learning_rate": 9.610937361747747e-06, |
| "loss": 0.4045, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6436285097192225, |
| "grad_norm": 0.17053523659706116, |
| "learning_rate": 9.606062846486698e-06, |
| "loss": 0.4119, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6457883369330454, |
| "grad_norm": 0.15987005829811096, |
| "learning_rate": 9.601159236829353e-06, |
| "loss": 0.3964, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6479481641468683, |
| "grad_norm": 0.16534611582756042, |
| "learning_rate": 9.596226563749575e-06, |
| "loss": 0.4115, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6501079913606912, |
| "grad_norm": 0.1743890643119812, |
| "learning_rate": 9.591264858404809e-06, |
| "loss": 0.4241, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.652267818574514, |
| "grad_norm": 0.14473925530910492, |
| "learning_rate": 9.586274152135883e-06, |
| "loss": 0.4011, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6544276457883369, |
| "grad_norm": 0.1717105656862259, |
| "learning_rate": 9.58125447646681e-06, |
| "loss": 0.4128, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6565874730021598, |
| "grad_norm": 0.16893403232097626, |
| "learning_rate": 9.576205863104588e-06, |
| "loss": 0.3984, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6587473002159827, |
| "grad_norm": 0.19387434422969818, |
| "learning_rate": 9.571128343939006e-06, |
| "loss": 0.4086, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6609071274298056, |
| "grad_norm": 0.1532067507505417, |
| "learning_rate": 9.566021951042432e-06, |
| "loss": 0.413, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6630669546436285, |
| "grad_norm": 0.19082939624786377, |
| "learning_rate": 9.56088671666962e-06, |
| "loss": 0.4028, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6652267818574514, |
| "grad_norm": 0.1660735309123993, |
| "learning_rate": 9.555722673257502e-06, |
| "loss": 0.4048, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6673866090712743, |
| "grad_norm": 0.1646290272474289, |
| "learning_rate": 9.550529853424979e-06, |
| "loss": 0.401, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6695464362850972, |
| "grad_norm": 0.1946524977684021, |
| "learning_rate": 9.545308289972727e-06, |
| "loss": 0.3999, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.67170626349892, |
| "grad_norm": 0.1731284260749817, |
| "learning_rate": 9.54005801588298e-06, |
| "loss": 0.4056, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.673866090712743, |
| "grad_norm": 0.1577766239643097, |
| "learning_rate": 9.534779064319318e-06, |
| "loss": 0.3952, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6760259179265659, |
| "grad_norm": 0.20560896396636963, |
| "learning_rate": 9.529471468626472e-06, |
| "loss": 0.4082, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6781857451403888, |
| "grad_norm": 0.16146545112133026, |
| "learning_rate": 9.524135262330098e-06, |
| "loss": 0.4044, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6803455723542117, |
| "grad_norm": 0.18924373388290405, |
| "learning_rate": 9.51877047913658e-06, |
| "loss": 0.3949, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6825053995680346, |
| "grad_norm": 0.20120824873447418, |
| "learning_rate": 9.513377152932796e-06, |
| "loss": 0.4098, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6846652267818575, |
| "grad_norm": 0.17236529290676117, |
| "learning_rate": 9.507955317785935e-06, |
| "loss": 0.4005, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6868250539956804, |
| "grad_norm": 0.19479617476463318, |
| "learning_rate": 9.502505007943248e-06, |
| "loss": 0.4115, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6889848812095032, |
| "grad_norm": 0.18137769401073456, |
| "learning_rate": 9.497026257831856e-06, |
| "loss": 0.4006, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6911447084233261, |
| "grad_norm": 0.18386590480804443, |
| "learning_rate": 9.491519102058523e-06, |
| "loss": 0.4045, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.693304535637149, |
| "grad_norm": 0.18597717583179474, |
| "learning_rate": 9.48598357540944e-06, |
| "loss": 0.3974, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6954643628509719, |
| "grad_norm": 0.19069334864616394, |
| "learning_rate": 9.480419712849996e-06, |
| "loss": 0.4139, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6976241900647948, |
| "grad_norm": 0.18793267011642456, |
| "learning_rate": 9.474827549524574e-06, |
| "loss": 0.4105, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6997840172786177, |
| "grad_norm": 0.19101367890834808, |
| "learning_rate": 9.46920712075632e-06, |
| "loss": 0.3988, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7019438444924406, |
| "grad_norm": 0.15915150940418243, |
| "learning_rate": 9.463558462046912e-06, |
| "loss": 0.4052, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7041036717062635, |
| "grad_norm": 0.20149967074394226, |
| "learning_rate": 9.457881609076352e-06, |
| "loss": 0.4039, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7062634989200864, |
| "grad_norm": 0.1692509800195694, |
| "learning_rate": 9.452176597702724e-06, |
| "loss": 0.4146, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7084233261339092, |
| "grad_norm": 0.16798816621303558, |
| "learning_rate": 9.446443463961986e-06, |
| "loss": 0.3943, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7105831533477321, |
| "grad_norm": 0.16925224661827087, |
| "learning_rate": 9.440682244067724e-06, |
| "loss": 0.3992, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.712742980561555, |
| "grad_norm": 0.19609062373638153, |
| "learning_rate": 9.434892974410932e-06, |
| "loss": 0.4094, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.714902807775378, |
| "grad_norm": 0.19346529245376587, |
| "learning_rate": 9.429075691559788e-06, |
| "loss": 0.4018, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7170626349892009, |
| "grad_norm": 0.18897579610347748, |
| "learning_rate": 9.423230432259409e-06, |
| "loss": 0.4012, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7192224622030238, |
| "grad_norm": 0.16114945709705353, |
| "learning_rate": 9.41735723343163e-06, |
| "loss": 0.3988, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7213822894168467, |
| "grad_norm": 0.1889643669128418, |
| "learning_rate": 9.411456132174768e-06, |
| "loss": 0.3912, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7235421166306696, |
| "grad_norm": 0.20438078045845032, |
| "learning_rate": 9.405527165763384e-06, |
| "loss": 0.4036, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7257019438444925, |
| "grad_norm": 0.1676449030637741, |
| "learning_rate": 9.399570371648052e-06, |
| "loss": 0.4085, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7278617710583153, |
| "grad_norm": 0.23122479021549225, |
| "learning_rate": 9.393585787455125e-06, |
| "loss": 0.4075, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7300215982721382, |
| "grad_norm": 0.15428000688552856, |
| "learning_rate": 9.387573450986485e-06, |
| "loss": 0.3979, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7321814254859611, |
| "grad_norm": 0.1889045536518097, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.4004, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.734341252699784, |
| "grad_norm": 0.16855069994926453, |
| "learning_rate": 9.37546567330587e-06, |
| "loss": 0.4021, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7365010799136069, |
| "grad_norm": 0.15914303064346313, |
| "learning_rate": 9.369370308573198e-06, |
| "loss": 0.4147, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7386609071274298, |
| "grad_norm": 0.18533971905708313, |
| "learning_rate": 9.363247344522939e-06, |
| "loss": 0.4025, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7408207343412527, |
| "grad_norm": 0.15280672907829285, |
| "learning_rate": 9.357096819831065e-06, |
| "loss": 0.4061, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7429805615550756, |
| "grad_norm": 0.1812913715839386, |
| "learning_rate": 9.35091877334763e-06, |
| "loss": 0.4008, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7451403887688985, |
| "grad_norm": 0.19496847689151764, |
| "learning_rate": 9.344713244096533e-06, |
| "loss": 0.4063, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7473002159827213, |
| "grad_norm": 0.15390554070472717, |
| "learning_rate": 9.33848027127527e-06, |
| "loss": 0.3943, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7494600431965442, |
| "grad_norm": 0.18108762800693512, |
| "learning_rate": 9.332219894254686e-06, |
| "loss": 0.4037, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7516198704103672, |
| "grad_norm": 0.172384575009346, |
| "learning_rate": 9.325932152578726e-06, |
| "loss": 0.404, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7537796976241901, |
| "grad_norm": 0.1718224287033081, |
| "learning_rate": 9.319617085964177e-06, |
| "loss": 0.4098, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.755939524838013, |
| "grad_norm": 0.16733084619045258, |
| "learning_rate": 9.31327473430044e-06, |
| "loss": 0.41, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7580993520518359, |
| "grad_norm": 0.15835174918174744, |
| "learning_rate": 9.30690513764925e-06, |
| "loss": 0.4108, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7602591792656588, |
| "grad_norm": 0.16416366398334503, |
| "learning_rate": 9.300508336244443e-06, |
| "loss": 0.4123, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7624190064794817, |
| "grad_norm": 0.15685053169727325, |
| "learning_rate": 9.294084370491695e-06, |
| "loss": 0.4026, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7645788336933045, |
| "grad_norm": 0.17324267327785492, |
| "learning_rate": 9.287633280968263e-06, |
| "loss": 0.4043, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7667386609071274, |
| "grad_norm": 0.16480839252471924, |
| "learning_rate": 9.281155108422732e-06, |
| "loss": 0.3903, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7688984881209503, |
| "grad_norm": 0.155819833278656, |
| "learning_rate": 9.274649893774768e-06, |
| "loss": 0.4163, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7710583153347732, |
| "grad_norm": 0.1437472552061081, |
| "learning_rate": 9.268117678114833e-06, |
| "loss": 0.3983, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7732181425485961, |
| "grad_norm": 0.1644992083311081, |
| "learning_rate": 9.26155850270396e-06, |
| "loss": 0.4143, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.775377969762419, |
| "grad_norm": 0.15442179143428802, |
| "learning_rate": 9.25497240897346e-06, |
| "loss": 0.4186, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7775377969762419, |
| "grad_norm": 0.16961856186389923, |
| "learning_rate": 9.248359438524683e-06, |
| "loss": 0.4056, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7796976241900648, |
| "grad_norm": 0.14529763162136078, |
| "learning_rate": 9.241719633128743e-06, |
| "loss": 0.4081, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7818574514038877, |
| "grad_norm": 0.17451095581054688, |
| "learning_rate": 9.235053034726261e-06, |
| "loss": 0.4011, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7840172786177105, |
| "grad_norm": 0.16993848979473114, |
| "learning_rate": 9.228359685427095e-06, |
| "loss": 0.4126, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7861771058315334, |
| "grad_norm": 0.1698153018951416, |
| "learning_rate": 9.221639627510076e-06, |
| "loss": 0.3983, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7883369330453563, |
| "grad_norm": 0.15617668628692627, |
| "learning_rate": 9.214892903422745e-06, |
| "loss": 0.3894, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7904967602591793, |
| "grad_norm": 0.1748441755771637, |
| "learning_rate": 9.208119555781074e-06, |
| "loss": 0.4042, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7926565874730022, |
| "grad_norm": 0.18701235949993134, |
| "learning_rate": 9.201319627369211e-06, |
| "loss": 0.4166, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7948164146868251, |
| "grad_norm": 0.15359680354595184, |
| "learning_rate": 9.1944931611392e-06, |
| "loss": 0.4025, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.796976241900648, |
| "grad_norm": 0.17842437326908112, |
| "learning_rate": 9.18764020021071e-06, |
| "loss": 0.4157, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7991360691144709, |
| "grad_norm": 0.16838903725147247, |
| "learning_rate": 9.180760787870766e-06, |
| "loss": 0.4058, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8012958963282938, |
| "grad_norm": 0.17230413854122162, |
| "learning_rate": 9.173854967573479e-06, |
| "loss": 0.4063, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8034557235421166, |
| "grad_norm": 0.17813710868358612, |
| "learning_rate": 9.166922782939759e-06, |
| "loss": 0.4122, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8056155507559395, |
| "grad_norm": 0.19047455489635468, |
| "learning_rate": 9.159964277757054e-06, |
| "loss": 0.4026, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8077753779697624, |
| "grad_norm": 0.15476709604263306, |
| "learning_rate": 9.152979495979064e-06, |
| "loss": 0.3872, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8099352051835853, |
| "grad_norm": 0.15130369365215302, |
| "learning_rate": 9.145968481725466e-06, |
| "loss": 0.4018, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8120950323974082, |
| "grad_norm": 0.1687459796667099, |
| "learning_rate": 9.13893127928164e-06, |
| "loss": 0.3983, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8142548596112311, |
| "grad_norm": 0.1546049863100052, |
| "learning_rate": 9.131867933098379e-06, |
| "loss": 0.4109, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.816414686825054, |
| "grad_norm": 0.1616266667842865, |
| "learning_rate": 9.124778487791615e-06, |
| "loss": 0.4039, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8185745140388769, |
| "grad_norm": 0.1830417811870575, |
| "learning_rate": 9.117662988142138e-06, |
| "loss": 0.4053, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8207343412526998, |
| "grad_norm": 0.15087199211120605, |
| "learning_rate": 9.110521479095314e-06, |
| "loss": 0.4111, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8228941684665226, |
| "grad_norm": 0.15791049599647522, |
| "learning_rate": 9.10335400576079e-06, |
| "loss": 0.3882, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8250539956803455, |
| "grad_norm": 0.16011568903923035, |
| "learning_rate": 9.096160613412228e-06, |
| "loss": 0.4101, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8272138228941684, |
| "grad_norm": 0.1656263768672943, |
| "learning_rate": 9.088941347487004e-06, |
| "loss": 0.394, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8293736501079914, |
| "grad_norm": 0.15749986469745636, |
| "learning_rate": 9.08169625358592e-06, |
| "loss": 0.3972, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8315334773218143, |
| "grad_norm": 0.15940222144126892, |
| "learning_rate": 9.074425377472932e-06, |
| "loss": 0.4003, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8336933045356372, |
| "grad_norm": 0.17559286952018738, |
| "learning_rate": 9.067128765074842e-06, |
| "loss": 0.4046, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8358531317494601, |
| "grad_norm": 0.1646784096956253, |
| "learning_rate": 9.059806462481022e-06, |
| "loss": 0.3968, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.838012958963283, |
| "grad_norm": 0.16697706282138824, |
| "learning_rate": 9.052458515943112e-06, |
| "loss": 0.4146, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8401727861771058, |
| "grad_norm": 0.17301729321479797, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.4037, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8423326133909287, |
| "grad_norm": 0.1766882836818695, |
| "learning_rate": 9.037685876851211e-06, |
| "loss": 0.4019, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8444924406047516, |
| "grad_norm": 0.16974475979804993, |
| "learning_rate": 9.030261277609235e-06, |
| "loss": 0.3978, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8466522678185745, |
| "grad_norm": 0.17788070440292358, |
| "learning_rate": 9.022811221046618e-06, |
| "loss": 0.4062, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8488120950323974, |
| "grad_norm": 0.16667339205741882, |
| "learning_rate": 9.015335754221964e-06, |
| "loss": 0.4167, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8509719222462203, |
| "grad_norm": 0.15693309903144836, |
| "learning_rate": 9.007834924354384e-06, |
| "loss": 0.3988, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8531317494600432, |
| "grad_norm": 0.16362878680229187, |
| "learning_rate": 9.000308778823196e-06, |
| "loss": 0.3995, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8552915766738661, |
| "grad_norm": 0.14635585248470306, |
| "learning_rate": 8.992757365167625e-06, |
| "loss": 0.4028, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.857451403887689, |
| "grad_norm": 0.16527874767780304, |
| "learning_rate": 8.985180731086505e-06, |
| "loss": 0.406, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8596112311015118, |
| "grad_norm": 0.2163344919681549, |
| "learning_rate": 8.977578924437976e-06, |
| "loss": 0.3985, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8617710583153347, |
| "grad_norm": 0.14798112213611603, |
| "learning_rate": 8.969951993239177e-06, |
| "loss": 0.4011, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8639308855291576, |
| "grad_norm": 0.16196613013744354, |
| "learning_rate": 8.962299985665955e-06, |
| "loss": 0.4057, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8660907127429806, |
| "grad_norm": 0.15940962731838226, |
| "learning_rate": 8.954622950052543e-06, |
| "loss": 0.4027, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8682505399568035, |
| "grad_norm": 0.16603127121925354, |
| "learning_rate": 8.946920934891274e-06, |
| "loss": 0.4106, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8704103671706264, |
| "grad_norm": 0.16625916957855225, |
| "learning_rate": 8.939193988832261e-06, |
| "loss": 0.3997, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8725701943844493, |
| "grad_norm": 0.17211325466632843, |
| "learning_rate": 8.931442160683094e-06, |
| "loss": 0.4036, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8747300215982722, |
| "grad_norm": 0.17657049000263214, |
| "learning_rate": 8.923665499408535e-06, |
| "loss": 0.393, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8768898488120951, |
| "grad_norm": 0.18346846103668213, |
| "learning_rate": 8.915864054130203e-06, |
| "loss": 0.3911, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8790496760259179, |
| "grad_norm": 0.17051193118095398, |
| "learning_rate": 8.908037874126263e-06, |
| "loss": 0.3916, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8812095032397408, |
| "grad_norm": 0.15643054246902466, |
| "learning_rate": 8.900187008831124e-06, |
| "loss": 0.3957, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8833693304535637, |
| "grad_norm": 0.18112455308437347, |
| "learning_rate": 8.892311507835118e-06, |
| "loss": 0.4006, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8855291576673866, |
| "grad_norm": 0.1472531408071518, |
| "learning_rate": 8.88441142088419e-06, |
| "loss": 0.3969, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8876889848812095, |
| "grad_norm": 0.16634514927864075, |
| "learning_rate": 8.87648679787958e-06, |
| "loss": 0.4052, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8898488120950324, |
| "grad_norm": 0.16606342792510986, |
| "learning_rate": 8.868537688877516e-06, |
| "loss": 0.3999, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8920086393088553, |
| "grad_norm": 0.16223309934139252, |
| "learning_rate": 8.860564144088891e-06, |
| "loss": 0.4053, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8941684665226782, |
| "grad_norm": 0.17775796353816986, |
| "learning_rate": 8.852566213878947e-06, |
| "loss": 0.3996, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.896328293736501, |
| "grad_norm": 0.16113241016864777, |
| "learning_rate": 8.844543948766958e-06, |
| "loss": 0.3874, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8984881209503239, |
| "grad_norm": 0.19586795568466187, |
| "learning_rate": 8.83649739942591e-06, |
| "loss": 0.4012, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9006479481641468, |
| "grad_norm": 0.18052950501441956, |
| "learning_rate": 8.828426616682184e-06, |
| "loss": 0.3973, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9028077753779697, |
| "grad_norm": 0.16518956422805786, |
| "learning_rate": 8.820331651515226e-06, |
| "loss": 0.3997, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9049676025917927, |
| "grad_norm": 0.1827470362186432, |
| "learning_rate": 8.81221255505724e-06, |
| "loss": 0.4008, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9071274298056156, |
| "grad_norm": 0.18678082525730133, |
| "learning_rate": 8.80406937859285e-06, |
| "loss": 0.3953, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9092872570194385, |
| "grad_norm": 0.1759604662656784, |
| "learning_rate": 8.795902173558784e-06, |
| "loss": 0.4037, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9114470842332614, |
| "grad_norm": 0.1986621916294098, |
| "learning_rate": 8.787710991543547e-06, |
| "loss": 0.4125, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9136069114470843, |
| "grad_norm": 0.19601307809352875, |
| "learning_rate": 8.779495884287099e-06, |
| "loss": 0.4018, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9157667386609071, |
| "grad_norm": 0.16699747741222382, |
| "learning_rate": 8.77125690368052e-06, |
| "loss": 0.4029, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.91792656587473, |
| "grad_norm": 0.16781239211559296, |
| "learning_rate": 8.76299410176569e-06, |
| "loss": 0.3956, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9200863930885529, |
| "grad_norm": 0.17204856872558594, |
| "learning_rate": 8.754707530734958e-06, |
| "loss": 0.4033, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9222462203023758, |
| "grad_norm": 0.1568082720041275, |
| "learning_rate": 8.74639724293081e-06, |
| "loss": 0.3937, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9244060475161987, |
| "grad_norm": 0.18325375020503998, |
| "learning_rate": 8.738063290845536e-06, |
| "loss": 0.4077, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9265658747300216, |
| "grad_norm": 0.15343928337097168, |
| "learning_rate": 8.729705727120911e-06, |
| "loss": 0.3997, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9287257019438445, |
| "grad_norm": 0.1750892996788025, |
| "learning_rate": 8.721324604547851e-06, |
| "loss": 0.4151, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9308855291576674, |
| "grad_norm": 0.17041905224323273, |
| "learning_rate": 8.712919976066078e-06, |
| "loss": 0.4051, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9330453563714903, |
| "grad_norm": 0.17677395045757294, |
| "learning_rate": 8.704491894763794e-06, |
| "loss": 0.4031, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9352051835853131, |
| "grad_norm": 0.2149730920791626, |
| "learning_rate": 8.696040413877344e-06, |
| "loss": 0.4029, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.937365010799136, |
| "grad_norm": 0.17261390388011932, |
| "learning_rate": 8.68756558679087e-06, |
| "loss": 0.3998, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9395248380129589, |
| "grad_norm": 0.17588981986045837, |
| "learning_rate": 8.679067467035989e-06, |
| "loss": 0.4127, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9416846652267818, |
| "grad_norm": 0.18429699540138245, |
| "learning_rate": 8.670546108291443e-06, |
| "loss": 0.3987, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9438444924406048, |
| "grad_norm": 0.15987183153629303, |
| "learning_rate": 8.662001564382768e-06, |
| "loss": 0.3911, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9460043196544277, |
| "grad_norm": 0.17549017071723938, |
| "learning_rate": 8.65343388928194e-06, |
| "loss": 0.4068, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9481641468682506, |
| "grad_norm": 0.1644325852394104, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.3938, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9503239740820735, |
| "grad_norm": 0.18092772364616394, |
| "learning_rate": 8.636229362121979e-06, |
| "loss": 0.4036, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9524838012958964, |
| "grad_norm": 0.19745442271232605, |
| "learning_rate": 8.627592618735989e-06, |
| "loss": 0.4131, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9546436285097192, |
| "grad_norm": 0.15399040281772614, |
| "learning_rate": 8.618932961503452e-06, |
| "loss": 0.3956, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9568034557235421, |
| "grad_norm": 0.21613968908786774, |
| "learning_rate": 8.610250445123472e-06, |
| "loss": 0.3957, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.958963282937365, |
| "grad_norm": 0.15756168961524963, |
| "learning_rate": 8.601545124439535e-06, |
| "loss": 0.401, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9611231101511879, |
| "grad_norm": 0.16475795209407806, |
| "learning_rate": 8.592817054439184e-06, |
| "loss": 0.4091, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9632829373650108, |
| "grad_norm": 0.17942647635936737, |
| "learning_rate": 8.584066290253649e-06, |
| "loss": 0.3818, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9654427645788337, |
| "grad_norm": 0.1804707795381546, |
| "learning_rate": 8.575292887157515e-06, |
| "loss": 0.4036, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.9676025917926566, |
| "grad_norm": 0.1610308587551117, |
| "learning_rate": 8.566496900568364e-06, |
| "loss": 0.4046, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9697624190064795, |
| "grad_norm": 0.17367208003997803, |
| "learning_rate": 8.557678386046429e-06, |
| "loss": 0.399, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9719222462203023, |
| "grad_norm": 0.16975344717502594, |
| "learning_rate": 8.548837399294235e-06, |
| "loss": 0.3973, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9740820734341252, |
| "grad_norm": 0.16336052119731903, |
| "learning_rate": 8.539973996156265e-06, |
| "loss": 0.4077, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9762419006479481, |
| "grad_norm": 0.16016145050525665, |
| "learning_rate": 8.531088232618587e-06, |
| "loss": 0.4005, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.978401727861771, |
| "grad_norm": 0.15805621445178986, |
| "learning_rate": 8.522180164808515e-06, |
| "loss": 0.3885, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.980561555075594, |
| "grad_norm": 0.15626148879528046, |
| "learning_rate": 8.513249848994248e-06, |
| "loss": 0.3912, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9827213822894169, |
| "grad_norm": 0.1786354035139084, |
| "learning_rate": 8.504297341584509e-06, |
| "loss": 0.4034, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9848812095032398, |
| "grad_norm": 0.1438089907169342, |
| "learning_rate": 8.495322699128206e-06, |
| "loss": 0.4003, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9870410367170627, |
| "grad_norm": 0.16011767089366913, |
| "learning_rate": 8.486325978314054e-06, |
| "loss": 0.3985, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9892008639308856, |
| "grad_norm": 0.18413770198822021, |
| "learning_rate": 8.477307235970235e-06, |
| "loss": 0.3855, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9913606911447084, |
| "grad_norm": 0.15895338356494904, |
| "learning_rate": 8.468266529064025e-06, |
| "loss": 0.3918, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9935205183585313, |
| "grad_norm": 0.172573059797287, |
| "learning_rate": 8.459203914701444e-06, |
| "loss": 0.3903, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9956803455723542, |
| "grad_norm": 0.1525600552558899, |
| "learning_rate": 8.450119450126889e-06, |
| "loss": 0.4066, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9978401727861771, |
| "grad_norm": 0.1875782459974289, |
| "learning_rate": 8.441013192722774e-06, |
| "loss": 0.405, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.18118026852607727, |
| "learning_rate": 8.431885200009172e-06, |
| "loss": 0.402, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0021598272138228, |
| "grad_norm": 0.1752985566854477, |
| "learning_rate": 8.422735529643445e-06, |
| "loss": 0.3926, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0043196544276458, |
| "grad_norm": 0.1703169196844101, |
| "learning_rate": 8.413564239419883e-06, |
| "loss": 0.3838, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.0064794816414686, |
| "grad_norm": 0.181954026222229, |
| "learning_rate": 8.404371387269341e-06, |
| "loss": 0.3863, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0086393088552916, |
| "grad_norm": 0.16215139627456665, |
| "learning_rate": 8.39515703125887e-06, |
| "loss": 0.3849, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0107991360691144, |
| "grad_norm": 0.23999503254890442, |
| "learning_rate": 8.385921229591351e-06, |
| "loss": 0.3917, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0129589632829374, |
| "grad_norm": 0.1752462089061737, |
| "learning_rate": 8.376664040605122e-06, |
| "loss": 0.3812, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.0151187904967602, |
| "grad_norm": 0.17159010469913483, |
| "learning_rate": 8.367385522773625e-06, |
| "loss": 0.386, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0172786177105833, |
| "grad_norm": 0.19381286203861237, |
| "learning_rate": 8.358085734705021e-06, |
| "loss": 0.3958, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.019438444924406, |
| "grad_norm": 0.17137818038463593, |
| "learning_rate": 8.348764735141823e-06, |
| "loss": 0.3867, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.0215982721382288, |
| "grad_norm": 0.18011374771595, |
| "learning_rate": 8.339422582960533e-06, |
| "loss": 0.3974, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.0237580993520519, |
| "grad_norm": 0.18092289566993713, |
| "learning_rate": 8.33005933717126e-06, |
| "loss": 0.3697, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0259179265658747, |
| "grad_norm": 0.15359187126159668, |
| "learning_rate": 8.320675056917353e-06, |
| "loss": 0.3813, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0280777537796977, |
| "grad_norm": 0.16927658021450043, |
| "learning_rate": 8.311269801475026e-06, |
| "loss": 0.3834, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0302375809935205, |
| "grad_norm": 0.17222736775875092, |
| "learning_rate": 8.301843630252986e-06, |
| "loss": 0.3869, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.0323974082073435, |
| "grad_norm": 0.17333416640758514, |
| "learning_rate": 8.29239660279205e-06, |
| "loss": 0.3853, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0345572354211663, |
| "grad_norm": 0.18697193264961243, |
| "learning_rate": 8.282928778764783e-06, |
| "loss": 0.3974, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.0367170626349893, |
| "grad_norm": 0.1769992560148239, |
| "learning_rate": 8.273440217975103e-06, |
| "loss": 0.39, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.038876889848812, |
| "grad_norm": 0.1826915144920349, |
| "learning_rate": 8.26393098035792e-06, |
| "loss": 0.383, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.041036717062635, |
| "grad_norm": 0.18807494640350342, |
| "learning_rate": 8.254401125978744e-06, |
| "loss": 0.3875, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.043196544276458, |
| "grad_norm": 0.1729234904050827, |
| "learning_rate": 8.244850715033316e-06, |
| "loss": 0.3888, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.0453563714902807, |
| "grad_norm": 0.18379338085651398, |
| "learning_rate": 8.235279807847223e-06, |
| "loss": 0.3867, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.0475161987041037, |
| "grad_norm": 0.1450575441122055, |
| "learning_rate": 8.225688464875514e-06, |
| "loss": 0.3895, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.0496760259179265, |
| "grad_norm": 0.15889526903629303, |
| "learning_rate": 8.216076746702327e-06, |
| "loss": 0.3817, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.0518358531317495, |
| "grad_norm": 0.16916847229003906, |
| "learning_rate": 8.206444714040496e-06, |
| "loss": 0.382, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.0539956803455723, |
| "grad_norm": 0.1597558856010437, |
| "learning_rate": 8.196792427731175e-06, |
| "loss": 0.3905, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.0561555075593954, |
| "grad_norm": 0.1566566675901413, |
| "learning_rate": 8.18711994874345e-06, |
| "loss": 0.3841, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.0583153347732182, |
| "grad_norm": 0.17559486627578735, |
| "learning_rate": 8.177427338173955e-06, |
| "loss": 0.3792, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.060475161987041, |
| "grad_norm": 0.15165618062019348, |
| "learning_rate": 8.167714657246486e-06, |
| "loss": 0.3804, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.062634989200864, |
| "grad_norm": 0.15612950921058655, |
| "learning_rate": 8.157981967311614e-06, |
| "loss": 0.382, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.0647948164146868, |
| "grad_norm": 0.16774365305900574, |
| "learning_rate": 8.1482293298463e-06, |
| "loss": 0.3905, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.0669546436285098, |
| "grad_norm": 0.1574973613023758, |
| "learning_rate": 8.138456806453503e-06, |
| "loss": 0.3881, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.0691144708423326, |
| "grad_norm": 0.20335029065608978, |
| "learning_rate": 8.12866445886179e-06, |
| "loss": 0.3752, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.0712742980561556, |
| "grad_norm": 0.15830448269844055, |
| "learning_rate": 8.118852348924951e-06, |
| "loss": 0.3814, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.0734341252699784, |
| "grad_norm": 0.20952075719833374, |
| "learning_rate": 8.109020538621607e-06, |
| "loss": 0.3798, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.0755939524838012, |
| "grad_norm": 0.18261830508708954, |
| "learning_rate": 8.099169090054812e-06, |
| "loss": 0.3895, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.0777537796976242, |
| "grad_norm": 0.20644772052764893, |
| "learning_rate": 8.089298065451673e-06, |
| "loss": 0.3744, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.079913606911447, |
| "grad_norm": 0.17039693892002106, |
| "learning_rate": 8.079407527162944e-06, |
| "loss": 0.385, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.08207343412527, |
| "grad_norm": 0.1829117089509964, |
| "learning_rate": 8.069497537662638e-06, |
| "loss": 0.3745, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.0842332613390928, |
| "grad_norm": 0.16001296043395996, |
| "learning_rate": 8.05956815954764e-06, |
| "loss": 0.3796, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.0863930885529158, |
| "grad_norm": 0.1937176138162613, |
| "learning_rate": 8.049619455537296e-06, |
| "loss": 0.3814, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.0885529157667386, |
| "grad_norm": 0.15796703100204468, |
| "learning_rate": 8.039651488473028e-06, |
| "loss": 0.3804, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.0907127429805616, |
| "grad_norm": 0.21041610836982727, |
| "learning_rate": 8.029664321317932e-06, |
| "loss": 0.3862, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.0928725701943844, |
| "grad_norm": 0.18780550360679626, |
| "learning_rate": 8.019658017156384e-06, |
| "loss": 0.3807, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.0950323974082075, |
| "grad_norm": 0.1692945808172226, |
| "learning_rate": 8.009632639193643e-06, |
| "loss": 0.3845, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.0971922246220303, |
| "grad_norm": 0.18981167674064636, |
| "learning_rate": 7.999588250755442e-06, |
| "loss": 0.3848, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.099352051835853, |
| "grad_norm": 0.15760387480258942, |
| "learning_rate": 7.989524915287595e-06, |
| "loss": 0.3757, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.101511879049676, |
| "grad_norm": 0.140371173620224, |
| "learning_rate": 7.979442696355601e-06, |
| "loss": 0.3825, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1036717062634989, |
| "grad_norm": 0.15832389891147614, |
| "learning_rate": 7.969341657644236e-06, |
| "loss": 0.3863, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1058315334773219, |
| "grad_norm": 0.14990824460983276, |
| "learning_rate": 7.959221862957149e-06, |
| "loss": 0.3917, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1079913606911447, |
| "grad_norm": 0.15084333717823029, |
| "learning_rate": 7.94908337621646e-06, |
| "loss": 0.3863, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.1101511879049677, |
| "grad_norm": 0.15440189838409424, |
| "learning_rate": 7.938926261462366e-06, |
| "loss": 0.3785, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.1123110151187905, |
| "grad_norm": 0.16310814023017883, |
| "learning_rate": 7.928750582852722e-06, |
| "loss": 0.3796, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.1144708423326133, |
| "grad_norm": 0.15400250256061554, |
| "learning_rate": 7.918556404662645e-06, |
| "loss": 0.3913, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1166306695464363, |
| "grad_norm": 0.16480040550231934, |
| "learning_rate": 7.908343791284104e-06, |
| "loss": 0.3817, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.118790496760259, |
| "grad_norm": 0.14894555509090424, |
| "learning_rate": 7.898112807225517e-06, |
| "loss": 0.3797, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1209503239740821, |
| "grad_norm": 0.16937804222106934, |
| "learning_rate": 7.887863517111337e-06, |
| "loss": 0.3832, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.123110151187905, |
| "grad_norm": 0.1606750190258026, |
| "learning_rate": 7.877595985681656e-06, |
| "loss": 0.3735, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.125269978401728, |
| "grad_norm": 0.1703948825597763, |
| "learning_rate": 7.867310277791778e-06, |
| "loss": 0.3754, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.1274298056155507, |
| "grad_norm": 0.1625399887561798, |
| "learning_rate": 7.857006458411826e-06, |
| "loss": 0.3773, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1295896328293737, |
| "grad_norm": 0.17872779071331024, |
| "learning_rate": 7.846684592626324e-06, |
| "loss": 0.3867, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1317494600431965, |
| "grad_norm": 0.14789296686649323, |
| "learning_rate": 7.836344745633785e-06, |
| "loss": 0.3794, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1339092872570196, |
| "grad_norm": 0.15560902655124664, |
| "learning_rate": 7.8259869827463e-06, |
| "loss": 0.3795, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.1360691144708424, |
| "grad_norm": 0.1677931696176529, |
| "learning_rate": 7.815611369389134e-06, |
| "loss": 0.3921, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.1382289416846652, |
| "grad_norm": 0.15654879808425903, |
| "learning_rate": 7.805217971100295e-06, |
| "loss": 0.3893, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.1403887688984882, |
| "grad_norm": 0.15903332829475403, |
| "learning_rate": 7.794806853530139e-06, |
| "loss": 0.3791, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.142548596112311, |
| "grad_norm": 0.1683822125196457, |
| "learning_rate": 7.78437808244094e-06, |
| "loss": 0.3877, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.144708423326134, |
| "grad_norm": 0.15309610962867737, |
| "learning_rate": 7.773931723706487e-06, |
| "loss": 0.3746, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.1468682505399568, |
| "grad_norm": 0.14578138291835785, |
| "learning_rate": 7.763467843311658e-06, |
| "loss": 0.3767, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.1490280777537798, |
| "grad_norm": 0.16950742900371552, |
| "learning_rate": 7.752986507352009e-06, |
| "loss": 0.3873, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.1511879049676026, |
| "grad_norm": 0.1471281796693802, |
| "learning_rate": 7.742487782033352e-06, |
| "loss": 0.3837, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.1533477321814254, |
| "grad_norm": 0.14385339617729187, |
| "learning_rate": 7.731971733671347e-06, |
| "loss": 0.3944, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.1555075593952484, |
| "grad_norm": 0.14128537476062775, |
| "learning_rate": 7.721438428691065e-06, |
| "loss": 0.3802, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.1576673866090712, |
| "grad_norm": 0.1677146852016449, |
| "learning_rate": 7.71088793362659e-06, |
| "loss": 0.3812, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.1598272138228942, |
| "grad_norm": 0.14564774930477142, |
| "learning_rate": 7.70032031512058e-06, |
| "loss": 0.3827, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.161987041036717, |
| "grad_norm": 0.15598656237125397, |
| "learning_rate": 7.689735639923857e-06, |
| "loss": 0.3829, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.16414686825054, |
| "grad_norm": 0.14980514347553253, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.3767, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.1663066954643628, |
| "grad_norm": 0.15688128769397736, |
| "learning_rate": 7.668515386999837e-06, |
| "loss": 0.3931, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1684665226781856, |
| "grad_norm": 0.15419645607471466, |
| "learning_rate": 7.65787994331119e-06, |
| "loss": 0.375, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.1706263498920086, |
| "grad_norm": 0.15213316679000854, |
| "learning_rate": 7.647227711008288e-06, |
| "loss": 0.3841, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.1727861771058314, |
| "grad_norm": 0.14635787904262543, |
| "learning_rate": 7.636558757376413e-06, |
| "loss": 0.379, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.1749460043196545, |
| "grad_norm": 0.1601177304983139, |
| "learning_rate": 7.6258731498064796e-06, |
| "loss": 0.3741, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.1771058315334773, |
| "grad_norm": 0.15203504264354706, |
| "learning_rate": 7.615170955794592e-06, |
| "loss": 0.3764, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.1792656587473003, |
| "grad_norm": 0.1715112179517746, |
| "learning_rate": 7.604452242941622e-06, |
| "loss": 0.3811, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.181425485961123, |
| "grad_norm": 0.17397920787334442, |
| "learning_rate": 7.593717078952788e-06, |
| "loss": 0.3826, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.183585313174946, |
| "grad_norm": 0.14259247481822968, |
| "learning_rate": 7.582965531637221e-06, |
| "loss": 0.3725, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.1857451403887689, |
| "grad_norm": 0.16911283135414124, |
| "learning_rate": 7.572197668907533e-06, |
| "loss": 0.3915, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.187904967602592, |
| "grad_norm": 0.1575639694929123, |
| "learning_rate": 7.561413558779401e-06, |
| "loss": 0.3719, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.1900647948164147, |
| "grad_norm": 0.15729346871376038, |
| "learning_rate": 7.550613269371124e-06, |
| "loss": 0.3802, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.1922246220302375, |
| "grad_norm": 0.16103574633598328, |
| "learning_rate": 7.5397968689032e-06, |
| "loss": 0.379, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.1943844492440605, |
| "grad_norm": 0.16614358127117157, |
| "learning_rate": 7.528964425697895e-06, |
| "loss": 0.3874, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.1965442764578833, |
| "grad_norm": 0.14216990768909454, |
| "learning_rate": 7.518116008178805e-06, |
| "loss": 0.3791, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.1987041036717063, |
| "grad_norm": 0.15424562990665436, |
| "learning_rate": 7.507251684870433e-06, |
| "loss": 0.3855, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2008639308855291, |
| "grad_norm": 0.15728497505187988, |
| "learning_rate": 7.496371524397747e-06, |
| "loss": 0.3767, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.2030237580993521, |
| "grad_norm": 0.16239339113235474, |
| "learning_rate": 7.485475595485756e-06, |
| "loss": 0.39, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.205183585313175, |
| "grad_norm": 0.18078574538230896, |
| "learning_rate": 7.474563966959068e-06, |
| "loss": 0.3805, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2073434125269977, |
| "grad_norm": 0.1507551670074463, |
| "learning_rate": 7.463636707741458e-06, |
| "loss": 0.385, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2095032397408207, |
| "grad_norm": 0.1794394552707672, |
| "learning_rate": 7.452693886855438e-06, |
| "loss": 0.3869, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2116630669546435, |
| "grad_norm": 0.17479896545410156, |
| "learning_rate": 7.4417355734218085e-06, |
| "loss": 0.3763, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.2138228941684666, |
| "grad_norm": 0.15585078299045563, |
| "learning_rate": 7.430761836659235e-06, |
| "loss": 0.3893, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.2159827213822894, |
| "grad_norm": 0.17647355794906616, |
| "learning_rate": 7.4197727458837995e-06, |
| "loss": 0.3858, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.2181425485961124, |
| "grad_norm": 0.1657349020242691, |
| "learning_rate": 7.408768370508577e-06, |
| "loss": 0.3787, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2203023758099352, |
| "grad_norm": 0.15990415215492249, |
| "learning_rate": 7.397748780043179e-06, |
| "loss": 0.3816, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2224622030237582, |
| "grad_norm": 0.16552990674972534, |
| "learning_rate": 7.386714044093331e-06, |
| "loss": 0.3818, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.224622030237581, |
| "grad_norm": 0.17762261629104614, |
| "learning_rate": 7.375664232360421e-06, |
| "loss": 0.3823, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.226781857451404, |
| "grad_norm": 0.17362867295742035, |
| "learning_rate": 7.364599414641064e-06, |
| "loss": 0.3796, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2289416846652268, |
| "grad_norm": 0.15305167436599731, |
| "learning_rate": 7.353519660826665e-06, |
| "loss": 0.3816, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.2311015118790496, |
| "grad_norm": 0.1919698268175125, |
| "learning_rate": 7.342425040902967e-06, |
| "loss": 0.3927, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2332613390928726, |
| "grad_norm": 0.15654806792736053, |
| "learning_rate": 7.331315624949624e-06, |
| "loss": 0.3844, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.2354211663066954, |
| "grad_norm": 0.1898239254951477, |
| "learning_rate": 7.320191483139742e-06, |
| "loss": 0.3935, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.2375809935205184, |
| "grad_norm": 0.15385276079177856, |
| "learning_rate": 7.309052685739448e-06, |
| "loss": 0.3731, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.2397408207343412, |
| "grad_norm": 0.15585872530937195, |
| "learning_rate": 7.297899303107441e-06, |
| "loss": 0.3802, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.2419006479481642, |
| "grad_norm": 0.14450909197330475, |
| "learning_rate": 7.286731405694544e-06, |
| "loss": 0.368, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.244060475161987, |
| "grad_norm": 0.15306542813777924, |
| "learning_rate": 7.275549064043269e-06, |
| "loss": 0.3827, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.2462203023758098, |
| "grad_norm": 0.15712149441242218, |
| "learning_rate": 7.264352348787364e-06, |
| "loss": 0.3933, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.2483801295896328, |
| "grad_norm": 0.16853763163089752, |
| "learning_rate": 7.253141330651367e-06, |
| "loss": 0.3886, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.2505399568034556, |
| "grad_norm": 0.15141934156417847, |
| "learning_rate": 7.241916080450163e-06, |
| "loss": 0.373, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.2526997840172787, |
| "grad_norm": 0.16748425364494324, |
| "learning_rate": 7.23067666908853e-06, |
| "loss": 0.3779, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2548596112311015, |
| "grad_norm": 0.15394426882266998, |
| "learning_rate": 7.219423167560701e-06, |
| "loss": 0.3803, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.2570194384449245, |
| "grad_norm": 0.15716637670993805, |
| "learning_rate": 7.208155646949908e-06, |
| "loss": 0.3903, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.2591792656587473, |
| "grad_norm": 0.17571674287319183, |
| "learning_rate": 7.196874178427933e-06, |
| "loss": 0.3693, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.26133909287257, |
| "grad_norm": 0.16210925579071045, |
| "learning_rate": 7.185578833254665e-06, |
| "loss": 0.3806, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.263498920086393, |
| "grad_norm": 0.17312122881412506, |
| "learning_rate": 7.1742696827776415e-06, |
| "loss": 0.3867, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.265658747300216, |
| "grad_norm": 0.16945572197437286, |
| "learning_rate": 7.162946798431605e-06, |
| "loss": 0.3834, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.267818574514039, |
| "grad_norm": 0.15858979523181915, |
| "learning_rate": 7.151610251738045e-06, |
| "loss": 0.3837, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.2699784017278617, |
| "grad_norm": 0.14600925147533417, |
| "learning_rate": 7.1402601143047514e-06, |
| "loss": 0.3797, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.2721382289416847, |
| "grad_norm": 0.15963494777679443, |
| "learning_rate": 7.128896457825364e-06, |
| "loss": 0.3904, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.2742980561555075, |
| "grad_norm": 0.1409822553396225, |
| "learning_rate": 7.11751935407891e-06, |
| "loss": 0.384, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2764578833693305, |
| "grad_norm": 0.1461641937494278, |
| "learning_rate": 7.106128874929364e-06, |
| "loss": 0.3769, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.2786177105831533, |
| "grad_norm": 0.1487351655960083, |
| "learning_rate": 7.094725092325177e-06, |
| "loss": 0.3766, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.2807775377969763, |
| "grad_norm": 0.1428721696138382, |
| "learning_rate": 7.08330807829884e-06, |
| "loss": 0.3833, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.2829373650107991, |
| "grad_norm": 0.14245618879795074, |
| "learning_rate": 7.071877904966422e-06, |
| "loss": 0.382, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.285097192224622, |
| "grad_norm": 0.1549312025308609, |
| "learning_rate": 7.060434644527105e-06, |
| "loss": 0.3723, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.287257019438445, |
| "grad_norm": 0.14332742989063263, |
| "learning_rate": 7.048978369262747e-06, |
| "loss": 0.385, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.2894168466522677, |
| "grad_norm": 0.15278279781341553, |
| "learning_rate": 7.037509151537404e-06, |
| "loss": 0.3715, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.2915766738660908, |
| "grad_norm": 0.14458084106445312, |
| "learning_rate": 7.026027063796891e-06, |
| "loss": 0.3708, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.2937365010799136, |
| "grad_norm": 0.15547068417072296, |
| "learning_rate": 7.014532178568314e-06, |
| "loss": 0.3784, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.2958963282937366, |
| "grad_norm": 0.15412218868732452, |
| "learning_rate": 7.003024568459614e-06, |
| "loss": 0.3785, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2980561555075594, |
| "grad_norm": 0.15792393684387207, |
| "learning_rate": 6.991504306159115e-06, |
| "loss": 0.3912, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3002159827213822, |
| "grad_norm": 0.1512409746646881, |
| "learning_rate": 6.9799714644350504e-06, |
| "loss": 0.3822, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.3023758099352052, |
| "grad_norm": 0.15624138712882996, |
| "learning_rate": 6.968426116135118e-06, |
| "loss": 0.3786, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3045356371490282, |
| "grad_norm": 0.1699935495853424, |
| "learning_rate": 6.9568683341860135e-06, |
| "loss": 0.382, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.306695464362851, |
| "grad_norm": 0.1427888125181198, |
| "learning_rate": 6.945298191592967e-06, |
| "loss": 0.3694, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.3088552915766738, |
| "grad_norm": 0.15631450712680817, |
| "learning_rate": 6.93371576143929e-06, |
| "loss": 0.3846, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.3110151187904968, |
| "grad_norm": 0.15259280800819397, |
| "learning_rate": 6.922121116885905e-06, |
| "loss": 0.378, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3131749460043196, |
| "grad_norm": 0.13901083171367645, |
| "learning_rate": 6.910514331170888e-06, |
| "loss": 0.3852, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3153347732181426, |
| "grad_norm": 0.15216070413589478, |
| "learning_rate": 6.898895477609007e-06, |
| "loss": 0.3852, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.3174946004319654, |
| "grad_norm": 0.13873577117919922, |
| "learning_rate": 6.887264629591254e-06, |
| "loss": 0.3677, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3196544276457884, |
| "grad_norm": 0.15047885477542877, |
| "learning_rate": 6.875621860584389e-06, |
| "loss": 0.3811, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3218142548596112, |
| "grad_norm": 0.13761691749095917, |
| "learning_rate": 6.863967244130467e-06, |
| "loss": 0.3766, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.323974082073434, |
| "grad_norm": 0.14068377017974854, |
| "learning_rate": 6.852300853846381e-06, |
| "loss": 0.3768, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.326133909287257, |
| "grad_norm": 0.14240694046020508, |
| "learning_rate": 6.840622763423391e-06, |
| "loss": 0.3804, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.3282937365010798, |
| "grad_norm": 0.14259974658489227, |
| "learning_rate": 6.8289330466266635e-06, |
| "loss": 0.3796, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.3304535637149029, |
| "grad_norm": 0.13572795689105988, |
| "learning_rate": 6.817231777294804e-06, |
| "loss": 0.3791, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.3326133909287257, |
| "grad_norm": 0.14472903311252594, |
| "learning_rate": 6.805519029339388e-06, |
| "loss": 0.3825, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.3347732181425487, |
| "grad_norm": 0.13924075663089752, |
| "learning_rate": 6.793794876744499e-06, |
| "loss": 0.3822, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.3369330453563715, |
| "grad_norm": 0.1507209688425064, |
| "learning_rate": 6.782059393566254e-06, |
| "loss": 0.3799, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.3390928725701943, |
| "grad_norm": 0.15504410862922668, |
| "learning_rate": 6.770312653932346e-06, |
| "loss": 0.396, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3412526997840173, |
| "grad_norm": 0.14195454120635986, |
| "learning_rate": 6.758554732041564e-06, |
| "loss": 0.3797, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.3434125269978403, |
| "grad_norm": 0.158910870552063, |
| "learning_rate": 6.7467857021633354e-06, |
| "loss": 0.3923, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.345572354211663, |
| "grad_norm": 0.1433819979429245, |
| "learning_rate": 6.7350056386372485e-06, |
| "loss": 0.3819, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.347732181425486, |
| "grad_norm": 0.1474255919456482, |
| "learning_rate": 6.723214615872585e-06, |
| "loss": 0.3819, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.349892008639309, |
| "grad_norm": 0.15845805406570435, |
| "learning_rate": 6.711412708347857e-06, |
| "loss": 0.39, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.3520518358531317, |
| "grad_norm": 0.12925179302692413, |
| "learning_rate": 6.699599990610324e-06, |
| "loss": 0.3779, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.3542116630669545, |
| "grad_norm": 0.1499335616827011, |
| "learning_rate": 6.68777653727553e-06, |
| "loss": 0.3804, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.3563714902807775, |
| "grad_norm": 0.15274159610271454, |
| "learning_rate": 6.675942423026834e-06, |
| "loss": 0.3783, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.3585313174946005, |
| "grad_norm": 0.13748182356357574, |
| "learning_rate": 6.664097722614934e-06, |
| "loss": 0.3735, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.3606911447084233, |
| "grad_norm": 0.14609220623970032, |
| "learning_rate": 6.652242510857395e-06, |
| "loss": 0.392, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3628509719222461, |
| "grad_norm": 0.1698596030473709, |
| "learning_rate": 6.640376862638176e-06, |
| "loss": 0.3832, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.3650107991360692, |
| "grad_norm": 0.1435316503047943, |
| "learning_rate": 6.6285008529071615e-06, |
| "loss": 0.3819, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.367170626349892, |
| "grad_norm": 0.13530634343624115, |
| "learning_rate": 6.616614556679684e-06, |
| "loss": 0.3809, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.369330453563715, |
| "grad_norm": 0.18893133103847504, |
| "learning_rate": 6.604718049036047e-06, |
| "loss": 0.3828, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.3714902807775378, |
| "grad_norm": 0.15310388803482056, |
| "learning_rate": 6.592811405121064e-06, |
| "loss": 0.3831, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.3736501079913608, |
| "grad_norm": 0.14660876989364624, |
| "learning_rate": 6.580894700143565e-06, |
| "loss": 0.3781, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.3758099352051836, |
| "grad_norm": 0.14833448827266693, |
| "learning_rate": 6.568968009375938e-06, |
| "loss": 0.3775, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.3779697624190064, |
| "grad_norm": 0.1682375818490982, |
| "learning_rate": 6.557031408153642e-06, |
| "loss": 0.3758, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.3801295896328294, |
| "grad_norm": 0.1533748358488083, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.3793, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.3822894168466522, |
| "grad_norm": 0.14140866696834564, |
| "learning_rate": 6.533128775999411e-06, |
| "loss": 0.384, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3844492440604752, |
| "grad_norm": 0.14936432242393494, |
| "learning_rate": 6.521162896049491e-06, |
| "loss": 0.3891, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.386609071274298, |
| "grad_norm": 0.15731281042099, |
| "learning_rate": 6.509187407607981e-06, |
| "loss": 0.3841, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.388768898488121, |
| "grad_norm": 0.15000282227993011, |
| "learning_rate": 6.497202386318573e-06, |
| "loss": 0.3851, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.3909287257019438, |
| "grad_norm": 0.14697906374931335, |
| "learning_rate": 6.485207907885175e-06, |
| "loss": 0.3773, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.3930885529157666, |
| "grad_norm": 0.1559685468673706, |
| "learning_rate": 6.473204048071433e-06, |
| "loss": 0.3821, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.3952483801295896, |
| "grad_norm": 0.15039733052253723, |
| "learning_rate": 6.4611908827002504e-06, |
| "loss": 0.3847, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.3974082073434126, |
| "grad_norm": 0.16008631885051727, |
| "learning_rate": 6.449168487653305e-06, |
| "loss": 0.3802, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.3995680345572354, |
| "grad_norm": 0.14514020085334778, |
| "learning_rate": 6.437136938870583e-06, |
| "loss": 0.3841, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.4017278617710582, |
| "grad_norm": 0.15419939160346985, |
| "learning_rate": 6.425096312349881e-06, |
| "loss": 0.3903, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4038876889848813, |
| "grad_norm": 0.1530395895242691, |
| "learning_rate": 6.413046684146343e-06, |
| "loss": 0.3794, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.406047516198704, |
| "grad_norm": 0.15808750689029694, |
| "learning_rate": 6.400988130371969e-06, |
| "loss": 0.3766, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.408207343412527, |
| "grad_norm": 0.14891669154167175, |
| "learning_rate": 6.388920727195138e-06, |
| "loss": 0.3781, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.4103671706263499, |
| "grad_norm": 0.14925065636634827, |
| "learning_rate": 6.376844550840126e-06, |
| "loss": 0.3906, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4125269978401729, |
| "grad_norm": 0.16382241249084473, |
| "learning_rate": 6.364759677586627e-06, |
| "loss": 0.3771, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4146868250539957, |
| "grad_norm": 0.1546749770641327, |
| "learning_rate": 6.352666183769269e-06, |
| "loss": 0.3863, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.4168466522678185, |
| "grad_norm": 0.14334626495838165, |
| "learning_rate": 6.340564145777131e-06, |
| "loss": 0.3742, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4190064794816415, |
| "grad_norm": 0.15877507627010345, |
| "learning_rate": 6.328453640053264e-06, |
| "loss": 0.3779, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.4211663066954643, |
| "grad_norm": 0.1556321382522583, |
| "learning_rate": 6.316334743094201e-06, |
| "loss": 0.3739, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4233261339092873, |
| "grad_norm": 0.14519956707954407, |
| "learning_rate": 6.304207531449486e-06, |
| "loss": 0.3786, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.42548596112311, |
| "grad_norm": 0.14613112807273865, |
| "learning_rate": 6.292072081721173e-06, |
| "loss": 0.381, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.4276457883369331, |
| "grad_norm": 0.15813447535037994, |
| "learning_rate": 6.279928470563365e-06, |
| "loss": 0.3866, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.429805615550756, |
| "grad_norm": 0.15421751141548157, |
| "learning_rate": 6.267776774681703e-06, |
| "loss": 0.3796, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.4319654427645787, |
| "grad_norm": 0.15891966223716736, |
| "learning_rate": 6.255617070832908e-06, |
| "loss": 0.3717, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.4341252699784017, |
| "grad_norm": 0.15779848396778107, |
| "learning_rate": 6.243449435824276e-06, |
| "loss": 0.3701, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.4362850971922247, |
| "grad_norm": 0.14361926913261414, |
| "learning_rate": 6.231273946513201e-06, |
| "loss": 0.3698, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.4384449244060475, |
| "grad_norm": 0.1553213894367218, |
| "learning_rate": 6.219090679806694e-06, |
| "loss": 0.381, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.4406047516198703, |
| "grad_norm": 0.14260952174663544, |
| "learning_rate": 6.206899712660887e-06, |
| "loss": 0.3734, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.4427645788336934, |
| "grad_norm": 0.147933229804039, |
| "learning_rate": 6.1947011220805535e-06, |
| "loss": 0.3799, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.4449244060475162, |
| "grad_norm": 0.15127696096897125, |
| "learning_rate": 6.182494985118625e-06, |
| "loss": 0.3792, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.4470842332613392, |
| "grad_norm": 0.14316388964653015, |
| "learning_rate": 6.170281378875692e-06, |
| "loss": 0.3727, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.449244060475162, |
| "grad_norm": 0.14327897131443024, |
| "learning_rate": 6.158060380499533e-06, |
| "loss": 0.3823, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.451403887688985, |
| "grad_norm": 0.1345098912715912, |
| "learning_rate": 6.145832067184614e-06, |
| "loss": 0.3924, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.4535637149028078, |
| "grad_norm": 0.13999171555042267, |
| "learning_rate": 6.133596516171609e-06, |
| "loss": 0.3809, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.4557235421166306, |
| "grad_norm": 0.12059102207422256, |
| "learning_rate": 6.121353804746907e-06, |
| "loss": 0.3788, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.4578833693304536, |
| "grad_norm": 0.14187489449977875, |
| "learning_rate": 6.109104010242127e-06, |
| "loss": 0.3845, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.4600431965442764, |
| "grad_norm": 0.14432717859745026, |
| "learning_rate": 6.09684721003363e-06, |
| "loss": 0.3801, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.4622030237580994, |
| "grad_norm": 0.1373838186264038, |
| "learning_rate": 6.084583481542028e-06, |
| "loss": 0.3731, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.4643628509719222, |
| "grad_norm": 0.15109464526176453, |
| "learning_rate": 6.072312902231692e-06, |
| "loss": 0.3895, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.4665226781857452, |
| "grad_norm": 0.15525732934474945, |
| "learning_rate": 6.060035549610275e-06, |
| "loss": 0.3785, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.468682505399568, |
| "grad_norm": 0.14632560312747955, |
| "learning_rate": 6.047751501228203e-06, |
| "loss": 0.3793, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.4708423326133908, |
| "grad_norm": 0.1495695561170578, |
| "learning_rate": 6.0354608346782075e-06, |
| "loss": 0.3817, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.4730021598272138, |
| "grad_norm": 0.1651640236377716, |
| "learning_rate": 6.023163627594813e-06, |
| "loss": 0.386, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.4751619870410368, |
| "grad_norm": 0.14565086364746094, |
| "learning_rate": 6.010859957653869e-06, |
| "loss": 0.3749, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.4773218142548596, |
| "grad_norm": 0.1538180410861969, |
| "learning_rate": 5.9985499025720354e-06, |
| "loss": 0.3769, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.4794816414686824, |
| "grad_norm": 0.12949156761169434, |
| "learning_rate": 5.986233540106315e-06, |
| "loss": 0.3721, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.4816414686825055, |
| "grad_norm": 0.14916200935840607, |
| "learning_rate": 5.973910948053545e-06, |
| "loss": 0.386, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.4838012958963283, |
| "grad_norm": 0.1727481335401535, |
| "learning_rate": 5.961582204249915e-06, |
| "loss": 0.3769, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.485961123110151, |
| "grad_norm": 0.1516205221414566, |
| "learning_rate": 5.949247386570471e-06, |
| "loss": 0.3865, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.488120950323974, |
| "grad_norm": 0.164317786693573, |
| "learning_rate": 5.936906572928625e-06, |
| "loss": 0.3803, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.490280777537797, |
| "grad_norm": 0.15949031710624695, |
| "learning_rate": 5.924559841275661e-06, |
| "loss": 0.3819, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4924406047516199, |
| "grad_norm": 0.13948768377304077, |
| "learning_rate": 5.912207269600252e-06, |
| "loss": 0.381, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.4946004319654427, |
| "grad_norm": 0.17031709849834442, |
| "learning_rate": 5.89984893592795e-06, |
| "loss": 0.3837, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.4967602591792657, |
| "grad_norm": 0.13423483073711395, |
| "learning_rate": 5.887484918320708e-06, |
| "loss": 0.3824, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.4989200863930885, |
| "grad_norm": 0.15468288958072662, |
| "learning_rate": 5.8751152948763815e-06, |
| "loss": 0.372, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.5010799136069113, |
| "grad_norm": 0.16139718890190125, |
| "learning_rate": 5.8627401437282334e-06, |
| "loss": 0.3775, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.5032397408207343, |
| "grad_norm": 0.15016594529151917, |
| "learning_rate": 5.850359543044446e-06, |
| "loss": 0.3781, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5053995680345573, |
| "grad_norm": 0.1405385285615921, |
| "learning_rate": 5.837973571027621e-06, |
| "loss": 0.3789, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5075593952483801, |
| "grad_norm": 0.15115734934806824, |
| "learning_rate": 5.82558230591429e-06, |
| "loss": 0.384, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.509719222462203, |
| "grad_norm": 0.14200249314308167, |
| "learning_rate": 5.813185825974419e-06, |
| "loss": 0.3846, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.511879049676026, |
| "grad_norm": 0.147098109126091, |
| "learning_rate": 5.80078420951091e-06, |
| "loss": 0.3839, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.514038876889849, |
| "grad_norm": 0.1505637764930725, |
| "learning_rate": 5.7883775348591146e-06, |
| "loss": 0.3795, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.5161987041036717, |
| "grad_norm": 0.14681459963321686, |
| "learning_rate": 5.77596588038633e-06, |
| "loss": 0.3879, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.5183585313174945, |
| "grad_norm": 0.1480962336063385, |
| "learning_rate": 5.763549324491317e-06, |
| "loss": 0.3851, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.5205183585313176, |
| "grad_norm": 0.1447979360818863, |
| "learning_rate": 5.751127945603786e-06, |
| "loss": 0.379, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.5226781857451404, |
| "grad_norm": 0.14420117437839508, |
| "learning_rate": 5.7387018221839195e-06, |
| "loss": 0.3844, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.5248380129589632, |
| "grad_norm": 0.1400950700044632, |
| "learning_rate": 5.726271032721864e-06, |
| "loss": 0.3854, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.5269978401727862, |
| "grad_norm": 0.14526066184043884, |
| "learning_rate": 5.7138356557372444e-06, |
| "loss": 0.3815, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.5291576673866092, |
| "grad_norm": 0.15576431155204773, |
| "learning_rate": 5.70139576977866e-06, |
| "loss": 0.3866, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.531317494600432, |
| "grad_norm": 0.1484983265399933, |
| "learning_rate": 5.68895145342319e-06, |
| "loss": 0.3695, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.5334773218142548, |
| "grad_norm": 0.14368119835853577, |
| "learning_rate": 5.6765027852759015e-06, |
| "loss": 0.3751, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.5356371490280778, |
| "grad_norm": 0.14985284209251404, |
| "learning_rate": 5.664049843969348e-06, |
| "loss": 0.3759, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.5377969762419006, |
| "grad_norm": 0.14019222557544708, |
| "learning_rate": 5.651592708163074e-06, |
| "loss": 0.3768, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.5399568034557234, |
| "grad_norm": 0.15098613500595093, |
| "learning_rate": 5.639131456543119e-06, |
| "loss": 0.3755, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.5421166306695464, |
| "grad_norm": 0.13311173021793365, |
| "learning_rate": 5.626666167821522e-06, |
| "loss": 0.3727, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.5442764578833694, |
| "grad_norm": 0.1466924548149109, |
| "learning_rate": 5.614196920735822e-06, |
| "loss": 0.3816, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.5464362850971922, |
| "grad_norm": 0.14080245792865753, |
| "learning_rate": 5.601723794048558e-06, |
| "loss": 0.3808, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.548596112311015, |
| "grad_norm": 0.13415026664733887, |
| "learning_rate": 5.58924686654678e-06, |
| "loss": 0.3846, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.550755939524838, |
| "grad_norm": 0.14663489162921906, |
| "learning_rate": 5.576766217041541e-06, |
| "loss": 0.3728, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.552915766738661, |
| "grad_norm": 0.14009855687618256, |
| "learning_rate": 5.5642819243674085e-06, |
| "loss": 0.3661, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.5550755939524838, |
| "grad_norm": 0.12949179112911224, |
| "learning_rate": 5.551794067381959e-06, |
| "loss": 0.3766, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5572354211663066, |
| "grad_norm": 0.1387072652578354, |
| "learning_rate": 5.5393027249652844e-06, |
| "loss": 0.3863, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.5593952483801297, |
| "grad_norm": 0.13852353394031525, |
| "learning_rate": 5.526807976019492e-06, |
| "loss": 0.3777, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.5615550755939525, |
| "grad_norm": 0.144193634390831, |
| "learning_rate": 5.514309899468209e-06, |
| "loss": 0.3708, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.5637149028077753, |
| "grad_norm": 0.13979433476924896, |
| "learning_rate": 5.5018085742560745e-06, |
| "loss": 0.3827, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.5658747300215983, |
| "grad_norm": 0.1412208378314972, |
| "learning_rate": 5.489304079348259e-06, |
| "loss": 0.3819, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.5680345572354213, |
| "grad_norm": 0.14092062413692474, |
| "learning_rate": 5.476796493729943e-06, |
| "loss": 0.38, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.570194384449244, |
| "grad_norm": 0.15495967864990234, |
| "learning_rate": 5.46428589640584e-06, |
| "loss": 0.3941, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.5723542116630669, |
| "grad_norm": 0.14512132108211517, |
| "learning_rate": 5.451772366399678e-06, |
| "loss": 0.3912, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.57451403887689, |
| "grad_norm": 0.15392383933067322, |
| "learning_rate": 5.439255982753717e-06, |
| "loss": 0.3751, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.5766738660907127, |
| "grad_norm": 0.14311961829662323, |
| "learning_rate": 5.426736824528236e-06, |
| "loss": 0.379, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.5788336933045355, |
| "grad_norm": 0.14618806540966034, |
| "learning_rate": 5.414214970801041e-06, |
| "loss": 0.3794, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.5809935205183585, |
| "grad_norm": 0.13308942317962646, |
| "learning_rate": 5.401690500666972e-06, |
| "loss": 0.3823, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.5831533477321815, |
| "grad_norm": 0.14792852103710175, |
| "learning_rate": 5.389163493237382e-06, |
| "loss": 0.379, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.5853131749460043, |
| "grad_norm": 0.15516813099384308, |
| "learning_rate": 5.376634027639664e-06, |
| "loss": 0.381, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.5874730021598271, |
| "grad_norm": 0.13429994881153107, |
| "learning_rate": 5.36410218301673e-06, |
| "loss": 0.3848, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.5896328293736501, |
| "grad_norm": 0.13838984072208405, |
| "learning_rate": 5.35156803852652e-06, |
| "loss": 0.3802, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.5917926565874732, |
| "grad_norm": 0.14528213441371918, |
| "learning_rate": 5.339031673341505e-06, |
| "loss": 0.3677, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.593952483801296, |
| "grad_norm": 0.14330457150936127, |
| "learning_rate": 5.326493166648179e-06, |
| "loss": 0.3754, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.5961123110151187, |
| "grad_norm": 0.150346577167511, |
| "learning_rate": 5.3139525976465675e-06, |
| "loss": 0.3867, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.5982721382289418, |
| "grad_norm": 0.13384312391281128, |
| "learning_rate": 5.301410045549719e-06, |
| "loss": 0.3807, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6004319654427646, |
| "grad_norm": 0.13676618039608002, |
| "learning_rate": 5.2888655895832075e-06, |
| "loss": 0.3776, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6025917926565874, |
| "grad_norm": 0.1279810667037964, |
| "learning_rate": 5.276319308984637e-06, |
| "loss": 0.3701, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.6047516198704104, |
| "grad_norm": 0.14258207380771637, |
| "learning_rate": 5.263771283003133e-06, |
| "loss": 0.3724, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.6069114470842334, |
| "grad_norm": 0.14498306810855865, |
| "learning_rate": 5.251221590898848e-06, |
| "loss": 0.3716, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.6090712742980562, |
| "grad_norm": 0.14295688271522522, |
| "learning_rate": 5.238670311942459e-06, |
| "loss": 0.3877, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.611231101511879, |
| "grad_norm": 0.12707604467868805, |
| "learning_rate": 5.226117525414663e-06, |
| "loss": 0.3724, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.613390928725702, |
| "grad_norm": 0.14804835617542267, |
| "learning_rate": 5.213563310605686e-06, |
| "loss": 0.3827, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.6155507559395248, |
| "grad_norm": 0.12879379093647003, |
| "learning_rate": 5.201007746814767e-06, |
| "loss": 0.3706, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.6177105831533476, |
| "grad_norm": 0.14704205095767975, |
| "learning_rate": 5.188450913349674e-06, |
| "loss": 0.3869, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.6198704103671706, |
| "grad_norm": 0.1534765362739563, |
| "learning_rate": 5.175892889526189e-06, |
| "loss": 0.3736, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6220302375809936, |
| "grad_norm": 0.1547222137451172, |
| "learning_rate": 5.16333375466762e-06, |
| "loss": 0.3796, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.6241900647948164, |
| "grad_norm": 0.13741885125637054, |
| "learning_rate": 5.150773588104284e-06, |
| "loss": 0.3817, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.6263498920086392, |
| "grad_norm": 0.13468679785728455, |
| "learning_rate": 5.138212469173022e-06, |
| "loss": 0.3781, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.6285097192224622, |
| "grad_norm": 0.1447237730026245, |
| "learning_rate": 5.1256504772166885e-06, |
| "loss": 0.3609, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.6306695464362853, |
| "grad_norm": 0.14089703559875488, |
| "learning_rate": 5.1130876915836495e-06, |
| "loss": 0.3609, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.6328293736501078, |
| "grad_norm": 0.13828714191913605, |
| "learning_rate": 5.100524191627289e-06, |
| "loss": 0.377, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.6349892008639308, |
| "grad_norm": 0.14428219199180603, |
| "learning_rate": 5.087960056705499e-06, |
| "loss": 0.3702, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.6371490280777539, |
| "grad_norm": 0.15890643000602722, |
| "learning_rate": 5.075395366180186e-06, |
| "loss": 0.3838, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.6393088552915767, |
| "grad_norm": 0.1514156460762024, |
| "learning_rate": 5.062830199416764e-06, |
| "loss": 0.3852, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.6414686825053995, |
| "grad_norm": 0.13208477199077606, |
| "learning_rate": 5.050264635783654e-06, |
| "loss": 0.3925, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.6436285097192225, |
| "grad_norm": 0.13601286709308624, |
| "learning_rate": 5.037698754651786e-06, |
| "loss": 0.3847, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.6457883369330455, |
| "grad_norm": 0.1418139487504959, |
| "learning_rate": 5.025132635394095e-06, |
| "loss": 0.3744, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.6479481641468683, |
| "grad_norm": 0.14959508180618286, |
| "learning_rate": 5.0125663573850204e-06, |
| "loss": 0.3712, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.650107991360691, |
| "grad_norm": 0.12993188202381134, |
| "learning_rate": 5e-06, |
| "loss": 0.38, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.652267818574514, |
| "grad_norm": 0.14041666686534882, |
| "learning_rate": 4.987433642614981e-06, |
| "loss": 0.3751, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.654427645788337, |
| "grad_norm": 0.1548304408788681, |
| "learning_rate": 4.974867364605906e-06, |
| "loss": 0.3588, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.6565874730021597, |
| "grad_norm": 0.12369633466005325, |
| "learning_rate": 4.962301245348215e-06, |
| "loss": 0.3822, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.6587473002159827, |
| "grad_norm": 0.13229462504386902, |
| "learning_rate": 4.949735364216348e-06, |
| "loss": 0.3631, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.6609071274298057, |
| "grad_norm": 0.13191936910152435, |
| "learning_rate": 4.937169800583237e-06, |
| "loss": 0.3783, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.6630669546436285, |
| "grad_norm": 0.14189469814300537, |
| "learning_rate": 4.924604633819815e-06, |
| "loss": 0.3724, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.6652267818574513, |
| "grad_norm": 0.1306021511554718, |
| "learning_rate": 4.912039943294502e-06, |
| "loss": 0.3736, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.6673866090712743, |
| "grad_norm": 0.1423332244157791, |
| "learning_rate": 4.899475808372714e-06, |
| "loss": 0.3735, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.6695464362850974, |
| "grad_norm": 0.13784444332122803, |
| "learning_rate": 4.886912308416353e-06, |
| "loss": 0.3737, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.67170626349892, |
| "grad_norm": 0.13520213961601257, |
| "learning_rate": 4.874349522783313e-06, |
| "loss": 0.3678, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.673866090712743, |
| "grad_norm": 0.15318076312541962, |
| "learning_rate": 4.861787530826979e-06, |
| "loss": 0.3716, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.676025917926566, |
| "grad_norm": 0.12309125065803528, |
| "learning_rate": 4.8492264118957165e-06, |
| "loss": 0.386, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.6781857451403888, |
| "grad_norm": 0.1470710188150406, |
| "learning_rate": 4.8366662453323826e-06, |
| "loss": 0.3848, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.6803455723542116, |
| "grad_norm": 0.12907086312770844, |
| "learning_rate": 4.8241071104738115e-06, |
| "loss": 0.3689, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.6825053995680346, |
| "grad_norm": 0.13970084488391876, |
| "learning_rate": 4.811549086650327e-06, |
| "loss": 0.3814, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.6846652267818576, |
| "grad_norm": 0.13439306616783142, |
| "learning_rate": 4.798992253185233e-06, |
| "loss": 0.3717, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.6868250539956804, |
| "grad_norm": 0.13519345223903656, |
| "learning_rate": 4.786436689394317e-06, |
| "loss": 0.3765, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.6889848812095032, |
| "grad_norm": 0.13258984684944153, |
| "learning_rate": 4.773882474585338e-06, |
| "loss": 0.3809, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.6911447084233262, |
| "grad_norm": 0.12966322898864746, |
| "learning_rate": 4.761329688057543e-06, |
| "loss": 0.3782, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.693304535637149, |
| "grad_norm": 0.13643068075180054, |
| "learning_rate": 4.748778409101153e-06, |
| "loss": 0.3796, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.6954643628509718, |
| "grad_norm": 0.1507895290851593, |
| "learning_rate": 4.736228716996868e-06, |
| "loss": 0.3789, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.6976241900647948, |
| "grad_norm": 0.14031574130058289, |
| "learning_rate": 4.723680691015366e-06, |
| "loss": 0.3816, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.6997840172786178, |
| "grad_norm": 0.13055071234703064, |
| "learning_rate": 4.711134410416794e-06, |
| "loss": 0.3643, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7019438444924406, |
| "grad_norm": 0.15579389035701752, |
| "learning_rate": 4.6985899544502835e-06, |
| "loss": 0.3797, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.7041036717062634, |
| "grad_norm": 0.1301419883966446, |
| "learning_rate": 4.686047402353433e-06, |
| "loss": 0.3793, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.7062634989200864, |
| "grad_norm": 0.13526466488838196, |
| "learning_rate": 4.673506833351821e-06, |
| "loss": 0.3911, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.7084233261339092, |
| "grad_norm": 0.1373325139284134, |
| "learning_rate": 4.660968326658497e-06, |
| "loss": 0.3774, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.710583153347732, |
| "grad_norm": 0.1474619358778, |
| "learning_rate": 4.648431961473482e-06, |
| "loss": 0.368, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.712742980561555, |
| "grad_norm": 0.14143545925617218, |
| "learning_rate": 4.635897816983272e-06, |
| "loss": 0.3779, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.714902807775378, |
| "grad_norm": 0.14204931259155273, |
| "learning_rate": 4.6233659723603374e-06, |
| "loss": 0.3667, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.7170626349892009, |
| "grad_norm": 0.13979306817054749, |
| "learning_rate": 4.610836506762618e-06, |
| "loss": 0.3782, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.7192224622030237, |
| "grad_norm": 0.14510124921798706, |
| "learning_rate": 4.59830949933303e-06, |
| "loss": 0.3705, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.7213822894168467, |
| "grad_norm": 0.14503952860832214, |
| "learning_rate": 4.5857850291989596e-06, |
| "loss": 0.3804, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.7235421166306697, |
| "grad_norm": 0.13347502052783966, |
| "learning_rate": 4.573263175471766e-06, |
| "loss": 0.3706, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.7257019438444925, |
| "grad_norm": 0.12476824969053268, |
| "learning_rate": 4.560744017246284e-06, |
| "loss": 0.3756, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.7278617710583153, |
| "grad_norm": 0.13821221888065338, |
| "learning_rate": 4.548227633600322e-06, |
| "loss": 0.3802, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.7300215982721383, |
| "grad_norm": 0.13669945299625397, |
| "learning_rate": 4.535714103594162e-06, |
| "loss": 0.3818, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.732181425485961, |
| "grad_norm": 0.1308770775794983, |
| "learning_rate": 4.523203506270058e-06, |
| "loss": 0.3836, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.734341252699784, |
| "grad_norm": 0.1351071000099182, |
| "learning_rate": 4.510695920651742e-06, |
| "loss": 0.3757, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.736501079913607, |
| "grad_norm": 0.1277286410331726, |
| "learning_rate": 4.4981914257439254e-06, |
| "loss": 0.387, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.73866090712743, |
| "grad_norm": 0.1272444725036621, |
| "learning_rate": 4.485690100531793e-06, |
| "loss": 0.3829, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.7408207343412527, |
| "grad_norm": 0.14064733684062958, |
| "learning_rate": 4.473192023980509e-06, |
| "loss": 0.3822, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.7429805615550755, |
| "grad_norm": 0.13635459542274475, |
| "learning_rate": 4.460697275034717e-06, |
| "loss": 0.38, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.7451403887688985, |
| "grad_norm": 0.136144757270813, |
| "learning_rate": 4.448205932618042e-06, |
| "loss": 0.3794, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.7473002159827213, |
| "grad_norm": 0.14044472575187683, |
| "learning_rate": 4.4357180756325915e-06, |
| "loss": 0.3741, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.7494600431965441, |
| "grad_norm": 0.13555637001991272, |
| "learning_rate": 4.423233782958459e-06, |
| "loss": 0.369, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.7516198704103672, |
| "grad_norm": 0.1326342225074768, |
| "learning_rate": 4.410753133453222e-06, |
| "loss": 0.3784, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.7537796976241902, |
| "grad_norm": 0.13601535558700562, |
| "learning_rate": 4.398276205951443e-06, |
| "loss": 0.3821, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.755939524838013, |
| "grad_norm": 0.13336274027824402, |
| "learning_rate": 4.38580307926418e-06, |
| "loss": 0.3713, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.7580993520518358, |
| "grad_norm": 0.14658118784427643, |
| "learning_rate": 4.373333832178478e-06, |
| "loss": 0.3825, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.7602591792656588, |
| "grad_norm": 0.14051300287246704, |
| "learning_rate": 4.360868543456883e-06, |
| "loss": 0.3685, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.7624190064794818, |
| "grad_norm": 0.1231897696852684, |
| "learning_rate": 4.348407291836928e-06, |
| "loss": 0.37, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.7645788336933044, |
| "grad_norm": 0.13528205454349518, |
| "learning_rate": 4.335950156030653e-06, |
| "loss": 0.3855, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.7667386609071274, |
| "grad_norm": 0.14070774614810944, |
| "learning_rate": 4.323497214724099e-06, |
| "loss": 0.3752, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.7688984881209504, |
| "grad_norm": 0.1284414529800415, |
| "learning_rate": 4.31104854657681e-06, |
| "loss": 0.3659, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.7710583153347732, |
| "grad_norm": 0.13247400522232056, |
| "learning_rate": 4.298604230221341e-06, |
| "loss": 0.3727, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.773218142548596, |
| "grad_norm": 0.12880460917949677, |
| "learning_rate": 4.286164344262756e-06, |
| "loss": 0.3867, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.775377969762419, |
| "grad_norm": 0.12950289249420166, |
| "learning_rate": 4.273728967278137e-06, |
| "loss": 0.3685, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.777537796976242, |
| "grad_norm": 0.1209382489323616, |
| "learning_rate": 4.261298177816082e-06, |
| "loss": 0.3658, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.7796976241900648, |
| "grad_norm": 0.1271076798439026, |
| "learning_rate": 4.248872054396215e-06, |
| "loss": 0.3801, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.7818574514038876, |
| "grad_norm": 0.1265021562576294, |
| "learning_rate": 4.2364506755086856e-06, |
| "loss": 0.3719, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.7840172786177106, |
| "grad_norm": 0.12241175025701523, |
| "learning_rate": 4.224034119613671e-06, |
| "loss": 0.3744, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.7861771058315334, |
| "grad_norm": 0.12162206321954727, |
| "learning_rate": 4.211622465140887e-06, |
| "loss": 0.3797, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.7883369330453562, |
| "grad_norm": 0.12623707950115204, |
| "learning_rate": 4.199215790489091e-06, |
| "loss": 0.3859, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.7904967602591793, |
| "grad_norm": 0.13991111516952515, |
| "learning_rate": 4.186814174025582e-06, |
| "loss": 0.3736, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.7926565874730023, |
| "grad_norm": 0.12621738016605377, |
| "learning_rate": 4.174417694085711e-06, |
| "loss": 0.3743, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.794816414686825, |
| "grad_norm": 0.12811291217803955, |
| "learning_rate": 4.16202642897238e-06, |
| "loss": 0.3782, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.7969762419006479, |
| "grad_norm": 0.12236473709344864, |
| "learning_rate": 4.149640456955555e-06, |
| "loss": 0.3764, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.7991360691144709, |
| "grad_norm": 0.142435684800148, |
| "learning_rate": 4.137259856271767e-06, |
| "loss": 0.3719, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.801295896328294, |
| "grad_norm": 0.12946586310863495, |
| "learning_rate": 4.124884705123619e-06, |
| "loss": 0.3852, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.8034557235421165, |
| "grad_norm": 0.1189626008272171, |
| "learning_rate": 4.112515081679295e-06, |
| "loss": 0.3751, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.8056155507559395, |
| "grad_norm": 0.13230590522289276, |
| "learning_rate": 4.1001510640720525e-06, |
| "loss": 0.3688, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.8077753779697625, |
| "grad_norm": 0.13355307281017303, |
| "learning_rate": 4.087792730399749e-06, |
| "loss": 0.3885, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.8099352051835853, |
| "grad_norm": 0.13115477561950684, |
| "learning_rate": 4.075440158724339e-06, |
| "loss": 0.3807, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.812095032397408, |
| "grad_norm": 0.11709022521972656, |
| "learning_rate": 4.063093427071376e-06, |
| "loss": 0.3715, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.8142548596112311, |
| "grad_norm": 0.13939060270786285, |
| "learning_rate": 4.0507526134295314e-06, |
| "loss": 0.3718, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.8164146868250541, |
| "grad_norm": 0.13002587854862213, |
| "learning_rate": 4.038417795750086e-06, |
| "loss": 0.378, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.818574514038877, |
| "grad_norm": 0.13568507134914398, |
| "learning_rate": 4.0260890519464565e-06, |
| "loss": 0.3715, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.8207343412526997, |
| "grad_norm": 0.13115161657333374, |
| "learning_rate": 4.013766459893686e-06, |
| "loss": 0.374, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.8228941684665227, |
| "grad_norm": 0.1360725313425064, |
| "learning_rate": 4.001450097427965e-06, |
| "loss": 0.3915, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.8250539956803455, |
| "grad_norm": 0.14375773072242737, |
| "learning_rate": 3.989140042346134e-06, |
| "loss": 0.3823, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.8272138228941683, |
| "grad_norm": 0.14056192338466644, |
| "learning_rate": 3.9768363724051875e-06, |
| "loss": 0.3797, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.8293736501079914, |
| "grad_norm": 0.13145223259925842, |
| "learning_rate": 3.964539165321795e-06, |
| "loss": 0.3651, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.8315334773218144, |
| "grad_norm": 0.1401747614145279, |
| "learning_rate": 3.952248498771797e-06, |
| "loss": 0.3803, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.8336933045356372, |
| "grad_norm": 0.1457161009311676, |
| "learning_rate": 3.939964450389728e-06, |
| "loss": 0.3875, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.83585313174946, |
| "grad_norm": 0.1399625837802887, |
| "learning_rate": 3.927687097768309e-06, |
| "loss": 0.3855, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.838012958963283, |
| "grad_norm": 0.12442053109407425, |
| "learning_rate": 3.915416518457974e-06, |
| "loss": 0.3885, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.8401727861771058, |
| "grad_norm": 0.12682035565376282, |
| "learning_rate": 3.9031527899663705e-06, |
| "loss": 0.3708, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.8423326133909286, |
| "grad_norm": 0.12829378247261047, |
| "learning_rate": 3.890895989757874e-06, |
| "loss": 0.376, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.8444924406047516, |
| "grad_norm": 0.14053881168365479, |
| "learning_rate": 3.8786461952530955e-06, |
| "loss": 0.373, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.8466522678185746, |
| "grad_norm": 0.1281130015850067, |
| "learning_rate": 3.866403483828392e-06, |
| "loss": 0.3773, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.8488120950323974, |
| "grad_norm": 0.12932966649532318, |
| "learning_rate": 3.854167932815387e-06, |
| "loss": 0.383, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.8509719222462202, |
| "grad_norm": 0.1419646143913269, |
| "learning_rate": 3.841939619500468e-06, |
| "loss": 0.3674, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.8531317494600432, |
| "grad_norm": 0.12675082683563232, |
| "learning_rate": 3.8297186211243085e-06, |
| "loss": 0.3814, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.8552915766738662, |
| "grad_norm": 0.11979357898235321, |
| "learning_rate": 3.817505014881378e-06, |
| "loss": 0.38, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.857451403887689, |
| "grad_norm": 0.13714280724525452, |
| "learning_rate": 3.8052988779194478e-06, |
| "loss": 0.3823, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.8596112311015118, |
| "grad_norm": 0.13954536616802216, |
| "learning_rate": 3.7931002873391156e-06, |
| "loss": 0.3796, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.8617710583153348, |
| "grad_norm": 0.11842264980077744, |
| "learning_rate": 3.7809093201933078e-06, |
| "loss": 0.3761, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.8639308855291576, |
| "grad_norm": 0.12878850102424622, |
| "learning_rate": 3.7687260534868e-06, |
| "loss": 0.3821, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.8660907127429804, |
| "grad_norm": 0.14155155420303345, |
| "learning_rate": 3.756550564175727e-06, |
| "loss": 0.3748, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.8682505399568035, |
| "grad_norm": 0.12996132671833038, |
| "learning_rate": 3.744382929167094e-06, |
| "loss": 0.3741, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.8704103671706265, |
| "grad_norm": 0.12898430228233337, |
| "learning_rate": 3.7322232253182984e-06, |
| "loss": 0.3763, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.8725701943844493, |
| "grad_norm": 0.12044209241867065, |
| "learning_rate": 3.7200715294366376e-06, |
| "loss": 0.3747, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.874730021598272, |
| "grad_norm": 0.12121162563562393, |
| "learning_rate": 3.7079279182788263e-06, |
| "loss": 0.381, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.876889848812095, |
| "grad_norm": 0.12106562405824661, |
| "learning_rate": 3.695792468550517e-06, |
| "loss": 0.3767, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.8790496760259179, |
| "grad_norm": 0.118615061044693, |
| "learning_rate": 3.6836652569057994e-06, |
| "loss": 0.3708, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.8812095032397407, |
| "grad_norm": 0.12058837711811066, |
| "learning_rate": 3.6715463599467372e-06, |
| "loss": 0.3778, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.8833693304535637, |
| "grad_norm": 0.12485583126544952, |
| "learning_rate": 3.659435854222869e-06, |
| "loss": 0.3679, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.8855291576673867, |
| "grad_norm": 0.11957580596208572, |
| "learning_rate": 3.6473338162307314e-06, |
| "loss": 0.3709, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.8876889848812095, |
| "grad_norm": 0.12649306654930115, |
| "learning_rate": 3.635240322413375e-06, |
| "loss": 0.3803, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.8898488120950323, |
| "grad_norm": 0.12188448011875153, |
| "learning_rate": 3.6231554491598766e-06, |
| "loss": 0.3753, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.8920086393088553, |
| "grad_norm": 0.12264645844697952, |
| "learning_rate": 3.6110792728048636e-06, |
| "loss": 0.3736, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.8941684665226783, |
| "grad_norm": 0.12633198499679565, |
| "learning_rate": 3.599011869628033e-06, |
| "loss": 0.3734, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.896328293736501, |
| "grad_norm": 0.12245716154575348, |
| "learning_rate": 3.5869533158536583e-06, |
| "loss": 0.3661, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.898488120950324, |
| "grad_norm": 0.11652278900146484, |
| "learning_rate": 3.5749036876501196e-06, |
| "loss": 0.3775, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.900647948164147, |
| "grad_norm": 0.1323150098323822, |
| "learning_rate": 3.562863061129419e-06, |
| "loss": 0.3736, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.9028077753779697, |
| "grad_norm": 0.1193445473909378, |
| "learning_rate": 3.550831512346695e-06, |
| "loss": 0.3756, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.9049676025917925, |
| "grad_norm": 0.11626636981964111, |
| "learning_rate": 3.538809117299751e-06, |
| "loss": 0.3771, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.9071274298056156, |
| "grad_norm": 0.13352340459823608, |
| "learning_rate": 3.526795951928569e-06, |
| "loss": 0.3828, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.9092872570194386, |
| "grad_norm": 0.13351115584373474, |
| "learning_rate": 3.5147920921148267e-06, |
| "loss": 0.3645, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.9114470842332614, |
| "grad_norm": 0.11943700909614563, |
| "learning_rate": 3.502797613681429e-06, |
| "loss": 0.386, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.9136069114470842, |
| "grad_norm": 0.1416894644498825, |
| "learning_rate": 3.4908125923920204e-06, |
| "loss": 0.3771, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.9157667386609072, |
| "grad_norm": 0.12883397936820984, |
| "learning_rate": 3.478837103950509e-06, |
| "loss": 0.38, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.91792656587473, |
| "grad_norm": 0.12375036627054214, |
| "learning_rate": 3.4668712240005912e-06, |
| "loss": 0.3771, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.9200863930885528, |
| "grad_norm": 0.13057532906532288, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.3867, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.9222462203023758, |
| "grad_norm": 0.13063670694828033, |
| "learning_rate": 3.442968591846359e-06, |
| "loss": 0.3746, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.9244060475161988, |
| "grad_norm": 0.11987043917179108, |
| "learning_rate": 3.431031990624063e-06, |
| "loss": 0.3733, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.9265658747300216, |
| "grad_norm": 0.13609716296195984, |
| "learning_rate": 3.4191052998564344e-06, |
| "loss": 0.3766, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.9287257019438444, |
| "grad_norm": 0.1286943554878235, |
| "learning_rate": 3.407188594878938e-06, |
| "loss": 0.3777, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.9308855291576674, |
| "grad_norm": 0.15128813683986664, |
| "learning_rate": 3.3952819509639534e-06, |
| "loss": 0.3729, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.9330453563714904, |
| "grad_norm": 0.12446796149015427, |
| "learning_rate": 3.3833854433203185e-06, |
| "loss": 0.3773, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.935205183585313, |
| "grad_norm": 0.1316557675600052, |
| "learning_rate": 3.3714991470928393e-06, |
| "loss": 0.3843, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.937365010799136, |
| "grad_norm": 0.12782582640647888, |
| "learning_rate": 3.359623137361825e-06, |
| "loss": 0.3787, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.939524838012959, |
| "grad_norm": 0.13275963068008423, |
| "learning_rate": 3.347757489142608e-06, |
| "loss": 0.3809, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.9416846652267818, |
| "grad_norm": 0.1355566680431366, |
| "learning_rate": 3.3359022773850673e-06, |
| "loss": 0.3798, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.9438444924406046, |
| "grad_norm": 0.12694980204105377, |
| "learning_rate": 3.3240575769731662e-06, |
| "loss": 0.3825, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9460043196544277, |
| "grad_norm": 0.13038381934165955, |
| "learning_rate": 3.312223462724472e-06, |
| "loss": 0.3861, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.9481641468682507, |
| "grad_norm": 0.1373014897108078, |
| "learning_rate": 3.300400009389678e-06, |
| "loss": 0.3828, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.9503239740820735, |
| "grad_norm": 0.1347927749156952, |
| "learning_rate": 3.2885872916521445e-06, |
| "loss": 0.3701, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.9524838012958963, |
| "grad_norm": 0.12417130172252655, |
| "learning_rate": 3.2767853841274154e-06, |
| "loss": 0.3823, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.9546436285097193, |
| "grad_norm": 0.1381063610315323, |
| "learning_rate": 3.264994361362753e-06, |
| "loss": 0.3768, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.956803455723542, |
| "grad_norm": 0.12305869907140732, |
| "learning_rate": 3.2532142978366654e-06, |
| "loss": 0.3803, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.9589632829373649, |
| "grad_norm": 0.12444626539945602, |
| "learning_rate": 3.241445267958438e-06, |
| "loss": 0.3717, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.961123110151188, |
| "grad_norm": 0.12498262524604797, |
| "learning_rate": 3.2296873460676557e-06, |
| "loss": 0.3739, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.963282937365011, |
| "grad_norm": 0.11730080097913742, |
| "learning_rate": 3.217940606433747e-06, |
| "loss": 0.379, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.9654427645788337, |
| "grad_norm": 0.13885721564292908, |
| "learning_rate": 3.2062051232555024e-06, |
| "loss": 0.3693, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.9676025917926565, |
| "grad_norm": 0.12217065691947937, |
| "learning_rate": 3.1944809706606123e-06, |
| "loss": 0.3739, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.9697624190064795, |
| "grad_norm": 0.1336861401796341, |
| "learning_rate": 3.182768222705198e-06, |
| "loss": 0.3747, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.9719222462203023, |
| "grad_norm": 0.12711098790168762, |
| "learning_rate": 3.171066953373338e-06, |
| "loss": 0.3821, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.9740820734341251, |
| "grad_norm": 0.1329392045736313, |
| "learning_rate": 3.1593772365766107e-06, |
| "loss": 0.376, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.9762419006479481, |
| "grad_norm": 0.12040119618177414, |
| "learning_rate": 3.147699146153621e-06, |
| "loss": 0.3738, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.9784017278617712, |
| "grad_norm": 0.23164218664169312, |
| "learning_rate": 3.1360327558695336e-06, |
| "loss": 0.3802, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.980561555075594, |
| "grad_norm": 0.11707053333520889, |
| "learning_rate": 3.1243781394156138e-06, |
| "loss": 0.3813, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.9827213822894167, |
| "grad_norm": 0.1273183971643448, |
| "learning_rate": 3.1127353704087477e-06, |
| "loss": 0.3779, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.9848812095032398, |
| "grad_norm": 0.1234814003109932, |
| "learning_rate": 3.1011045223909954e-06, |
| "loss": 0.3804, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.9870410367170628, |
| "grad_norm": 0.1258484572172165, |
| "learning_rate": 3.089485668829113e-06, |
| "loss": 0.3811, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.9892008639308856, |
| "grad_norm": 0.1231926903128624, |
| "learning_rate": 3.077878883114096e-06, |
| "loss": 0.3831, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.9913606911447084, |
| "grad_norm": 0.12332677841186523, |
| "learning_rate": 3.066284238560713e-06, |
| "loss": 0.3698, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.9935205183585314, |
| "grad_norm": 0.12334899604320526, |
| "learning_rate": 3.0547018084070344e-06, |
| "loss": 0.3768, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.9956803455723542, |
| "grad_norm": 0.11958076804876328, |
| "learning_rate": 3.043131665813988e-06, |
| "loss": 0.3684, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.997840172786177, |
| "grad_norm": 0.12707985937595367, |
| "learning_rate": 3.031573883864882e-06, |
| "loss": 0.382, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.14072422683238983, |
| "learning_rate": 3.0200285355649504e-06, |
| "loss": 0.3729, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.002159827213823, |
| "grad_norm": 0.1437283754348755, |
| "learning_rate": 3.0084956938408873e-06, |
| "loss": 0.3623, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.0043196544276456, |
| "grad_norm": 0.12962134182453156, |
| "learning_rate": 2.9969754315403865e-06, |
| "loss": 0.3649, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.0064794816414686, |
| "grad_norm": 0.13019190728664398, |
| "learning_rate": 2.9854678214316875e-06, |
| "loss": 0.3626, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.0086393088552916, |
| "grad_norm": 0.12730036675930023, |
| "learning_rate": 2.97397293620311e-06, |
| "loss": 0.3572, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.0107991360691146, |
| "grad_norm": 0.13056515157222748, |
| "learning_rate": 2.962490848462596e-06, |
| "loss": 0.3474, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.012958963282937, |
| "grad_norm": 0.13034315407276154, |
| "learning_rate": 2.951021630737255e-06, |
| "loss": 0.3679, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.0151187904967602, |
| "grad_norm": 0.14478375017642975, |
| "learning_rate": 2.9395653554728955e-06, |
| "loss": 0.3579, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.0172786177105833, |
| "grad_norm": 0.1417471021413803, |
| "learning_rate": 2.92812209503358e-06, |
| "loss": 0.365, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.019438444924406, |
| "grad_norm": 0.13237528502941132, |
| "learning_rate": 2.91669192170116e-06, |
| "loss": 0.3658, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.021598272138229, |
| "grad_norm": 0.1314917653799057, |
| "learning_rate": 2.9052749076748266e-06, |
| "loss": 0.3687, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.023758099352052, |
| "grad_norm": 0.11867891997098923, |
| "learning_rate": 2.8938711250706397e-06, |
| "loss": 0.3643, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.025917926565875, |
| "grad_norm": 0.13976238667964935, |
| "learning_rate": 2.8824806459210907e-06, |
| "loss": 0.3678, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.0280777537796975, |
| "grad_norm": 0.1445903778076172, |
| "learning_rate": 2.871103542174637e-06, |
| "loss": 0.3574, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.0302375809935205, |
| "grad_norm": 0.11427946388721466, |
| "learning_rate": 2.8597398856952473e-06, |
| "loss": 0.3569, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.0323974082073435, |
| "grad_norm": 0.13496039807796478, |
| "learning_rate": 2.8483897482619566e-06, |
| "loss": 0.3717, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.0345572354211665, |
| "grad_norm": 0.13540537655353546, |
| "learning_rate": 2.837053201568396e-06, |
| "loss": 0.3667, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.036717062634989, |
| "grad_norm": 0.12281273305416107, |
| "learning_rate": 2.825730317222358e-06, |
| "loss": 0.3541, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.038876889848812, |
| "grad_norm": 0.12640658020973206, |
| "learning_rate": 2.814421166745337e-06, |
| "loss": 0.3641, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.041036717062635, |
| "grad_norm": 0.12110286951065063, |
| "learning_rate": 2.803125821572068e-06, |
| "loss": 0.3597, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.0431965442764577, |
| "grad_norm": 0.13443802297115326, |
| "learning_rate": 2.791844353050094e-06, |
| "loss": 0.3709, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.0453563714902807, |
| "grad_norm": 0.11386435478925705, |
| "learning_rate": 2.7805768324393017e-06, |
| "loss": 0.3681, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.0475161987041037, |
| "grad_norm": 0.13114500045776367, |
| "learning_rate": 2.769323330911472e-06, |
| "loss": 0.3602, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.0496760259179267, |
| "grad_norm": 0.13121016323566437, |
| "learning_rate": 2.7580839195498397e-06, |
| "loss": 0.3567, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.0518358531317493, |
| "grad_norm": 0.11939337104558945, |
| "learning_rate": 2.746858669348634e-06, |
| "loss": 0.3611, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.0539956803455723, |
| "grad_norm": 0.11663561314344406, |
| "learning_rate": 2.7356476512126386e-06, |
| "loss": 0.3557, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.0561555075593954, |
| "grad_norm": 0.11576730012893677, |
| "learning_rate": 2.724450935956733e-06, |
| "loss": 0.3723, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.058315334773218, |
| "grad_norm": 0.1251356601715088, |
| "learning_rate": 2.713268594305458e-06, |
| "loss": 0.3637, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.060475161987041, |
| "grad_norm": 0.12093979120254517, |
| "learning_rate": 2.7021006968925613e-06, |
| "loss": 0.3752, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.062634989200864, |
| "grad_norm": 0.12214156985282898, |
| "learning_rate": 2.6909473142605522e-06, |
| "loss": 0.3638, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.064794816414687, |
| "grad_norm": 0.12628315389156342, |
| "learning_rate": 2.6798085168602595e-06, |
| "loss": 0.3667, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.0669546436285096, |
| "grad_norm": 0.12237667292356491, |
| "learning_rate": 2.668684375050378e-06, |
| "loss": 0.3653, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.0691144708423326, |
| "grad_norm": 0.1107870489358902, |
| "learning_rate": 2.6575749590970336e-06, |
| "loss": 0.3558, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.0712742980561556, |
| "grad_norm": 0.1208115965127945, |
| "learning_rate": 2.646480339173337e-06, |
| "loss": 0.3733, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.0734341252699786, |
| "grad_norm": 0.12692323327064514, |
| "learning_rate": 2.635400585358937e-06, |
| "loss": 0.3663, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.075593952483801, |
| "grad_norm": 0.11760963499546051, |
| "learning_rate": 2.624335767639582e-06, |
| "loss": 0.3638, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.077753779697624, |
| "grad_norm": 0.12751303613185883, |
| "learning_rate": 2.6132859559066704e-06, |
| "loss": 0.3547, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.079913606911447, |
| "grad_norm": 0.12997639179229736, |
| "learning_rate": 2.6022512199568205e-06, |
| "loss": 0.3558, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.08207343412527, |
| "grad_norm": 0.12291760742664337, |
| "learning_rate": 2.5912316294914232e-06, |
| "loss": 0.3506, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.084233261339093, |
| "grad_norm": 0.11633151024580002, |
| "learning_rate": 2.580227254116199e-06, |
| "loss": 0.3648, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.086393088552916, |
| "grad_norm": 0.12379375100135803, |
| "learning_rate": 2.5692381633407672e-06, |
| "loss": 0.3652, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.088552915766739, |
| "grad_norm": 0.12270376831293106, |
| "learning_rate": 2.558264426578192e-06, |
| "loss": 0.3625, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.0907127429805614, |
| "grad_norm": 0.12057667225599289, |
| "learning_rate": 2.547306113144564e-06, |
| "loss": 0.3712, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.0928725701943844, |
| "grad_norm": 0.1182745024561882, |
| "learning_rate": 2.536363292258543e-06, |
| "loss": 0.3686, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.0950323974082075, |
| "grad_norm": 0.12089554965496063, |
| "learning_rate": 2.5254360330409343e-06, |
| "loss": 0.3603, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.09719222462203, |
| "grad_norm": 0.12302310764789581, |
| "learning_rate": 2.514524404514248e-06, |
| "loss": 0.3599, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.099352051835853, |
| "grad_norm": 0.1283075213432312, |
| "learning_rate": 2.503628475602256e-06, |
| "loss": 0.3685, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.101511879049676, |
| "grad_norm": 0.11500417441129684, |
| "learning_rate": 2.49274831512957e-06, |
| "loss": 0.3657, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.103671706263499, |
| "grad_norm": 0.11335953325033188, |
| "learning_rate": 2.4818839918211963e-06, |
| "loss": 0.3689, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.1058315334773217, |
| "grad_norm": 0.12606894969940186, |
| "learning_rate": 2.4710355743021077e-06, |
| "loss": 0.359, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.1079913606911447, |
| "grad_norm": 0.11400944739580154, |
| "learning_rate": 2.4602031310968013e-06, |
| "loss": 0.3661, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.1101511879049677, |
| "grad_norm": 0.11969246715307236, |
| "learning_rate": 2.4493867306288772e-06, |
| "loss": 0.3618, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.1123110151187907, |
| "grad_norm": 0.11956711113452911, |
| "learning_rate": 2.4385864412206e-06, |
| "loss": 0.3516, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.1144708423326133, |
| "grad_norm": 0.11470730602741241, |
| "learning_rate": 2.4278023310924676e-06, |
| "loss": 0.3651, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.1166306695464363, |
| "grad_norm": 0.12043334543704987, |
| "learning_rate": 2.417034468362782e-06, |
| "loss": 0.3702, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.1187904967602593, |
| "grad_norm": 0.11915960907936096, |
| "learning_rate": 2.406282921047213e-06, |
| "loss": 0.3609, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.120950323974082, |
| "grad_norm": 0.1116413027048111, |
| "learning_rate": 2.395547757058379e-06, |
| "loss": 0.3576, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.123110151187905, |
| "grad_norm": 0.11029747128486633, |
| "learning_rate": 2.3848290442054096e-06, |
| "loss": 0.3618, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.125269978401728, |
| "grad_norm": 0.12164044380187988, |
| "learning_rate": 2.3741268501935212e-06, |
| "loss": 0.3557, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.127429805615551, |
| "grad_norm": 0.11805900186300278, |
| "learning_rate": 2.3634412426235886e-06, |
| "loss": 0.3665, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.1295896328293735, |
| "grad_norm": 0.12578925490379333, |
| "learning_rate": 2.3527722889917147e-06, |
| "loss": 0.3617, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.1317494600431965, |
| "grad_norm": 0.11140415817499161, |
| "learning_rate": 2.3421200566888096e-06, |
| "loss": 0.3529, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.1339092872570196, |
| "grad_norm": 0.12330644577741623, |
| "learning_rate": 2.3314846130001622e-06, |
| "loss": 0.3512, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.136069114470842, |
| "grad_norm": 0.11442252993583679, |
| "learning_rate": 2.320866025105016e-06, |
| "loss": 0.3527, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.138228941684665, |
| "grad_norm": 0.11933194845914841, |
| "learning_rate": 2.3102643600761445e-06, |
| "loss": 0.3481, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.140388768898488, |
| "grad_norm": 0.11264543980360031, |
| "learning_rate": 2.299679684879421e-06, |
| "loss": 0.3583, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.142548596112311, |
| "grad_norm": 0.11280512809753418, |
| "learning_rate": 2.289112066373411e-06, |
| "loss": 0.3629, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.1447084233261338, |
| "grad_norm": 0.11324049532413483, |
| "learning_rate": 2.2785615713089363e-06, |
| "loss": 0.3609, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.146868250539957, |
| "grad_norm": 0.10841232538223267, |
| "learning_rate": 2.268028266328655e-06, |
| "loss": 0.3613, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.14902807775378, |
| "grad_norm": 0.1152244582772255, |
| "learning_rate": 2.25751221796665e-06, |
| "loss": 0.3571, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.1511879049676024, |
| "grad_norm": 0.11110089719295502, |
| "learning_rate": 2.247013492647994e-06, |
| "loss": 0.3548, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.1533477321814254, |
| "grad_norm": 0.11328666657209396, |
| "learning_rate": 2.2365321566883437e-06, |
| "loss": 0.3586, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.1555075593952484, |
| "grad_norm": 0.11004538089036942, |
| "learning_rate": 2.2260682762935137e-06, |
| "loss": 0.3565, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.1576673866090714, |
| "grad_norm": 0.11562500894069672, |
| "learning_rate": 2.2156219175590623e-06, |
| "loss": 0.3619, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.159827213822894, |
| "grad_norm": 0.11296035349369049, |
| "learning_rate": 2.2051931464698636e-06, |
| "loss": 0.3656, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.161987041036717, |
| "grad_norm": 0.11270337551832199, |
| "learning_rate": 2.1947820288997067e-06, |
| "loss": 0.3496, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.16414686825054, |
| "grad_norm": 0.11527759581804276, |
| "learning_rate": 2.1843886306108686e-06, |
| "loss": 0.3695, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.166306695464363, |
| "grad_norm": 0.11408324539661407, |
| "learning_rate": 2.174013017253701e-06, |
| "loss": 0.3651, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.1684665226781856, |
| "grad_norm": 0.10843408107757568, |
| "learning_rate": 2.1636552543662187e-06, |
| "loss": 0.3692, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.1706263498920086, |
| "grad_norm": 0.11223003268241882, |
| "learning_rate": 2.153315407373679e-06, |
| "loss": 0.3545, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.1727861771058317, |
| "grad_norm": 0.11480898410081863, |
| "learning_rate": 2.1429935415881753e-06, |
| "loss": 0.3609, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.1749460043196542, |
| "grad_norm": 0.1133100613951683, |
| "learning_rate": 2.132689722208223e-06, |
| "loss": 0.361, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.1771058315334773, |
| "grad_norm": 0.11355537176132202, |
| "learning_rate": 2.1224040143183444e-06, |
| "loss": 0.3681, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.1792656587473003, |
| "grad_norm": 0.11831656098365784, |
| "learning_rate": 2.112136482888663e-06, |
| "loss": 0.3555, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.1814254859611233, |
| "grad_norm": 0.11772197484970093, |
| "learning_rate": 2.1018871927744844e-06, |
| "loss": 0.3604, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.183585313174946, |
| "grad_norm": 0.10822444409132004, |
| "learning_rate": 2.0916562087158964e-06, |
| "loss": 0.3583, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.185745140388769, |
| "grad_norm": 0.21270522475242615, |
| "learning_rate": 2.0814435953373554e-06, |
| "loss": 0.3651, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.187904967602592, |
| "grad_norm": 0.11271930485963821, |
| "learning_rate": 2.0712494171472776e-06, |
| "loss": 0.367, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.190064794816415, |
| "grad_norm": 0.1191214919090271, |
| "learning_rate": 2.061073738537635e-06, |
| "loss": 0.3566, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.1922246220302375, |
| "grad_norm": 0.1228100061416626, |
| "learning_rate": 2.0509166237835398e-06, |
| "loss": 0.3553, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.1943844492440605, |
| "grad_norm": 0.11371961981058121, |
| "learning_rate": 2.040778137042852e-06, |
| "loss": 0.3621, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.1965442764578835, |
| "grad_norm": 0.10948773473501205, |
| "learning_rate": 2.030658342355765e-06, |
| "loss": 0.3612, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.198704103671706, |
| "grad_norm": 0.10944036394357681, |
| "learning_rate": 2.0205573036443994e-06, |
| "loss": 0.3619, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.200863930885529, |
| "grad_norm": 0.11753126233816147, |
| "learning_rate": 2.0104750847124075e-06, |
| "loss": 0.3636, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.203023758099352, |
| "grad_norm": 0.12510347366333008, |
| "learning_rate": 2.0004117492445614e-06, |
| "loss": 0.3789, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.205183585313175, |
| "grad_norm": 0.1162487342953682, |
| "learning_rate": 1.990367360806359e-06, |
| "loss": 0.3595, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.2073434125269977, |
| "grad_norm": 0.12260331958532333, |
| "learning_rate": 1.980341982843616e-06, |
| "loss": 0.3659, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.2095032397408207, |
| "grad_norm": 0.11793196201324463, |
| "learning_rate": 1.9703356786820687e-06, |
| "loss": 0.3644, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.2116630669546438, |
| "grad_norm": 0.11070533841848373, |
| "learning_rate": 1.9603485115269743e-06, |
| "loss": 0.3587, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.2138228941684663, |
| "grad_norm": 0.10772062093019485, |
| "learning_rate": 1.9503805444627054e-06, |
| "loss": 0.358, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.2159827213822894, |
| "grad_norm": 0.11722833663225174, |
| "learning_rate": 1.9404318404523605e-06, |
| "loss": 0.3529, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.2181425485961124, |
| "grad_norm": 0.11525849252939224, |
| "learning_rate": 1.930502462337362e-06, |
| "loss": 0.3526, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.2203023758099354, |
| "grad_norm": 0.12186475098133087, |
| "learning_rate": 1.920592472837057e-06, |
| "loss": 0.3642, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.222462203023758, |
| "grad_norm": 0.11602187156677246, |
| "learning_rate": 1.910701934548329e-06, |
| "loss": 0.3741, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.224622030237581, |
| "grad_norm": 0.12122868001461029, |
| "learning_rate": 1.900830909945189e-06, |
| "loss": 0.3658, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.226781857451404, |
| "grad_norm": 0.11481517553329468, |
| "learning_rate": 1.8909794613783943e-06, |
| "loss": 0.3586, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.2289416846652266, |
| "grad_norm": 0.10677429288625717, |
| "learning_rate": 1.8811476510750486e-06, |
| "loss": 0.367, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.2311015118790496, |
| "grad_norm": 0.11565054953098297, |
| "learning_rate": 1.8713355411382117e-06, |
| "loss": 0.3629, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.2332613390928726, |
| "grad_norm": 0.11869722604751587, |
| "learning_rate": 1.8615431935464984e-06, |
| "loss": 0.3455, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.2354211663066956, |
| "grad_norm": 0.12298930436372757, |
| "learning_rate": 1.8517706701536998e-06, |
| "loss": 0.377, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.237580993520518, |
| "grad_norm": 0.11223292350769043, |
| "learning_rate": 1.8420180326883857e-06, |
| "loss": 0.3611, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.239740820734341, |
| "grad_norm": 0.10755477845668793, |
| "learning_rate": 1.8322853427535148e-06, |
| "loss": 0.3636, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.2419006479481642, |
| "grad_norm": 0.11490552872419357, |
| "learning_rate": 1.822572661826047e-06, |
| "loss": 0.3606, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.2440604751619873, |
| "grad_norm": 0.11000396311283112, |
| "learning_rate": 1.8128800512565514e-06, |
| "loss": 0.3643, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.24622030237581, |
| "grad_norm": 0.10895387083292007, |
| "learning_rate": 1.803207572268826e-06, |
| "loss": 0.3623, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.248380129589633, |
| "grad_norm": 0.11881309747695923, |
| "learning_rate": 1.7935552859595058e-06, |
| "loss": 0.3598, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.250539956803456, |
| "grad_norm": 0.11568914353847504, |
| "learning_rate": 1.7839232532976746e-06, |
| "loss": 0.3652, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.2526997840172784, |
| "grad_norm": 0.10827185958623886, |
| "learning_rate": 1.7743115351244883e-06, |
| "loss": 0.3616, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.2548596112311015, |
| "grad_norm": 0.12083268910646439, |
| "learning_rate": 1.7647201921527802e-06, |
| "loss": 0.3696, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.2570194384449245, |
| "grad_norm": 0.11744555830955505, |
| "learning_rate": 1.7551492849666857e-06, |
| "loss": 0.3547, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.2591792656587475, |
| "grad_norm": 0.11333145946264267, |
| "learning_rate": 1.7455988740212576e-06, |
| "loss": 0.3648, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.26133909287257, |
| "grad_norm": 0.1083984524011612, |
| "learning_rate": 1.7360690196420816e-06, |
| "loss": 0.3609, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.263498920086393, |
| "grad_norm": 0.12069600075483322, |
| "learning_rate": 1.7265597820248987e-06, |
| "loss": 0.3617, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.265658747300216, |
| "grad_norm": 0.11563380807638168, |
| "learning_rate": 1.7170712212352187e-06, |
| "loss": 0.3554, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.267818574514039, |
| "grad_norm": 0.11244919896125793, |
| "learning_rate": 1.7076033972079503e-06, |
| "loss": 0.3526, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.2699784017278617, |
| "grad_norm": 0.11943572014570236, |
| "learning_rate": 1.698156369747016e-06, |
| "loss": 0.3639, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.2721382289416847, |
| "grad_norm": 0.11513727903366089, |
| "learning_rate": 1.6887301985249754e-06, |
| "loss": 0.3622, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.2742980561555077, |
| "grad_norm": 0.11251917481422424, |
| "learning_rate": 1.6793249430826502e-06, |
| "loss": 0.3606, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.2764578833693303, |
| "grad_norm": 0.11887813359498978, |
| "learning_rate": 1.6699406628287423e-06, |
| "loss": 0.3602, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.2786177105831533, |
| "grad_norm": 0.1043018326163292, |
| "learning_rate": 1.6605774170394683e-06, |
| "loss": 0.3597, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.2807775377969763, |
| "grad_norm": 0.11690463870763779, |
| "learning_rate": 1.651235264858177e-06, |
| "loss": 0.3706, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.282937365010799, |
| "grad_norm": 0.11119679361581802, |
| "learning_rate": 1.6419142652949793e-06, |
| "loss": 0.3755, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.285097192224622, |
| "grad_norm": 0.12275518476963043, |
| "learning_rate": 1.6326144772263752e-06, |
| "loss": 0.3617, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.287257019438445, |
| "grad_norm": 0.11455702781677246, |
| "learning_rate": 1.6233359593948777e-06, |
| "loss": 0.3561, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.289416846652268, |
| "grad_norm": 0.1072060838341713, |
| "learning_rate": 1.6140787704086502e-06, |
| "loss": 0.3595, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.2915766738660905, |
| "grad_norm": 0.11446718126535416, |
| "learning_rate": 1.6048429687411294e-06, |
| "loss": 0.3579, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.2937365010799136, |
| "grad_norm": 0.1233833059668541, |
| "learning_rate": 1.5956286127306591e-06, |
| "loss": 0.3571, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.2958963282937366, |
| "grad_norm": 0.11054225265979767, |
| "learning_rate": 1.586435760580118e-06, |
| "loss": 0.3592, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.2980561555075596, |
| "grad_norm": 0.11470706015825272, |
| "learning_rate": 1.5772644703565564e-06, |
| "loss": 0.3602, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.300215982721382, |
| "grad_norm": 0.1131376326084137, |
| "learning_rate": 1.5681147999908308e-06, |
| "loss": 0.3579, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.302375809935205, |
| "grad_norm": 0.1124383881688118, |
| "learning_rate": 1.5589868072772279e-06, |
| "loss": 0.3592, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.304535637149028, |
| "grad_norm": 0.13568998873233795, |
| "learning_rate": 1.5498805498731146e-06, |
| "loss": 0.3687, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.306695464362851, |
| "grad_norm": 0.11868295818567276, |
| "learning_rate": 1.5407960852985582e-06, |
| "loss": 0.3741, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.308855291576674, |
| "grad_norm": 0.11386443674564362, |
| "learning_rate": 1.531733470935976e-06, |
| "loss": 0.3702, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.311015118790497, |
| "grad_norm": 0.11155420541763306, |
| "learning_rate": 1.5226927640297663e-06, |
| "loss": 0.3543, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.31317494600432, |
| "grad_norm": 0.11469469219446182, |
| "learning_rate": 1.5136740216859464e-06, |
| "loss": 0.3718, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.3153347732181424, |
| "grad_norm": 0.10761052370071411, |
| "learning_rate": 1.5046773008717968e-06, |
| "loss": 0.3728, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.3174946004319654, |
| "grad_norm": 0.10855443775653839, |
| "learning_rate": 1.4957026584154926e-06, |
| "loss": 0.3612, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.3196544276457884, |
| "grad_norm": 0.11300813406705856, |
| "learning_rate": 1.4867501510057548e-06, |
| "loss": 0.3629, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.3218142548596115, |
| "grad_norm": 0.1190650686621666, |
| "learning_rate": 1.4778198351914853e-06, |
| "loss": 0.358, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.323974082073434, |
| "grad_norm": 0.11081087589263916, |
| "learning_rate": 1.4689117673814135e-06, |
| "loss": 0.3579, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.326133909287257, |
| "grad_norm": 0.10845163464546204, |
| "learning_rate": 1.4600260038437376e-06, |
| "loss": 0.3547, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.32829373650108, |
| "grad_norm": 0.10712606459856033, |
| "learning_rate": 1.4511626007057667e-06, |
| "loss": 0.3702, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.3304535637149026, |
| "grad_norm": 0.10121244937181473, |
| "learning_rate": 1.4423216139535735e-06, |
| "loss": 0.3701, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.3326133909287257, |
| "grad_norm": 0.10943249613046646, |
| "learning_rate": 1.4335030994316357e-06, |
| "loss": 0.3673, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.3347732181425487, |
| "grad_norm": 0.11610903590917587, |
| "learning_rate": 1.4247071128424838e-06, |
| "loss": 0.3603, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.3369330453563713, |
| "grad_norm": 0.11263252794742584, |
| "learning_rate": 1.4159337097463515e-06, |
| "loss": 0.3646, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.3390928725701943, |
| "grad_norm": 0.11808553338050842, |
| "learning_rate": 1.407182945560817e-06, |
| "loss": 0.3551, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.3412526997840173, |
| "grad_norm": 0.11071130633354187, |
| "learning_rate": 1.3984548755604655e-06, |
| "loss": 0.3591, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.3434125269978403, |
| "grad_norm": 0.10774732381105423, |
| "learning_rate": 1.38974955487653e-06, |
| "loss": 0.3701, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.345572354211663, |
| "grad_norm": 0.10596179217100143, |
| "learning_rate": 1.3810670384965469e-06, |
| "loss": 0.3619, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.347732181425486, |
| "grad_norm": 0.10586302727460861, |
| "learning_rate": 1.372407381264011e-06, |
| "loss": 0.3671, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.349892008639309, |
| "grad_norm": 0.11271238327026367, |
| "learning_rate": 1.3637706378780209e-06, |
| "loss": 0.369, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.352051835853132, |
| "grad_norm": 0.11300753057003021, |
| "learning_rate": 1.3551568628929434e-06, |
| "loss": 0.366, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.3542116630669545, |
| "grad_norm": 0.10634942352771759, |
| "learning_rate": 1.346566110718061e-06, |
| "loss": 0.3608, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.3563714902807775, |
| "grad_norm": 0.11670755594968796, |
| "learning_rate": 1.337998435617235e-06, |
| "loss": 0.3649, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.3585313174946005, |
| "grad_norm": 0.11227838695049286, |
| "learning_rate": 1.3294538917085586e-06, |
| "loss": 0.3496, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.360691144708423, |
| "grad_norm": 0.11369525641202927, |
| "learning_rate": 1.3209325329640126e-06, |
| "loss": 0.367, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.362850971922246, |
| "grad_norm": 0.10753148049116135, |
| "learning_rate": 1.312434413209131e-06, |
| "loss": 0.3654, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.365010799136069, |
| "grad_norm": 0.11079417914152145, |
| "learning_rate": 1.3039595861226579e-06, |
| "loss": 0.3535, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.367170626349892, |
| "grad_norm": 0.10849615931510925, |
| "learning_rate": 1.2955081052362072e-06, |
| "loss": 0.3584, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.3693304535637147, |
| "grad_norm": 0.10960622876882553, |
| "learning_rate": 1.2870800239339237e-06, |
| "loss": 0.3578, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.3714902807775378, |
| "grad_norm": 0.11225436627864838, |
| "learning_rate": 1.2786753954521508e-06, |
| "loss": 0.3645, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.373650107991361, |
| "grad_norm": 0.11186996102333069, |
| "learning_rate": 1.2702942728790897e-06, |
| "loss": 0.3564, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.375809935205184, |
| "grad_norm": 0.10800560563802719, |
| "learning_rate": 1.2619367091544654e-06, |
| "loss": 0.3595, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.3779697624190064, |
| "grad_norm": 0.11503534764051437, |
| "learning_rate": 1.2536027570691938e-06, |
| "loss": 0.363, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.3801295896328294, |
| "grad_norm": 0.1053680032491684, |
| "learning_rate": 1.2452924692650443e-06, |
| "loss": 0.3668, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.3822894168466524, |
| "grad_norm": 0.10837449133396149, |
| "learning_rate": 1.2370058982343109e-06, |
| "loss": 0.3646, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.384449244060475, |
| "grad_norm": 0.10401103645563126, |
| "learning_rate": 1.2287430963194807e-06, |
| "loss": 0.3523, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.386609071274298, |
| "grad_norm": 0.1130133643746376, |
| "learning_rate": 1.2205041157129017e-06, |
| "loss": 0.3522, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.388768898488121, |
| "grad_norm": 0.11143437772989273, |
| "learning_rate": 1.2122890084564542e-06, |
| "loss": 0.3622, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.390928725701944, |
| "grad_norm": 0.1088298037648201, |
| "learning_rate": 1.204097826441218e-06, |
| "loss": 0.3524, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.3930885529157666, |
| "grad_norm": 0.11658685654401779, |
| "learning_rate": 1.1959306214071508e-06, |
| "loss": 0.3649, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.3952483801295896, |
| "grad_norm": 0.10530900955200195, |
| "learning_rate": 1.18778744494276e-06, |
| "loss": 0.3732, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.3974082073434126, |
| "grad_norm": 0.10576412826776505, |
| "learning_rate": 1.1796683484847731e-06, |
| "loss": 0.3528, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.3995680345572357, |
| "grad_norm": 0.10664583742618561, |
| "learning_rate": 1.1715733833178178e-06, |
| "loss": 0.3638, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.4017278617710582, |
| "grad_norm": 0.11170324683189392, |
| "learning_rate": 1.1635026005740902e-06, |
| "loss": 0.3632, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.4038876889848813, |
| "grad_norm": 0.10899297147989273, |
| "learning_rate": 1.1554560512330437e-06, |
| "loss": 0.3717, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.4060475161987043, |
| "grad_norm": 0.10355883091688156, |
| "learning_rate": 1.1474337861210543e-06, |
| "loss": 0.3669, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.408207343412527, |
| "grad_norm": 0.11601343005895615, |
| "learning_rate": 1.1394358559111101e-06, |
| "loss": 0.3675, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.41036717062635, |
| "grad_norm": 0.10625651478767395, |
| "learning_rate": 1.1314623111224865e-06, |
| "loss": 0.3696, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.412526997840173, |
| "grad_norm": 0.1087704598903656, |
| "learning_rate": 1.1235132021204226e-06, |
| "loss": 0.3678, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.4146868250539955, |
| "grad_norm": 0.1125335842370987, |
| "learning_rate": 1.1155885791158128e-06, |
| "loss": 0.3676, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.4168466522678185, |
| "grad_norm": 0.10977572947740555, |
| "learning_rate": 1.1076884921648834e-06, |
| "loss": 0.3597, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.4190064794816415, |
| "grad_norm": 0.11624909937381744, |
| "learning_rate": 1.0998129911688766e-06, |
| "loss": 0.3645, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.4211663066954645, |
| "grad_norm": 0.11193333566188812, |
| "learning_rate": 1.0919621258737384e-06, |
| "loss": 0.3679, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.423326133909287, |
| "grad_norm": 0.10702624171972275, |
| "learning_rate": 1.0841359458697986e-06, |
| "loss": 0.3675, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.42548596112311, |
| "grad_norm": 0.11081477999687195, |
| "learning_rate": 1.0763345005914649e-06, |
| "loss": 0.3733, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.427645788336933, |
| "grad_norm": 0.11152873933315277, |
| "learning_rate": 1.0685578393169054e-06, |
| "loss": 0.3634, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.429805615550756, |
| "grad_norm": 0.11278684437274933, |
| "learning_rate": 1.0608060111677409e-06, |
| "loss": 0.3646, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.4319654427645787, |
| "grad_norm": 0.10329707711935043, |
| "learning_rate": 1.053079065108728e-06, |
| "loss": 0.3616, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.4341252699784017, |
| "grad_norm": 0.11579885333776474, |
| "learning_rate": 1.0453770499474585e-06, |
| "loss": 0.3642, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.4362850971922247, |
| "grad_norm": 0.11287212371826172, |
| "learning_rate": 1.037700014334047e-06, |
| "loss": 0.3588, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.4384449244060473, |
| "grad_norm": 0.10435645282268524, |
| "learning_rate": 1.0300480067608232e-06, |
| "loss": 0.3621, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.4406047516198703, |
| "grad_norm": 0.1117047443985939, |
| "learning_rate": 1.0224210755620257e-06, |
| "loss": 0.3665, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.4427645788336934, |
| "grad_norm": 0.11821126937866211, |
| "learning_rate": 1.014819268913495e-06, |
| "loss": 0.3659, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.4449244060475164, |
| "grad_norm": 0.11257217824459076, |
| "learning_rate": 1.0072426348323754e-06, |
| "loss": 0.3629, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.447084233261339, |
| "grad_norm": 0.10960426181554794, |
| "learning_rate": 9.99691221176805e-07, |
| "loss": 0.3702, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.449244060475162, |
| "grad_norm": 0.11274091899394989, |
| "learning_rate": 9.921650756456164e-07, |
| "loss": 0.3552, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.451403887688985, |
| "grad_norm": 0.11033818125724792, |
| "learning_rate": 9.84664245778037e-07, |
| "loss": 0.3622, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.453563714902808, |
| "grad_norm": 0.11202115565538406, |
| "learning_rate": 9.771887789533818e-07, |
| "loss": 0.3641, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.4557235421166306, |
| "grad_norm": 0.10436037182807922, |
| "learning_rate": 9.69738722390765e-07, |
| "loss": 0.3722, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.4578833693304536, |
| "grad_norm": 0.11260079592466354, |
| "learning_rate": 9.623141231487904e-07, |
| "loss": 0.3664, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.4600431965442766, |
| "grad_norm": 0.10981511324644089, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.3729, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.462203023758099, |
| "grad_norm": 0.11340730637311935, |
| "learning_rate": 9.475414840568903e-07, |
| "loss": 0.3614, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.464362850971922, |
| "grad_norm": 0.10501902550458908, |
| "learning_rate": 9.401935375189802e-07, |
| "loss": 0.3601, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.466522678185745, |
| "grad_norm": 0.11369086056947708, |
| "learning_rate": 9.32871234925159e-07, |
| "loss": 0.3669, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.468682505399568, |
| "grad_norm": 0.10842647403478622, |
| "learning_rate": 9.255746225270689e-07, |
| "loss": 0.3582, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.470842332613391, |
| "grad_norm": 0.1089843288064003, |
| "learning_rate": 9.183037464140804e-07, |
| "loss": 0.3532, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.473002159827214, |
| "grad_norm": 0.1023058295249939, |
| "learning_rate": 9.110586525129988e-07, |
| "loss": 0.3473, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.475161987041037, |
| "grad_norm": 0.1110844761133194, |
| "learning_rate": 9.038393865877725e-07, |
| "loss": 0.3629, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.4773218142548594, |
| "grad_norm": 0.10468819737434387, |
| "learning_rate": 8.966459942392108e-07, |
| "loss": 0.3631, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.4794816414686824, |
| "grad_norm": 0.11002985388040543, |
| "learning_rate": 8.894785209046886e-07, |
| "loss": 0.3584, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.4816414686825055, |
| "grad_norm": 0.10573374480009079, |
| "learning_rate": 8.823370118578628e-07, |
| "loss": 0.3681, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.4838012958963285, |
| "grad_norm": 0.11796517670154572, |
| "learning_rate": 8.752215122083874e-07, |
| "loss": 0.3617, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.485961123110151, |
| "grad_norm": 0.1184302419424057, |
| "learning_rate": 8.68132066901623e-07, |
| "loss": 0.3672, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.488120950323974, |
| "grad_norm": 0.13177676498889923, |
| "learning_rate": 8.610687207183604e-07, |
| "loss": 0.3573, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.490280777537797, |
| "grad_norm": 0.11671025305986404, |
| "learning_rate": 8.540315182745329e-07, |
| "loss": 0.3569, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.4924406047516197, |
| "grad_norm": 0.10741881281137466, |
| "learning_rate": 8.470205040209362e-07, |
| "loss": 0.3558, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.4946004319654427, |
| "grad_norm": 0.12825675308704376, |
| "learning_rate": 8.400357222429473e-07, |
| "loss": 0.3575, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.4967602591792657, |
| "grad_norm": 0.10776403546333313, |
| "learning_rate": 8.330772170602424e-07, |
| "loss": 0.3589, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.4989200863930887, |
| "grad_norm": 0.11745335906744003, |
| "learning_rate": 8.261450324265225e-07, |
| "loss": 0.3617, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.5010799136069113, |
| "grad_norm": 0.10803595185279846, |
| "learning_rate": 8.192392121292336e-07, |
| "loss": 0.3636, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.5032397408207343, |
| "grad_norm": 0.11620043963193893, |
| "learning_rate": 8.123597997892918e-07, |
| "loss": 0.3688, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.5053995680345573, |
| "grad_norm": 0.11771270632743835, |
| "learning_rate": 8.055068388608011e-07, |
| "loss": 0.3633, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.5075593952483803, |
| "grad_norm": 0.11002473533153534, |
| "learning_rate": 7.986803726307901e-07, |
| "loss": 0.3649, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.509719222462203, |
| "grad_norm": 0.11476074159145355, |
| "learning_rate": 7.918804442189271e-07, |
| "loss": 0.3482, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.511879049676026, |
| "grad_norm": 0.10824240744113922, |
| "learning_rate": 7.851070965772572e-07, |
| "loss": 0.3502, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.514038876889849, |
| "grad_norm": 0.11206220835447311, |
| "learning_rate": 7.783603724899258e-07, |
| "loss": 0.3668, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.5161987041036715, |
| "grad_norm": 0.11207690834999084, |
| "learning_rate": 7.716403145729073e-07, |
| "loss": 0.3585, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.5183585313174945, |
| "grad_norm": 0.10834087431430817, |
| "learning_rate": 7.649469652737407e-07, |
| "loss": 0.3557, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.5205183585313176, |
| "grad_norm": 0.11165751516819, |
| "learning_rate": 7.582803668712579e-07, |
| "loss": 0.3654, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.52267818574514, |
| "grad_norm": 0.10847879201173782, |
| "learning_rate": 7.51640561475318e-07, |
| "loss": 0.362, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.524838012958963, |
| "grad_norm": 0.11347544938325882, |
| "learning_rate": 7.450275910265415e-07, |
| "loss": 0.3631, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.526997840172786, |
| "grad_norm": 0.11547064036130905, |
| "learning_rate": 7.384414972960419e-07, |
| "loss": 0.3613, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.529157667386609, |
| "grad_norm": 0.11166190356016159, |
| "learning_rate": 7.318823218851668e-07, |
| "loss": 0.3664, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.531317494600432, |
| "grad_norm": 0.11519124358892441, |
| "learning_rate": 7.253501062252338e-07, |
| "loss": 0.3715, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.533477321814255, |
| "grad_norm": 0.12818704545497894, |
| "learning_rate": 7.188448915772673e-07, |
| "loss": 0.3568, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.535637149028078, |
| "grad_norm": 0.11333166062831879, |
| "learning_rate": 7.123667190317396e-07, |
| "loss": 0.366, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.537796976241901, |
| "grad_norm": 0.11098440736532211, |
| "learning_rate": 7.059156295083064e-07, |
| "loss": 0.3651, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.5399568034557234, |
| "grad_norm": 0.11005040258169174, |
| "learning_rate": 6.994916637555571e-07, |
| "loss": 0.3658, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.5421166306695464, |
| "grad_norm": 0.10551054775714874, |
| "learning_rate": 6.930948623507505e-07, |
| "loss": 0.3654, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.5442764578833694, |
| "grad_norm": 0.10704758763313293, |
| "learning_rate": 6.86725265699561e-07, |
| "loss": 0.3562, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.546436285097192, |
| "grad_norm": 0.1092720702290535, |
| "learning_rate": 6.803829140358237e-07, |
| "loss": 0.3619, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.548596112311015, |
| "grad_norm": 0.10640691220760345, |
| "learning_rate": 6.74067847421277e-07, |
| "loss": 0.3674, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.550755939524838, |
| "grad_norm": 0.10517946630716324, |
| "learning_rate": 6.677801057453143e-07, |
| "loss": 0.3556, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.552915766738661, |
| "grad_norm": 0.10489367693662643, |
| "learning_rate": 6.615197287247299e-07, |
| "loss": 0.3766, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.555075593952484, |
| "grad_norm": 0.11467967927455902, |
| "learning_rate": 6.552867559034687e-07, |
| "loss": 0.3569, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.5572354211663066, |
| "grad_norm": 0.11009713262319565, |
| "learning_rate": 6.490812266523716e-07, |
| "loss": 0.3654, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.5593952483801297, |
| "grad_norm": 0.10729658603668213, |
| "learning_rate": 6.429031801689362e-07, |
| "loss": 0.3564, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.5615550755939527, |
| "grad_norm": 0.1073872372508049, |
| "learning_rate": 6.36752655477062e-07, |
| "loss": 0.3606, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.5637149028077753, |
| "grad_norm": 0.10580222308635712, |
| "learning_rate": 6.30629691426804e-07, |
| "loss": 0.371, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.5658747300215983, |
| "grad_norm": 0.11771810799837112, |
| "learning_rate": 6.245343266941328e-07, |
| "loss": 0.3597, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.5680345572354213, |
| "grad_norm": 0.11992885917425156, |
| "learning_rate": 6.184665997806832e-07, |
| "loss": 0.3559, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.570194384449244, |
| "grad_norm": 0.11079053580760956, |
| "learning_rate": 6.124265490135161e-07, |
| "loss": 0.3635, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.572354211663067, |
| "grad_norm": 0.10871004313230515, |
| "learning_rate": 6.064142125448763e-07, |
| "loss": 0.3625, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.57451403887689, |
| "grad_norm": 0.11944089829921722, |
| "learning_rate": 6.004296283519478e-07, |
| "loss": 0.3531, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.5766738660907125, |
| "grad_norm": 0.11835870891809464, |
| "learning_rate": 5.944728342366179e-07, |
| "loss": 0.3596, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.5788336933045355, |
| "grad_norm": 0.10851329565048218, |
| "learning_rate": 5.885438678252342e-07, |
| "loss": 0.3692, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.5809935205183585, |
| "grad_norm": 0.10725897550582886, |
| "learning_rate": 5.826427665683715e-07, |
| "loss": 0.3621, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.5831533477321815, |
| "grad_norm": 0.10977955162525177, |
| "learning_rate": 5.767695677405921e-07, |
| "loss": 0.3536, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.5853131749460045, |
| "grad_norm": 0.11643577367067337, |
| "learning_rate": 5.709243084402128e-07, |
| "loss": 0.3624, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.587473002159827, |
| "grad_norm": 0.11957161873579025, |
| "learning_rate": 5.651070255890689e-07, |
| "loss": 0.3567, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.58963282937365, |
| "grad_norm": 0.11547524482011795, |
| "learning_rate": 5.593177559322776e-07, |
| "loss": 0.3526, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.591792656587473, |
| "grad_norm": 0.10810908675193787, |
| "learning_rate": 5.535565360380146e-07, |
| "loss": 0.3627, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5939524838012957, |
| "grad_norm": 0.10978656262159348, |
| "learning_rate": 5.478234022972756e-07, |
| "loss": 0.3689, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.5961123110151187, |
| "grad_norm": 0.11710033565759659, |
| "learning_rate": 5.421183909236494e-07, |
| "loss": 0.354, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.5982721382289418, |
| "grad_norm": 0.10731150209903717, |
| "learning_rate": 5.364415379530891e-07, |
| "loss": 0.3672, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.6004319654427643, |
| "grad_norm": 0.10609705001115799, |
| "learning_rate": 5.307928792436812e-07, |
| "loss": 0.3541, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.6025917926565874, |
| "grad_norm": 0.11076472699642181, |
| "learning_rate": 5.251724504754258e-07, |
| "loss": 0.3651, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.6047516198704104, |
| "grad_norm": 0.11111797392368317, |
| "learning_rate": 5.19580287150005e-07, |
| "loss": 0.3557, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.6069114470842334, |
| "grad_norm": 0.10651623457670212, |
| "learning_rate": 5.140164245905633e-07, |
| "loss": 0.3537, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.6090712742980564, |
| "grad_norm": 0.11073900759220123, |
| "learning_rate": 5.084808979414779e-07, |
| "loss": 0.3623, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.611231101511879, |
| "grad_norm": 0.11509796231985092, |
| "learning_rate": 5.029737421681446e-07, |
| "loss": 0.3669, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.613390928725702, |
| "grad_norm": 0.11190790683031082, |
| "learning_rate": 4.97494992056754e-07, |
| "loss": 0.3662, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.615550755939525, |
| "grad_norm": 0.11598829925060272, |
| "learning_rate": 4.920446822140673e-07, |
| "loss": 0.3617, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.6177105831533476, |
| "grad_norm": 0.11533954739570618, |
| "learning_rate": 4.866228470672041e-07, |
| "loss": 0.3589, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.6198704103671706, |
| "grad_norm": 0.10564534366130829, |
| "learning_rate": 4.812295208634238e-07, |
| "loss": 0.3626, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.6220302375809936, |
| "grad_norm": 0.11170712113380432, |
| "learning_rate": 4.758647376699033e-07, |
| "loss": 0.3672, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.624190064794816, |
| "grad_norm": 0.11519314348697662, |
| "learning_rate": 4.705285313735297e-07, |
| "loss": 0.3666, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.626349892008639, |
| "grad_norm": 0.11605649441480637, |
| "learning_rate": 4.6522093568068307e-07, |
| "loss": 0.3484, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.6285097192224622, |
| "grad_norm": 0.11404189467430115, |
| "learning_rate": 4.599419841170216e-07, |
| "loss": 0.3555, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.6306695464362853, |
| "grad_norm": 0.11835578829050064, |
| "learning_rate": 4.546917100272735e-07, |
| "loss": 0.3552, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.632829373650108, |
| "grad_norm": 0.11513664573431015, |
| "learning_rate": 4.494701465750217e-07, |
| "loss": 0.3522, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.634989200863931, |
| "grad_norm": 0.11740648001432419, |
| "learning_rate": 4.4427732674250045e-07, |
| "loss": 0.3625, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.637149028077754, |
| "grad_norm": 0.12071909755468369, |
| "learning_rate": 4.391132833303807e-07, |
| "loss": 0.3684, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.639308855291577, |
| "grad_norm": 0.1136975884437561, |
| "learning_rate": 4.3397804895756957e-07, |
| "loss": 0.3684, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.6414686825053995, |
| "grad_norm": 0.11149821430444717, |
| "learning_rate": 4.2887165606099513e-07, |
| "loss": 0.3603, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.6436285097192225, |
| "grad_norm": 0.12100395560264587, |
| "learning_rate": 4.237941368954124e-07, |
| "loss": 0.3624, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.6457883369330455, |
| "grad_norm": 0.1222655400633812, |
| "learning_rate": 4.1874552353319107e-07, |
| "loss": 0.3526, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.647948164146868, |
| "grad_norm": 0.11921314895153046, |
| "learning_rate": 4.137258478641176e-07, |
| "loss": 0.3647, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.650107991360691, |
| "grad_norm": 0.11398887634277344, |
| "learning_rate": 4.087351415951918e-07, |
| "loss": 0.3593, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.652267818574514, |
| "grad_norm": 0.11155658215284348, |
| "learning_rate": 4.0377343625042587e-07, |
| "loss": 0.37, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.6544276457883367, |
| "grad_norm": 0.1191490963101387, |
| "learning_rate": 3.9884076317064813e-07, |
| "loss": 0.3588, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.6565874730021597, |
| "grad_norm": 0.12826910614967346, |
| "learning_rate": 3.9393715351330243e-07, |
| "loss": 0.3566, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.6587473002159827, |
| "grad_norm": 0.11224586516618729, |
| "learning_rate": 3.890626382522539e-07, |
| "loss": 0.3604, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.6609071274298057, |
| "grad_norm": 0.11304951459169388, |
| "learning_rate": 3.8421724817758745e-07, |
| "loss": 0.3719, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.6630669546436287, |
| "grad_norm": 0.10955885052680969, |
| "learning_rate": 3.794010138954213e-07, |
| "loss": 0.3611, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.6652267818574513, |
| "grad_norm": 0.11885318905115128, |
| "learning_rate": 3.7461396582771035e-07, |
| "loss": 0.3732, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.6673866090712743, |
| "grad_norm": 0.11816181242465973, |
| "learning_rate": 3.698561342120499e-07, |
| "loss": 0.3577, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.6695464362850974, |
| "grad_norm": 0.11143229156732559, |
| "learning_rate": 3.651275491014905e-07, |
| "loss": 0.3561, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.67170626349892, |
| "grad_norm": 0.113620825111866, |
| "learning_rate": 3.604282403643472e-07, |
| "loss": 0.3659, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.673866090712743, |
| "grad_norm": 0.11192460358142853, |
| "learning_rate": 3.557582376840063e-07, |
| "loss": 0.3627, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.676025917926566, |
| "grad_norm": 0.11559736728668213, |
| "learning_rate": 3.511175705587433e-07, |
| "loss": 0.3632, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.6781857451403885, |
| "grad_norm": 0.11298345029354095, |
| "learning_rate": 3.465062683015341e-07, |
| "loss": 0.3617, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.6803455723542116, |
| "grad_norm": 0.1136719286441803, |
| "learning_rate": 3.419243600398703e-07, |
| "loss": 0.3534, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.6825053995680346, |
| "grad_norm": 0.11135457456111908, |
| "learning_rate": 3.373718747155752e-07, |
| "loss": 0.3723, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.6846652267818576, |
| "grad_norm": 0.10721197724342346, |
| "learning_rate": 3.328488410846187e-07, |
| "loss": 0.3551, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.6868250539956806, |
| "grad_norm": 0.11308667808771133, |
| "learning_rate": 3.283552877169399e-07, |
| "loss": 0.3667, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.688984881209503, |
| "grad_norm": 0.10848429799079895, |
| "learning_rate": 3.2389124299626483e-07, |
| "loss": 0.3643, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.691144708423326, |
| "grad_norm": 0.11723221838474274, |
| "learning_rate": 3.194567351199257e-07, |
| "loss": 0.3717, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.693304535637149, |
| "grad_norm": 0.12472040206193924, |
| "learning_rate": 3.150517920986851e-07, |
| "loss": 0.3608, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.695464362850972, |
| "grad_norm": 0.11016938090324402, |
| "learning_rate": 3.106764417565561e-07, |
| "loss": 0.3588, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.697624190064795, |
| "grad_norm": 0.11815854161977768, |
| "learning_rate": 3.0633071173062966e-07, |
| "loss": 0.3617, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.699784017278618, |
| "grad_norm": 0.1177084818482399, |
| "learning_rate": 3.0201462947089865e-07, |
| "loss": 0.3576, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.7019438444924404, |
| "grad_norm": 0.11179111897945404, |
| "learning_rate": 2.9772822224008515e-07, |
| "loss": 0.3667, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.7041036717062634, |
| "grad_norm": 0.11454194784164429, |
| "learning_rate": 2.9347151711346556e-07, |
| "loss": 0.3707, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.7062634989200864, |
| "grad_norm": 0.10757472366094589, |
| "learning_rate": 2.892445409787037e-07, |
| "loss": 0.3628, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.708423326133909, |
| "grad_norm": 0.11914849281311035, |
| "learning_rate": 2.850473205356774e-07, |
| "loss": 0.3468, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.710583153347732, |
| "grad_norm": 0.1173713430762291, |
| "learning_rate": 2.8087988229631325e-07, |
| "loss": 0.3668, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.712742980561555, |
| "grad_norm": 0.11365855485200882, |
| "learning_rate": 2.76742252584416e-07, |
| "loss": 0.359, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.714902807775378, |
| "grad_norm": 0.11546127498149872, |
| "learning_rate": 2.7263445753550275e-07, |
| "loss": 0.364, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.717062634989201, |
| "grad_norm": 0.11186777800321579, |
| "learning_rate": 2.685565230966408e-07, |
| "loss": 0.3526, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.7192224622030237, |
| "grad_norm": 0.10442403703927994, |
| "learning_rate": 2.6450847502627883e-07, |
| "loss": 0.3551, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.7213822894168467, |
| "grad_norm": 0.12204797565937042, |
| "learning_rate": 2.604903388940899e-07, |
| "loss": 0.3587, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.7235421166306697, |
| "grad_norm": 0.11084363609552383, |
| "learning_rate": 2.5650214008080544e-07, |
| "loss": 0.3679, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.7257019438444923, |
| "grad_norm": 0.10979737341403961, |
| "learning_rate": 2.525439037780558e-07, |
| "loss": 0.3717, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.7278617710583153, |
| "grad_norm": 0.11145438998937607, |
| "learning_rate": 2.486156549882135e-07, |
| "loss": 0.3613, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.7300215982721383, |
| "grad_norm": 0.11015837639570236, |
| "learning_rate": 2.447174185242324e-07, |
| "loss": 0.3652, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.732181425485961, |
| "grad_norm": 0.1096833273768425, |
| "learning_rate": 2.40849219009493e-07, |
| "loss": 0.3531, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.734341252699784, |
| "grad_norm": 0.109636589884758, |
| "learning_rate": 2.3701108087764657e-07, |
| "loss": 0.3596, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.736501079913607, |
| "grad_norm": 0.11428305506706238, |
| "learning_rate": 2.3320302837245846e-07, |
| "loss": 0.3659, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.73866090712743, |
| "grad_norm": 0.11387787014245987, |
| "learning_rate": 2.2942508554765764e-07, |
| "loss": 0.3726, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.740820734341253, |
| "grad_norm": 0.10690239071846008, |
| "learning_rate": 2.2567727626678527e-07, |
| "loss": 0.3651, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.7429805615550755, |
| "grad_norm": 0.10845934599637985, |
| "learning_rate": 2.2195962420304083e-07, |
| "loss": 0.3608, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.7451403887688985, |
| "grad_norm": 0.11751694232225418, |
| "learning_rate": 2.1827215283913683e-07, |
| "loss": 0.3659, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.7473002159827216, |
| "grad_norm": 0.10652041435241699, |
| "learning_rate": 2.1461488546714425e-07, |
| "loss": 0.3634, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.749460043196544, |
| "grad_norm": 0.10296986997127533, |
| "learning_rate": 2.1098784518835292e-07, |
| "loss": 0.3632, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.751619870410367, |
| "grad_norm": 0.10827996581792831, |
| "learning_rate": 2.0739105491312028e-07, |
| "loss": 0.3624, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.75377969762419, |
| "grad_norm": 0.11208463460206985, |
| "learning_rate": 2.0382453736072838e-07, |
| "loss": 0.3552, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.7559395248380127, |
| "grad_norm": 0.11274047195911407, |
| "learning_rate": 2.0028831505924162e-07, |
| "loss": 0.3613, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.7580993520518358, |
| "grad_norm": 0.10478544235229492, |
| "learning_rate": 1.967824103453597e-07, |
| "loss": 0.3592, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.760259179265659, |
| "grad_norm": 0.10351528972387314, |
| "learning_rate": 1.9330684536428335e-07, |
| "loss": 0.3693, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.762419006479482, |
| "grad_norm": 0.11334282159805298, |
| "learning_rate": 1.8986164206957037e-07, |
| "loss": 0.3615, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.7645788336933044, |
| "grad_norm": 0.10871846228837967, |
| "learning_rate": 1.8644682222299703e-07, |
| "loss": 0.3644, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.7667386609071274, |
| "grad_norm": 0.10826321691274643, |
| "learning_rate": 1.8306240739442094e-07, |
| "loss": 0.3599, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.7688984881209504, |
| "grad_norm": 0.1105961948633194, |
| "learning_rate": 1.7970841896164658e-07, |
| "loss": 0.3652, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.7710583153347734, |
| "grad_norm": 0.10997821390628815, |
| "learning_rate": 1.7638487811028616e-07, |
| "loss": 0.3675, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.773218142548596, |
| "grad_norm": 0.1074373796582222, |
| "learning_rate": 1.7309180583363062e-07, |
| "loss": 0.3542, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.775377969762419, |
| "grad_norm": 0.10459216684103012, |
| "learning_rate": 1.6982922293251548e-07, |
| "loss": 0.3538, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.777537796976242, |
| "grad_norm": 0.10451044887304306, |
| "learning_rate": 1.6659715001518583e-07, |
| "loss": 0.367, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.7796976241900646, |
| "grad_norm": 0.10947411507368088, |
| "learning_rate": 1.6339560749717154e-07, |
| "loss": 0.3515, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.7818574514038876, |
| "grad_norm": 0.11110340058803558, |
| "learning_rate": 1.6022461560115498e-07, |
| "loss": 0.3603, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.7840172786177106, |
| "grad_norm": 0.10515395551919937, |
| "learning_rate": 1.5708419435684463e-07, |
| "loss": 0.3547, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.786177105831533, |
| "grad_norm": 0.10683929920196533, |
| "learning_rate": 1.5397436360084784e-07, |
| "loss": 0.3617, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.7883369330453562, |
| "grad_norm": 0.10624652355909348, |
| "learning_rate": 1.5089514297654594e-07, |
| "loss": 0.3553, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.7904967602591793, |
| "grad_norm": 0.11002147197723389, |
| "learning_rate": 1.4784655193396947e-07, |
| "loss": 0.3557, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.7926565874730023, |
| "grad_norm": 0.1125330999493599, |
| "learning_rate": 1.448286097296764e-07, |
| "loss": 0.3544, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.7948164146868253, |
| "grad_norm": 0.11160624772310257, |
| "learning_rate": 1.4184133542663014e-07, |
| "loss": 0.3694, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.796976241900648, |
| "grad_norm": 0.10507107526063919, |
| "learning_rate": 1.388847478940797e-07, |
| "loss": 0.3713, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.799136069114471, |
| "grad_norm": 0.107913538813591, |
| "learning_rate": 1.3595886580743677e-07, |
| "loss": 0.3698, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.801295896328294, |
| "grad_norm": 0.11146403104066849, |
| "learning_rate": 1.330637076481639e-07, |
| "loss": 0.36, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.8034557235421165, |
| "grad_norm": 0.10874520242214203, |
| "learning_rate": 1.3019929170365376e-07, |
| "loss": 0.3639, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.8056155507559395, |
| "grad_norm": 0.11767850816249847, |
| "learning_rate": 1.2736563606711384e-07, |
| "loss": 0.3618, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.8077753779697625, |
| "grad_norm": 0.10746905952692032, |
| "learning_rate": 1.2456275863745426e-07, |
| "loss": 0.3624, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.809935205183585, |
| "grad_norm": 0.10965242981910706, |
| "learning_rate": 1.2179067711917015e-07, |
| "loss": 0.3732, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.812095032397408, |
| "grad_norm": 0.10720682889223099, |
| "learning_rate": 1.1904940902223661e-07, |
| "loss": 0.3661, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.814254859611231, |
| "grad_norm": 0.11190472543239594, |
| "learning_rate": 1.1633897166199227e-07, |
| "loss": 0.3572, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.816414686825054, |
| "grad_norm": 0.10630635917186737, |
| "learning_rate": 1.136593821590326e-07, |
| "loss": 0.3587, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.818574514038877, |
| "grad_norm": 0.10910697281360626, |
| "learning_rate": 1.1101065743910122e-07, |
| "loss": 0.3666, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.8207343412526997, |
| "grad_norm": 0.11752592027187347, |
| "learning_rate": 1.0839281423298375e-07, |
| "loss": 0.3638, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.8228941684665227, |
| "grad_norm": 0.11391156911849976, |
| "learning_rate": 1.0580586907639912e-07, |
| "loss": 0.3605, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.8250539956803458, |
| "grad_norm": 0.11459757387638092, |
| "learning_rate": 1.032498383099001e-07, |
| "loss": 0.365, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.8272138228941683, |
| "grad_norm": 0.10249683260917664, |
| "learning_rate": 1.007247380787657e-07, |
| "loss": 0.3609, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.8293736501079914, |
| "grad_norm": 0.11776190996170044, |
| "learning_rate": 9.823058433290178e-08, |
| "loss": 0.3667, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.8315334773218144, |
| "grad_norm": 0.10746931284666061, |
| "learning_rate": 9.576739282673886e-08, |
| "loss": 0.3598, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.833693304535637, |
| "grad_norm": 0.1106642559170723, |
| "learning_rate": 9.333517911913281e-08, |
| "loss": 0.3627, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.83585313174946, |
| "grad_norm": 0.1114298552274704, |
| "learning_rate": 9.093395857326714e-08, |
| "loss": 0.3521, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.838012958963283, |
| "grad_norm": 0.11040709167718887, |
| "learning_rate": 8.856374635655696e-08, |
| "loss": 0.3618, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.8401727861771056, |
| "grad_norm": 0.10548478364944458, |
| "learning_rate": 8.622455744054958e-08, |
| "loss": 0.3574, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.8423326133909286, |
| "grad_norm": 0.1121056005358696, |
| "learning_rate": 8.391640660083411e-08, |
| "loss": 0.3693, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.8444924406047516, |
| "grad_norm": 0.11348962038755417, |
| "learning_rate": 8.163930841694589e-08, |
| "loss": 0.3569, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.8466522678185746, |
| "grad_norm": 0.10726695507764816, |
| "learning_rate": 7.939327727227441e-08, |
| "loss": 0.3667, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.8488120950323976, |
| "grad_norm": 0.10446982830762863, |
| "learning_rate": 7.717832735397335e-08, |
| "loss": 0.3685, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.85097192224622, |
| "grad_norm": 0.11472396552562714, |
| "learning_rate": 7.499447265286952e-08, |
| "loss": 0.364, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.853131749460043, |
| "grad_norm": 0.10750308632850647, |
| "learning_rate": 7.284172696337688e-08, |
| "loss": 0.3626, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.8552915766738662, |
| "grad_norm": 0.11191460490226746, |
| "learning_rate": 7.072010388340656e-08, |
| "loss": 0.3623, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.857451403887689, |
| "grad_norm": 0.10993245989084244, |
| "learning_rate": 6.862961681428304e-08, |
| "loss": 0.3549, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.859611231101512, |
| "grad_norm": 0.11314646899700165, |
| "learning_rate": 6.657027896065982e-08, |
| "loss": 0.3542, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.861771058315335, |
| "grad_norm": 0.1285964399576187, |
| "learning_rate": 6.454210333043275e-08, |
| "loss": 0.3572, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.8639308855291574, |
| "grad_norm": 0.10818547010421753, |
| "learning_rate": 6.254510273466186e-08, |
| "loss": 0.3676, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.8660907127429804, |
| "grad_norm": 0.10412049293518066, |
| "learning_rate": 6.057928978748906e-08, |
| "loss": 0.3685, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.8682505399568035, |
| "grad_norm": 0.10978944599628448, |
| "learning_rate": 5.864467690605613e-08, |
| "loss": 0.3671, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.8704103671706265, |
| "grad_norm": 0.1174926683306694, |
| "learning_rate": 5.674127631043025e-08, |
| "loss": 0.3658, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.8725701943844495, |
| "grad_norm": 0.11143560707569122, |
| "learning_rate": 5.4869100023523526e-08, |
| "loss": 0.3624, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.874730021598272, |
| "grad_norm": 0.10805241763591766, |
| "learning_rate": 5.302815987101917e-08, |
| "loss": 0.3636, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.876889848812095, |
| "grad_norm": 0.11456768959760666, |
| "learning_rate": 5.121846748129544e-08, |
| "loss": 0.3537, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.879049676025918, |
| "grad_norm": 0.1143973246216774, |
| "learning_rate": 4.944003428535349e-08, |
| "loss": 0.361, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.8812095032397407, |
| "grad_norm": 0.11492909491062164, |
| "learning_rate": 4.769287151674407e-08, |
| "loss": 0.3529, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.8833693304535637, |
| "grad_norm": 0.11312732100486755, |
| "learning_rate": 4.597699021149649e-08, |
| "loss": 0.3604, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.8855291576673867, |
| "grad_norm": 0.11396172642707825, |
| "learning_rate": 4.429240120804923e-08, |
| "loss": 0.3601, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.8876889848812093, |
| "grad_norm": 0.10564181953668594, |
| "learning_rate": 4.263911514718222e-08, |
| "loss": 0.365, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.8898488120950323, |
| "grad_norm": 0.11512638628482819, |
| "learning_rate": 4.10171424719491e-08, |
| "loss": 0.3658, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.8920086393088553, |
| "grad_norm": 0.11602869629859924, |
| "learning_rate": 3.9426493427611177e-08, |
| "loss": 0.3611, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.8941684665226783, |
| "grad_norm": 0.11228124052286148, |
| "learning_rate": 3.786717806157136e-08, |
| "loss": 0.3615, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.896328293736501, |
| "grad_norm": 0.1036510244011879, |
| "learning_rate": 3.633920622331311e-08, |
| "loss": 0.3621, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.898488120950324, |
| "grad_norm": 0.11727307736873627, |
| "learning_rate": 3.4842587564337674e-08, |
| "loss": 0.3569, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.900647948164147, |
| "grad_norm": 0.10487374663352966, |
| "learning_rate": 3.337733153810141e-08, |
| "loss": 0.362, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.90280777537797, |
| "grad_norm": 0.10877780616283417, |
| "learning_rate": 3.194344739995803e-08, |
| "loss": 0.349, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.9049676025917925, |
| "grad_norm": 0.11223684251308441, |
| "learning_rate": 3.054094420709863e-08, |
| "loss": 0.365, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.9071274298056156, |
| "grad_norm": 0.103155717253685, |
| "learning_rate": 2.9169830818496226e-08, |
| "loss": 0.3592, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.9092872570194386, |
| "grad_norm": 0.11470197141170502, |
| "learning_rate": 2.783011589484741e-08, |
| "loss": 0.3578, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.911447084233261, |
| "grad_norm": 0.12167331576347351, |
| "learning_rate": 2.6521807898520214e-08, |
| "loss": 0.353, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.913606911447084, |
| "grad_norm": 0.1081666648387909, |
| "learning_rate": 2.5244915093499134e-08, |
| "loss": 0.3703, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.915766738660907, |
| "grad_norm": 0.11052247881889343, |
| "learning_rate": 2.3999445545332955e-08, |
| "loss": 0.3593, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.9179265658747298, |
| "grad_norm": 0.10058227181434631, |
| "learning_rate": 2.2785407121084236e-08, |
| "loss": 0.371, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.920086393088553, |
| "grad_norm": 0.11320126056671143, |
| "learning_rate": 2.1602807489279344e-08, |
| "loss": 0.3549, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.922246220302376, |
| "grad_norm": 0.11561363190412521, |
| "learning_rate": 2.0451654119860164e-08, |
| "loss": 0.3578, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.924406047516199, |
| "grad_norm": 0.10961954295635223, |
| "learning_rate": 1.9331954284137476e-08, |
| "loss": 0.3676, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.926565874730022, |
| "grad_norm": 0.10924555361270905, |
| "learning_rate": 1.8243715054744315e-08, |
| "loss": 0.3726, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.9287257019438444, |
| "grad_norm": 0.10880248993635178, |
| "learning_rate": 1.71869433055899e-08, |
| "loss": 0.3547, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.9308855291576674, |
| "grad_norm": 0.10798300057649612, |
| "learning_rate": 1.6161645711819664e-08, |
| "loss": 0.3569, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.9330453563714904, |
| "grad_norm": 0.11236506700515747, |
| "learning_rate": 1.5167828749770853e-08, |
| "loss": 0.3681, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.935205183585313, |
| "grad_norm": 0.10835976153612137, |
| "learning_rate": 1.4205498696930332e-08, |
| "loss": 0.3613, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.937365010799136, |
| "grad_norm": 0.12566576898097992, |
| "learning_rate": 1.3274661631899055e-08, |
| "loss": 0.3637, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.939524838012959, |
| "grad_norm": 0.10957465320825577, |
| "learning_rate": 1.2375323434348773e-08, |
| "loss": 0.3504, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.9416846652267816, |
| "grad_norm": 0.10701923072338104, |
| "learning_rate": 1.1507489784989278e-08, |
| "loss": 0.3607, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.9438444924406046, |
| "grad_norm": 0.10923349112272263, |
| "learning_rate": 1.067116616552899e-08, |
| "loss": 0.3706, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.9460043196544277, |
| "grad_norm": 0.11076433211565018, |
| "learning_rate": 9.866357858642206e-09, |
| "loss": 0.3746, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.9481641468682507, |
| "grad_norm": 0.10817807167768478, |
| "learning_rate": 9.09306994793635e-09, |
| "loss": 0.3648, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.9503239740820737, |
| "grad_norm": 0.10961637645959854, |
| "learning_rate": 8.351307317917002e-09, |
| "loss": 0.3571, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.9524838012958963, |
| "grad_norm": 0.10122332721948624, |
| "learning_rate": 7.641074653961244e-09, |
| "loss": 0.3681, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.9546436285097193, |
| "grad_norm": 0.10331834852695465, |
| "learning_rate": 6.962376442284368e-09, |
| "loss": 0.3566, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.9568034557235423, |
| "grad_norm": 0.10392733663320541, |
| "learning_rate": 6.315216969912663e-09, |
| "loss": 0.3422, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.958963282937365, |
| "grad_norm": 0.1083427146077156, |
| "learning_rate": 5.699600324657328e-09, |
| "loss": 0.3711, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.961123110151188, |
| "grad_norm": 0.1106184870004654, |
| "learning_rate": 5.115530395087276e-09, |
| "loss": 0.3639, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.963282937365011, |
| "grad_norm": 0.10812865942716599, |
| "learning_rate": 4.5630108705063684e-09, |
| "loss": 0.3647, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.9654427645788335, |
| "grad_norm": 0.11043433845043182, |
| "learning_rate": 4.042045240927883e-09, |
| "loss": 0.3706, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.9676025917926565, |
| "grad_norm": 0.1146334782242775, |
| "learning_rate": 3.5526367970539765e-09, |
| "loss": 0.3564, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.9697624190064795, |
| "grad_norm": 0.11209335923194885, |
| "learning_rate": 3.094788630254031e-09, |
| "loss": 0.369, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.971922246220302, |
| "grad_norm": 0.10334824025630951, |
| "learning_rate": 2.6685036325457826e-09, |
| "loss": 0.3614, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.974082073434125, |
| "grad_norm": 0.12000252306461334, |
| "learning_rate": 2.2737844965775578e-09, |
| "loss": 0.3677, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.976241900647948, |
| "grad_norm": 0.10969026386737823, |
| "learning_rate": 1.9106337156099553e-09, |
| "loss": 0.3506, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.978401727861771, |
| "grad_norm": 0.11796250939369202, |
| "learning_rate": 1.5790535835003006e-09, |
| "loss": 0.3698, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.980561555075594, |
| "grad_norm": 0.11176804453134537, |
| "learning_rate": 1.2790461946887712e-09, |
| "loss": 0.3574, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.9827213822894167, |
| "grad_norm": 0.10805931687355042, |
| "learning_rate": 1.0106134441850712e-09, |
| "loss": 0.3732, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.9848812095032398, |
| "grad_norm": 0.11747419834136963, |
| "learning_rate": 7.737570275573314e-10, |
| "loss": 0.3544, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.987041036717063, |
| "grad_norm": 0.11195072531700134, |
| "learning_rate": 5.684784409182298e-10, |
| "loss": 0.3743, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.9892008639308854, |
| "grad_norm": 0.11737102270126343, |
| "learning_rate": 3.9477898091944135e-10, |
| "loss": 0.3672, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.9913606911447084, |
| "grad_norm": 0.1034155786037445, |
| "learning_rate": 2.5265974474109054e-10, |
| "loss": 0.3586, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.9935205183585314, |
| "grad_norm": 0.11616694182157516, |
| "learning_rate": 1.4212163008509028e-10, |
| "loss": 0.36, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.995680345572354, |
| "grad_norm": 0.11564143747091293, |
| "learning_rate": 6.316533517125578e-11, |
| "loss": 0.3624, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.997840172786177, |
| "grad_norm": 0.11294636130332947, |
| "learning_rate": 1.57913587295333e-11, |
| "loss": 0.3607, |
| "step": 1388 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.10668095201253891, |
| "learning_rate": 0.0, |
| "loss": 0.3576, |
| "step": 1389 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1389, |
| "total_flos": 2.853679693771571e+16, |
| "train_loss": 0.023199697249737295, |
| "train_runtime": 5911.0235, |
| "train_samples_per_second": 90.117, |
| "train_steps_per_second": 0.235 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1389, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.853679693771571e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|